diff --git a/.binder/requirements.txt b/.binder/requirements.txt index 51ca95be6785e..0a2352aa9b22b 100644 --- a/.binder/requirements.txt +++ b/.binder/requirements.txt @@ -1,4 +1,4 @@ ---find-links https://pypi.anaconda.org/scipy-wheels-nightly/simple/scikit-learn +--find-links https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/scikit-learn --pre matplotlib scikit-image diff --git a/.circleci/config.yml b/.circleci/config.yml index 4408d2bc36de7..eedc286a5a5f2 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -11,7 +11,7 @@ jobs: command: | source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint + pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint - run: name: linting command: ./build_tools/linting.sh diff --git a/.cirrus.star b/.cirrus.star index 8b3de0d10c532..f0b458d74289a 100644 --- a/.cirrus.star +++ b/.cirrus.star @@ -14,7 +14,7 @@ def main(ctx): # Nightly jobs always run if env.get("CIRRUS_CRON", "") == "nightly": - return fs.read(arm_wheel_yaml) + return fs.read(arm_wheel_yaml) + fs.read(arm_tests_yaml) # Get commit message for event. We can not use `git` here because there is # no command line access in starlark. Thus we need to query the GitHub API @@ -26,10 +26,12 @@ def main(ctx): response = http.get(url).json() commit_msg = response["message"] - if "[skip ci]" in commit_msg: - return [] + jobs_to_run = "" if "[cd build]" in commit_msg or "[cd build cirrus]" in commit_msg: - return fs.read(arm_wheel_yaml) + fs.read(arm_tests_yaml) + jobs_to_run += fs.read(arm_wheel_yaml) + + if "[cirrus arm]" in commit_msg: + jobs_to_run += fs.read(arm_tests_yaml) - return fs.read(arm_tests_yaml) + return jobs_to_run diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 66991b140c2b6..1c7043f0bd7ca 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -28,3 +28,6 @@ d4aad64b1eb2e42e76f49db2ccfbe4b4660d092b # PR 26110: Update black to 23.3.0 893d5accaf9d16f447645e704f85a216187564f7 + +# PR 26649: Add isort and ruff rules +42173fdb34b5aded79664e045cada719dfbe39dc diff --git a/.github/scripts/label_title_regex.py b/.github/scripts/label_title_regex.py index ddf9bda3492de..a022c3c4dd2a7 100644 --- a/.github/scripts/label_title_regex.py +++ b/.github/scripts/label_title_regex.py @@ -1,10 +1,11 @@ """Labels PRs based on title. Must be run in a github action with the pull_request_target event.""" -from github import Github -import os import json +import os import re +from github import Github + context_dict = json.loads(os.getenv("CONTEXT_GITHUB")) repo = context_dict["repository"] diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000000000..cb86f84baa494 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,104 @@ +# This linter job on GH actions is used to trigger the commenter bot +# in bot-lint-comment.yml file. It stores the output of the linter to be used +# by the commenter bot. 
+name: linter + +on: + - pull_request_target + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + + # setting any permission will set everything else to none for GITHUB_TOKEN + permissions: + pull-requests: none + + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + ref: ${{ github.event.pull_request.head.sha }} + + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: 3.11 + + - name: Install dependencies + run: | + curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/shared.sh --retry 5 -o ./build_tools/shared.sh + source build_tools/shared.sh + # Include pytest compatibility with mypy + pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint + # we save the versions of the linters to be used in the error message later. + python -c "from importlib.metadata import version; print(f\"ruff={version('ruff')}\")" >> /tmp/versions.txt + python -c "from importlib.metadata import version; print(f\"mypy={version('mypy')}\")" >> /tmp/versions.txt + python -c "from importlib.metadata import version; print(f\"black={version('black')}\")" >> /tmp/versions.txt + python -c "from importlib.metadata import version; print(f\"cython-lint={version('cython-lint')}\")" >> /tmp/versions.txt + + - name: Run linting + id: lint-script + # We download the linting script from main, since this workflow is run + # from main itself. + run: | + curl https://raw.githubusercontent.com/${{ github.repository }}/main/build_tools/linting.sh --retry 5 -o ./build_tools/linting.sh + set +e + ./build_tools/linting.sh &> /tmp/linting_output.txt + cat /tmp/linting_output.txt + + - name: Upload Artifact + if: always() + uses: actions/upload-artifact@v3 + with: + name: lint-log + path: | + /tmp/linting_output.txt + /tmp/versions.txt + retention-days: 1 + + comment: + needs: lint + if: always() + runs-on: ubuntu-latest + + # We need these permissions to be able to post / update comments + permissions: + pull-requests: write + issues: write + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: 3.11 + + - name: Install dependencies + run: python -m pip install requests + + - name: Download artifact + id: download-artifact + uses: actions/download-artifact@v3 + with: + name: lint-log + + - name: Print log + run: cat linting_output.txt + + - name: Process Comments + id: process-comments + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PR_NUMBER: ${{ github.event.pull_request.number }} + BRANCH_SHA: ${{ github.event.pull_request.head.sha }} + RUN_ID: ${{ github.run_id }} + LOG_FILE: linting_output.txt + VERSIONS_FILE: versions.txt + run: python ./build_tools/get_comment.py diff --git a/.github/workflows/wheels.yml b/.github/workflows/wheels.yml index b43f29ffa4f7f..b82a114bff1af 100644 --- a/.github/workflows/wheels.yml +++ b/.github/workflows/wheels.yml @@ -67,6 +67,9 @@ jobs: - os: windows-latest python: 311 platform_id: win_amd64 + - os: windows-latest + python: 312 + platform_id: win_amd64 # Linux 64 bit manylinux2014 - os: ubuntu-latest @@ -88,6 +91,10 @@ jobs: python: 311 platform_id: manylinux_x86_64 manylinux_image: manylinux2014 + - os: ubuntu-latest + python: 312 + platform_id: manylinux_x86_64 + manylinux_image: manylinux2014 # MacOS x86_64 - os: macos-latest @@ -102,6 +109,27 @@ jobs: - os: macos-latest python: 311 platform_id: macosx_x86_64 + - os: 
macos-latest + python: 312 + platform_id: macosx_x86_64 + + # MacOS arm64 + # The wheel for the latest Python version is built and tested on + # Cirrus CI but due to limited build time for free accounts on Cirrus + # CI, we build the macOS arm64 wheels for the other Python versions on + # Github Actions via cross-compilation (without running the tests). + - os: macos-latest + python: 38 + platform_id: macosx_arm64 + - os: macos-latest + python: 39 + platform_id: macosx_arm64 + - os: macos-latest + python: 310 + platform_id: macosx_arm64 + - os: macos-latest + python: 311 + platform_id: macosx_arm64 steps: - name: Checkout scikit-learn @@ -116,6 +144,7 @@ jobs: env: CONFTEST_PATH: ${{ github.workspace }}/conftest.py CONFTEST_NAME: conftest.py + CIBW_PRERELEASE_PYTHONS: ${{ matrix.prerelease }} CIBW_ENVIRONMENT: SKLEARN_SKIP_NETWORK_TESTS=1 SKLEARN_BUILD_PARALLEL=3 CIBW_BUILD: cp${{ matrix.python }}-${{ matrix.platform_id }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 26db27bc827b2..abffbbe149f2c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,13 +9,14 @@ repos: rev: 23.3.0 hooks: - id: black -- repo: https://github.com/pycqa/flake8 - rev: 4.0.1 +- repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version. + rev: v0.0.272 hooks: - - id: flake8 - types: [file, python] + - id: ruff + args: ["--fix", "--show-source"] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v0.961 + rev: v1.3.0 hooks: - id: mypy files: sklearn/ diff --git a/Makefile b/Makefile index 5ea64dc0d6cac..e2ae6aa75ca94 100644 --- a/Makefile +++ b/Makefile @@ -61,5 +61,4 @@ doc-noplot: inplace $(MAKE) -C doc html-noplot code-analysis: - flake8 sklearn | grep -v __init__ | grep -v external - pylint -E -i y sklearn/ -d E1103,E0611,E1101 + build_tools/linting.sh diff --git a/README.rst b/README.rst index 80de41a8890a1..4042420ba2977 100644 --- a/README.rst +++ b/README.rst @@ -17,7 +17,7 @@ .. |Nightly wheels| image:: https://github.com/scikit-learn/scikit-learn/workflows/Wheel%20builder/badge.svg?event=schedule .. _`Nightly wheels`: https://github.com/scikit-learn/scikit-learn/actions?query=workflow%3A%22Wheel+builder%22+event%3Aschedule -.. |PythonVersion| image:: https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10-blue +.. |PythonVersion| image:: https://img.shields.io/pypi/pyversions/scikit-learn.svg .. _PythonVersion: https://pypi.org/project/scikit-learn/ .. |PyPi| image:: https://img.shields.io/pypi/v/scikit-learn diff --git a/SECURITY.md b/SECURITY.md index 9af364e1651e3..fffe1d46b2ed0 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -4,8 +4,8 @@ | Version | Supported | | --------- | ------------------ | -| 1.2.2 | :white_check_mark: | -| < 1.2.2 | :x: | +| 1.3.1 | :white_check_mark: | +| < 1.3.1 | :x: | ## Reporting a Vulnerability diff --git a/asv_benchmarks/asv.conf.json b/asv_benchmarks/asv.conf.json index 9f65d194b6d84..f261ce1553c8f 100644 --- a/asv_benchmarks/asv.conf.json +++ b/asv_benchmarks/asv.conf.json @@ -71,13 +71,17 @@ // pip (with all the conda available packages installed first, // followed by the pip installed packages). // + // The versions of the dependencies should be bumped in a dedicated commit + // to easily identify regressions/imrovements due to code changes from + // those due to dependency changes. 
+ // "matrix": { - "numpy": [], - "scipy": [], - "cython": [], - "joblib": [], - "threadpoolctl": [], - "pandas": [] + "numpy": ["1.25.2"], + "scipy": ["1.11.2"], + "cython": ["0.29.36"], + "joblib": ["1.3.2"], + "threadpoolctl": ["3.2.0"], + "pandas": ["2.1.0"] }, // Combinations of libraries/python versions can be excluded/included diff --git a/asv_benchmarks/benchmarks/cluster.py b/asv_benchmarks/benchmarks/cluster.py index ba460e6b503a6..457a15dd938e9 100644 --- a/asv_benchmarks/benchmarks/cluster.py +++ b/asv_benchmarks/benchmarks/cluster.py @@ -1,7 +1,7 @@ from sklearn.cluster import KMeans, MiniBatchKMeans from .common import Benchmark, Estimator, Predictor, Transformer -from .datasets import _blobs_dataset, _20newsgroups_highdim_dataset +from .datasets import _20newsgroups_highdim_dataset, _blobs_dataset from .utils import neg_mean_inertia diff --git a/asv_benchmarks/benchmarks/common.py b/asv_benchmarks/benchmarks/common.py index c3e114a212047..aeea558844587 100644 --- a/asv_benchmarks/benchmarks/common.py +++ b/asv_benchmarks/benchmarks/common.py @@ -1,11 +1,11 @@ -import os +import itertools import json -import timeit +import os import pickle -import itertools +import timeit from abc import ABC, abstractmethod -from pathlib import Path from multiprocessing import cpu_count +from pathlib import Path import numpy as np diff --git a/asv_benchmarks/benchmarks/datasets.py b/asv_benchmarks/benchmarks/datasets.py index dbe0eac0b822c..8f0c915c95e63 100644 --- a/asv_benchmarks/benchmarks/datasets.py +++ b/asv_benchmarks/benchmarks/datasets.py @@ -1,21 +1,22 @@ +from pathlib import Path + import numpy as np import scipy.sparse as sp from joblib import Memory -from pathlib import Path -from sklearn.decomposition import TruncatedSVD from sklearn.datasets import ( - make_blobs, fetch_20newsgroups, + fetch_olivetti_faces, fetch_openml, load_digits, - make_regression, + make_blobs, make_classification, - fetch_olivetti_faces, + make_regression, ) -from sklearn.preprocessing import MaxAbsScaler, StandardScaler +from sklearn.decomposition import TruncatedSVD from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MaxAbsScaler, StandardScaler # memory location for caching datasets M = Memory(location=str(Path(__file__).resolve().parent / "cache")) diff --git a/asv_benchmarks/benchmarks/decomposition.py b/asv_benchmarks/benchmarks/decomposition.py index 02a7862caeb69..0a7bb7ad07f3e 100644 --- a/asv_benchmarks/benchmarks/decomposition.py +++ b/asv_benchmarks/benchmarks/decomposition.py @@ -1,8 +1,8 @@ from sklearn.decomposition import PCA, DictionaryLearning, MiniBatchDictionaryLearning from .common import Benchmark, Estimator, Transformer -from .datasets import _olivetti_faces_dataset, _mnist_dataset -from .utils import make_pca_scorers, make_dict_learning_scorers +from .datasets import _mnist_dataset, _olivetti_faces_dataset +from .utils import make_dict_learning_scorers, make_pca_scorers class PCABenchmark(Transformer, Estimator, Benchmark): diff --git a/asv_benchmarks/benchmarks/ensemble.py b/asv_benchmarks/benchmarks/ensemble.py index 8c5a28e3da90f..c336d1e5f8805 100644 --- a/asv_benchmarks/benchmarks/ensemble.py +++ b/asv_benchmarks/benchmarks/ensemble.py @@ -1,7 +1,7 @@ from sklearn.ensemble import ( - RandomForestClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier, + RandomForestClassifier, ) from .common import Benchmark, Estimator, Predictor diff --git 
a/asv_benchmarks/benchmarks/linear_model.py b/asv_benchmarks/benchmarks/linear_model.py index b694a109329f0..7e7b9d33540c6 100644 --- a/asv_benchmarks/benchmarks/linear_model.py +++ b/asv_benchmarks/benchmarks/linear_model.py @@ -1,9 +1,9 @@ from sklearn.linear_model import ( - LogisticRegression, - Ridge, ElasticNet, Lasso, LinearRegression, + LogisticRegression, + Ridge, SGDRegressor, ) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index dfefda5ccddb9..464096fb69c29 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -35,7 +35,7 @@ jobs: - bash: | source build_tools/shared.sh # Include pytest compatibility with mypy - pip install pytest flake8 $(get_dep mypy min) $(get_dep black min) cython-lint + pip install pytest ruff $(get_dep mypy min) $(get_dep black min) cython-lint displayName: Install linters - bash: | ./build_tools/linting.sh @@ -127,9 +127,9 @@ jobs: vmImage: ubuntu-22.04 variables: # Need to match Python version and Emscripten version for the correct - # Pyodide version. For Pyodide version 0.23.2, see - # https://github.com/pyodide/pyodide/blob/0.23.2/Makefile.envs - PYODIDE_VERSION: '0.23.2' + # Pyodide version. For example, for Pyodide version 0.23.4, see + # https://github.com/pyodide/pyodide/blob/0.23.4/Makefile.envs + PYODIDE_VERSION: '0.23.4' EMSCRIPTEN_VERSION: '3.1.32' PYTHON_VERSION: '3.11.2' @@ -171,7 +171,6 @@ jobs: DISTRIB: 'conda' LOCK_FILE: './build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock' COVERAGE: 'true' - SHOW_SHORT_SUMMARY: 'true' SKLEARN_TESTS_GLOBAL_RANDOM_SEED: '42' # default global random seed # Check compilation with Ubuntu 22.04 LTS (Jammy Jellyfish) and scipy from conda-forge diff --git a/benchmarks/bench_20newsgroups.py b/benchmarks/bench_20newsgroups.py index c542349839178..a559bc59b5f8a 100644 --- a/benchmarks/bench_20newsgroups.py +++ b/benchmarks/bench_20newsgroups.py @@ -1,18 +1,19 @@ -from time import time import argparse -import numpy as np +from time import time -from sklearn.dummy import DummyClassifier +import numpy as np from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.metrics import accuracy_score -from sklearn.utils.validation import check_array - -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.ensemble import AdaBoostClassifier +from sklearn.dummy import DummyClassifier +from sklearn.ensemble import ( + AdaBoostClassifier, + ExtraTreesClassifier, + RandomForestClassifier, +) from sklearn.linear_model import LogisticRegression +from sklearn.metrics import accuracy_score from sklearn.naive_bayes import MultinomialNB +from sklearn.utils.validation import check_array ESTIMATORS = { "dummy": DummyClassifier(), diff --git a/benchmarks/bench_covertype.py b/benchmarks/bench_covertype.py index 8a13a2d9806c6..5b8cdd588c8ee 100644 --- a/benchmarks/bench_covertype.py +++ b/benchmarks/bench_covertype.py @@ -45,20 +45,24 @@ # Arnaud Joly # License: BSD 3 clause +import argparse import os from time import time -import argparse + import numpy as np from joblib import Memory from sklearn.datasets import fetch_covtype, get_data_home -from sklearn.svm import LinearSVC -from sklearn.linear_model import SGDClassifier, LogisticRegression +from sklearn.ensemble import ( + ExtraTreesClassifier, + GradientBoostingClassifier, + RandomForestClassifier, +) +from sklearn.linear_model import LogisticRegression, SGDClassifier +from sklearn.metrics import zero_one_loss from sklearn.naive_bayes import GaussianNB +from sklearn.svm import 
LinearSVC from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.metrics import zero_one_loss from sklearn.utils import check_array # Memoize the data extraction and memory map the resulting diff --git a/benchmarks/bench_feature_expansions.py b/benchmarks/bench_feature_expansions.py index fd5a4f0ebccff..b9d9efbdea4f1 100644 --- a/benchmarks/bench_feature_expansions.py +++ b/benchmarks/bench_feature_expansions.py @@ -1,8 +1,10 @@ +from time import time + import matplotlib.pyplot as plt import numpy as np import scipy.sparse as sparse + from sklearn.preprocessing import PolynomialFeatures -from time import time degree = 2 trials = 3 diff --git a/benchmarks/bench_glm.py b/benchmarks/bench_glm.py index c6c2a6f5fa117..803043398d1ac 100644 --- a/benchmarks/bench_glm.py +++ b/benchmarks/bench_glm.py @@ -5,9 +5,10 @@ """ from datetime import datetime + import numpy as np -from sklearn import linear_model +from sklearn import linear_model if __name__ == "__main__": import matplotlib.pyplot as plt diff --git a/benchmarks/bench_glmnet.py b/benchmarks/bench_glmnet.py index 8a0a0545bb627..7b111f95044e2 100644 --- a/benchmarks/bench_glmnet.py +++ b/benchmarks/bench_glmnet.py @@ -16,9 +16,11 @@ In both cases, only 10% of the features are informative. """ -import numpy as np import gc from time import time + +import numpy as np + from sklearn.datasets import make_regression alpha = 0.1 @@ -45,11 +47,11 @@ def bench(factory, X, Y, X_test, Y_test, ref_coef): if __name__ == "__main__": - from glmnet.elastic_net import Lasso as GlmnetLasso - from sklearn.linear_model import Lasso as ScikitLasso - # Delayed import of matplotlib.pyplot import matplotlib.pyplot as plt + from glmnet.elastic_net import Lasso as GlmnetLasso + + from sklearn.linear_model import Lasso as ScikitLasso scikit_results = [] glmnet_results = [] diff --git a/benchmarks/bench_hist_gradient_boosting.py b/benchmarks/bench_hist_gradient_boosting.py index 163e21f98ed0d..c1dfffabe71c2 100644 --- a/benchmarks/bench_hist_gradient_boosting.py +++ b/benchmarks/bench_hist_gradient_boosting.py @@ -1,15 +1,16 @@ -from time import time import argparse +from time import time import matplotlib.pyplot as plt import numpy as np -from sklearn.model_selection import train_test_split -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression -from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.model_selection import train_test_split parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) diff --git a/benchmarks/bench_hist_gradient_boosting_adult.py b/benchmarks/bench_hist_gradient_boosting_adult.py index 1b5905b1cf4e8..5fa5bbae0c35c 100644 --- a/benchmarks/bench_hist_gradient_boosting_adult.py +++ b/benchmarks/bench_hist_gradient_boosting_adult.py @@ -4,15 +4,14 @@ import numpy as np import pandas as pd -from sklearn.model_selection import train_test_split -from sklearn.compose import make_column_transformer, make_column_selector +from 
sklearn.compose import make_column_selector, make_column_transformer from sklearn.datasets import fetch_openml -from sklearn.metrics import accuracy_score, roc_auc_score from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.metrics import accuracy_score, roc_auc_score +from sklearn.model_selection import train_test_split from sklearn.preprocessing import OrdinalEncoder - parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) parser.add_argument("--n-trees", type=int, default=100) diff --git a/benchmarks/bench_hist_gradient_boosting_categorical_only.py b/benchmarks/bench_hist_gradient_boosting_categorical_only.py index e8d215170f9c8..1085bbc49f4f8 100644 --- a/benchmarks/bench_hist_gradient_boosting_categorical_only.py +++ b/benchmarks/bench_hist_gradient_boosting_categorical_only.py @@ -1,11 +1,10 @@ import argparse from time import time -from sklearn.preprocessing import KBinsDiscretizer from sklearn.datasets import make_classification from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator - +from sklearn.preprocessing import KBinsDiscretizer parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) diff --git a/benchmarks/bench_hist_gradient_boosting_higgsboson.py b/benchmarks/bench_hist_gradient_boosting_higgsboson.py index d6ed3b8e9700f..65be02ec0c4b9 100644 --- a/benchmarks/bench_hist_gradient_boosting_higgsboson.py +++ b/benchmarks/bench_hist_gradient_boosting_higgsboson.py @@ -1,17 +1,17 @@ -from urllib.request import urlretrieve +import argparse import os from gzip import GzipFile from time import time -import argparse +from urllib.request import urlretrieve import numpy as np import pandas as pd from joblib import Memory -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score, roc_auc_score + from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator - +from sklearn.metrics import accuracy_score, roc_auc_score +from sklearn.model_selection import train_test_split parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) diff --git a/benchmarks/bench_hist_gradient_boosting_threading.py b/benchmarks/bench_hist_gradient_boosting_threading.py index 70787fd2eb479..9acf65bdbaf6a 100644 --- a/benchmarks/bench_hist_gradient_boosting_threading.py +++ b/benchmarks/bench_hist_gradient_boosting_threading.py @@ -1,18 +1,19 @@ -from time import time import argparse import os from pprint import pprint +from time import time import numpy as np from threadpoolctl import threadpool_limits + import sklearn -from sklearn.model_selection import train_test_split -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator - +from sklearn.model_selection import train_test_split parser = argparse.ArgumentParser() parser.add_argument("--n-leaf-nodes", type=int, default=31) @@ 
-290,8 +291,8 @@ def one_run(n_threads, n_samples): if args.plot or args.plot_filename: - import matplotlib.pyplot as plt import matplotlib + import matplotlib.pyplot as plt fig, axs = plt.subplots(2, figsize=(12, 12)) diff --git a/benchmarks/bench_isolation_forest.py b/benchmarks/bench_isolation_forest.py index 1c85cfb79d321..021114af56ea6 100644 --- a/benchmarks/bench_isolation_forest.py +++ b/benchmarks/bench_isolation_forest.py @@ -17,12 +17,13 @@ """ from time import time -import numpy as np + import matplotlib.pyplot as plt +import numpy as np +from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml from sklearn.ensemble import IsolationForest -from sklearn.metrics import roc_curve, auc -from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml +from sklearn.metrics import auc, roc_curve from sklearn.preprocessing import LabelBinarizer from sklearn.utils import shuffle as sh diff --git a/benchmarks/bench_isotonic.py b/benchmarks/bench_isotonic.py index 458a04a463303..221e6fb12da75 100644 --- a/benchmarks/bench_isotonic.py +++ b/benchmarks/bench_isotonic.py @@ -10,13 +10,15 @@ This allows the scaling of the algorithm with the problem size to be visualized and understood. """ -import numpy as np +import argparse import gc from datetime import datetime -from sklearn.isotonic import isotonic_regression -from scipy.special import expit + import matplotlib.pyplot as plt -import argparse +import numpy as np +from scipy.special import expit + +from sklearn.isotonic import isotonic_regression def generate_perturbed_logarithm_dataset(size): diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py index 00721aa7f18a9..6551cb74ff86e 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_components.py @@ -39,13 +39,12 @@ import time -import numpy as np import matplotlib.pyplot as plt - +import numpy as np from numpy.testing import assert_array_almost_equal -from sklearn.decomposition import KernelPCA -from sklearn.datasets import make_circles +from sklearn.datasets import make_circles +from sklearn.decomposition import KernelPCA print(__doc__) diff --git a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py index a40ddea4506dd..26a45ca9f09ca 100644 --- a/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py +++ b/benchmarks/bench_kernel_pca_solvers_time_vs_n_samples.py @@ -41,13 +41,12 @@ import time -import numpy as np import matplotlib.pyplot as plt - +import numpy as np from numpy.testing import assert_array_almost_equal -from sklearn.decomposition import KernelPCA -from sklearn.datasets import make_circles +from sklearn.datasets import make_circles +from sklearn.decomposition import KernelPCA print(__doc__) diff --git a/benchmarks/bench_lasso.py b/benchmarks/bench_lasso.py index 9a893545fbb28..1c49c6f5cabdf 100644 --- a/benchmarks/bench_lasso.py +++ b/benchmarks/bench_lasso.py @@ -13,6 +13,7 @@ """ import gc from time import time + import numpy as np from sklearn.datasets import make_regression @@ -59,9 +60,10 @@ def compute_bench(alpha, n_samples, n_features, precompute): if __name__ == "__main__": - from sklearn.linear_model import Lasso, LassoLars import matplotlib.pyplot as plt + from sklearn.linear_model import Lasso, LassoLars + alpha = 0.01 # regularization parameter n_features = 10 diff --git a/benchmarks/bench_lof.py 
b/benchmarks/bench_lof.py index 31057e2e4067b..8652073a7203d 100644 --- a/benchmarks/bench_lof.py +++ b/benchmarks/bench_lof.py @@ -18,11 +18,13 @@ """ from time import time -import numpy as np + import matplotlib.pyplot as plt +import numpy as np + +from sklearn.datasets import fetch_covtype, fetch_kddcup99, fetch_openml +from sklearn.metrics import auc, roc_curve from sklearn.neighbors import LocalOutlierFactor -from sklearn.metrics import roc_curve, auc -from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml from sklearn.preprocessing import LabelBinarizer print(__doc__) diff --git a/benchmarks/bench_mnist.py b/benchmarks/bench_mnist.py index 4bc28ea1a165d..4ba17cb1003c3 100644 --- a/benchmarks/bench_mnist.py +++ b/benchmarks/bench_mnist.py @@ -30,26 +30,24 @@ # Arnaud Joly # License: BSD 3 clause +import argparse import os from time import time -import argparse + import numpy as np from joblib import Memory -from sklearn.datasets import fetch_openml -from sklearn.datasets import get_data_home -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.ensemble import RandomForestClassifier +from sklearn.datasets import fetch_openml, get_data_home from sklearn.dummy import DummyClassifier -from sklearn.kernel_approximation import Nystroem -from sklearn.kernel_approximation import RBFSampler +from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier +from sklearn.kernel_approximation import Nystroem, RBFSampler +from sklearn.linear_model import LogisticRegression from sklearn.metrics import zero_one_loss +from sklearn.neural_network import MLPClassifier from sklearn.pipeline import make_pipeline from sklearn.svm import LinearSVC from sklearn.tree import DecisionTreeClassifier from sklearn.utils import check_array -from sklearn.linear_model import LogisticRegression -from sklearn.neural_network import MLPClassifier # Memoize the data extraction and memory map the resulting # train / test splits in readonly mode diff --git a/benchmarks/bench_multilabel_metrics.py b/benchmarks/bench_multilabel_metrics.py index 2a87b388e91a2..1b8449a24da51 100755 --- a/benchmarks/bench_multilabel_metrics.py +++ b/benchmarks/bench_multilabel_metrics.py @@ -3,26 +3,25 @@ A comparison of multilabel target formats and metrics over them """ -from timeit import timeit -from functools import partial -import itertools import argparse +import itertools import sys +from functools import partial +from timeit import timeit import matplotlib.pyplot as plt -import scipy.sparse as sp import numpy as np +import scipy.sparse as sp from sklearn.datasets import make_multilabel_classification from sklearn.metrics import ( - f1_score, accuracy_score, + f1_score, hamming_loss, jaccard_similarity_score, ) from sklearn.utils._testing import ignore_warnings - METRICS = { "f1": partial(f1_score, average="micro"), "f1-by-sample": partial(f1_score, average="samples"), diff --git a/benchmarks/bench_online_ocsvm.py b/benchmarks/bench_online_ocsvm.py index 37af2fdd76562..9f92150e079dd 100644 --- a/benchmarks/bench_online_ocsvm.py +++ b/benchmarks/bench_online_ocsvm.py @@ -15,21 +15,20 @@ """ from time import time -import numpy as np +import matplotlib +import matplotlib.pyplot as plt +import numpy as np from scipy.interpolate import interp1d -from sklearn.metrics import roc_curve, auc -from sklearn.datasets import fetch_kddcup99, fetch_covtype -from sklearn.preprocessing import LabelBinarizer, StandardScaler -from sklearn.pipeline import make_pipeline -from sklearn.utils import shuffle +from 
sklearn.datasets import fetch_covtype, fetch_kddcup99 from sklearn.kernel_approximation import Nystroem -from sklearn.svm import OneClassSVM from sklearn.linear_model import SGDOneClassSVM - -import matplotlib.pyplot as plt -import matplotlib +from sklearn.metrics import auc, roc_curve +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import LabelBinarizer, StandardScaler +from sklearn.svm import OneClassSVM +from sklearn.utils import shuffle font = {"weight": "normal", "size": 15} diff --git a/benchmarks/bench_plot_incremental_pca.py b/benchmarks/bench_plot_incremental_pca.py index 0f42e4b630f1d..49b87c8c7060a 100644 --- a/benchmarks/bench_plot_incremental_pca.py +++ b/benchmarks/bench_plot_incremental_pca.py @@ -7,13 +7,15 @@ """ -import numpy as np import gc -from time import time from collections import defaultdict +from time import time + import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import fetch_lfw_people -from sklearn.decomposition import IncrementalPCA, PCA +from sklearn.decomposition import PCA, IncrementalPCA def plot_results(X, y, label): diff --git a/benchmarks/bench_plot_lasso_path.py b/benchmarks/bench_plot_lasso_path.py index c372ee07117fc..c996c9c09520f 100644 --- a/benchmarks/bench_plot_lasso_path.py +++ b/benchmarks/bench_plot_lasso_path.py @@ -2,16 +2,15 @@ The input data is mostly low rank but is a fat infinite tail. """ -from collections import defaultdict import gc import sys +from collections import defaultdict from time import time import numpy as np -from sklearn.linear_model import lars_path, lars_path_gram -from sklearn.linear_model import lasso_path from sklearn.datasets import make_regression +from sklearn.linear_model import lars_path, lars_path_gram, lasso_path def compute_bench(samples_range, features_range): diff --git a/benchmarks/bench_plot_neighbors.py b/benchmarks/bench_plot_neighbors.py index c6e5541eda6f3..2d9cf2b08b71d 100644 --- a/benchmarks/bench_plot_neighbors.py +++ b/benchmarks/bench_plot_neighbors.py @@ -3,11 +3,11 @@ """ from time import time -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib import ticker -from sklearn import neighbors, datasets +from sklearn import datasets, neighbors def get_data(N, D, dataset="dense"): diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py index 78d6ad875cc34..d23191df0fbc9 100644 --- a/benchmarks/bench_plot_nmf.py +++ b/benchmarks/bench_plot_nmf.py @@ -6,28 +6,25 @@ # Anthony Di Franco (projected gradient, Python and NumPy port) # License: BSD 3 clause -from time import time +import numbers import sys import warnings -import numbers +from time import time -import numpy as np import matplotlib.pyplot as plt -from joblib import Memory +import numpy as np import pandas +from joblib import Memory -from sklearn.utils._testing import ignore_warnings -from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.decomposition import NMF -from sklearn.decomposition._nmf import _initialize_nmf -from sklearn.decomposition._nmf import _beta_divergence -from sklearn.decomposition._nmf import _check_init +from sklearn.decomposition._nmf import _beta_divergence, _check_init, _initialize_nmf from sklearn.exceptions import ConvergenceWarning -from sklearn.utils.extmath import safe_sparse_dot, squared_norm +from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.utils import check_array +from sklearn.utils._testing import ignore_warnings +from sklearn.utils.extmath import safe_sparse_dot, 
squared_norm from sklearn.utils.validation import check_is_fitted, check_non_negative - mem = Memory(cachedir=".", verbose=0) ################### diff --git a/benchmarks/bench_plot_omp_lars.py b/benchmarks/bench_plot_omp_lars.py index a800b3ebe2ba9..ec1bf3281f3a4 100644 --- a/benchmarks/bench_plot_omp_lars.py +++ b/benchmarks/bench_plot_omp_lars.py @@ -9,8 +9,8 @@ import numpy as np -from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp from sklearn.datasets import make_sparse_coded_signal +from sklearn.linear_model import lars_path, lars_path_gram, orthogonal_mp def compute_bench(samples_range, features_range): diff --git a/benchmarks/bench_plot_parallel_pairwise.py b/benchmarks/bench_plot_parallel_pairwise.py index a41e3fab20589..ca12972f9be6c 100644 --- a/benchmarks/bench_plot_parallel_pairwise.py +++ b/benchmarks/bench_plot_parallel_pairwise.py @@ -4,9 +4,8 @@ import matplotlib.pyplot as plt +from sklearn.metrics.pairwise import pairwise_distances, pairwise_kernels from sklearn.utils import check_random_state -from sklearn.metrics.pairwise import pairwise_distances -from sklearn.metrics.pairwise import pairwise_kernels def plot(func): diff --git a/benchmarks/bench_plot_polynomial_kernel_approximation.py b/benchmarks/bench_plot_polynomial_kernel_approximation.py index b21589263a49f..ad89d974f3d93 100644 --- a/benchmarks/bench_plot_polynomial_kernel_approximation.py +++ b/benchmarks/bench_plot_polynomial_kernel_approximation.py @@ -42,21 +42,21 @@ # License: BSD 3 clause # Load data manipulation functions -from sklearn.datasets import load_digits -from sklearn.model_selection import train_test_split +# Will use this for timing results +from time import time # Some common libraries import matplotlib.pyplot as plt import numpy as np -# Will use this for timing results -from time import time - -# Import SVM classifiers and feature map approximation algorithms -from sklearn.svm import LinearSVC, SVC +from sklearn.datasets import load_digits from sklearn.kernel_approximation import Nystroem, PolynomialCountSketch +from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline +# Import SVM classifiers and feature map approximation algorithms +from sklearn.svm import SVC, LinearSVC + # Split data in train and test sets X, y = load_digits()["data"], load_digits()["target"] X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7) diff --git a/benchmarks/bench_plot_randomized_svd.py b/benchmarks/bench_plot_randomized_svd.py index 2020096a21b88..9ac4e714cb7dc 100644 --- a/benchmarks/bench_plot_randomized_svd.py +++ b/benchmarks/bench_plot_randomized_svd.py @@ -65,28 +65,29 @@ # Author: Giorgio Patrini -import numpy as np -import scipy as sp -import matplotlib.pyplot as plt - import gc +import os.path import pickle -from time import time from collections import defaultdict -import os.path +from time import time + +import matplotlib.pyplot as plt +import numpy as np +import scipy as sp -from sklearn.utils._arpack import _init_arpack_v0 -from sklearn.utils import gen_batches -from sklearn.utils.validation import check_random_state -from sklearn.utils.extmath import randomized_svd -from sklearn.datasets import make_low_rank_matrix, make_sparse_uncorrelated from sklearn.datasets import ( - fetch_lfw_people, - fetch_openml, fetch_20newsgroups_vectorized, + fetch_lfw_people, fetch_olivetti_faces, + fetch_openml, fetch_rcv1, + make_low_rank_matrix, + make_sparse_uncorrelated, ) +from sklearn.utils import gen_batches +from 
sklearn.utils._arpack import _init_arpack_v0 +from sklearn.utils.extmath import randomized_svd +from sklearn.utils.validation import check_random_state try: import fbpca diff --git a/benchmarks/bench_plot_svd.py b/benchmarks/bench_plot_svd.py index fc370d1073be1..abd2c6fe9d4d4 100644 --- a/benchmarks/bench_plot_svd.py +++ b/benchmarks/bench_plot_svd.py @@ -3,13 +3,14 @@ The data is mostly low rank but is a fat infinite tail. """ import gc -from time import time -import numpy as np from collections import defaultdict +from time import time +import numpy as np from scipy.linalg import svd -from sklearn.utils.extmath import randomized_svd + from sklearn.datasets import make_low_rank_matrix +from sklearn.utils.extmath import randomized_svd def compute_bench(samples_range, features_range, n_iter=3, rank=50): diff --git a/benchmarks/bench_plot_ward.py b/benchmarks/bench_plot_ward.py index 696e833eede20..fe5cee201dff4 100644 --- a/benchmarks/bench_plot_ward.py +++ b/benchmarks/bench_plot_ward.py @@ -4,9 +4,9 @@ import time +import matplotlib.pyplot as plt import numpy as np from scipy.cluster import hierarchy -import matplotlib.pyplot as plt from sklearn.cluster import AgglomerativeClustering diff --git a/benchmarks/bench_random_projections.py b/benchmarks/bench_random_projections.py index 89a4550944f3f..bd8c62ecba484 100644 --- a/benchmarks/bench_random_projections.py +++ b/benchmarks/bench_random_projections.py @@ -6,19 +6,19 @@ Benchmarks for random projections. """ +import collections import gc -import sys import optparse +import sys from datetime import datetime -import collections import numpy as np import scipy.sparse as sp from sklearn import clone from sklearn.random_projection import ( - SparseRandomProjection, GaussianRandomProjection, + SparseRandomProjection, johnson_lindenstrauss_min_dim, ) diff --git a/benchmarks/bench_rcv1_logreg_convergence.py b/benchmarks/bench_rcv1_logreg_convergence.py index 2254ab81f30a4..166c6c2f5f9d1 100644 --- a/benchmarks/bench_rcv1_logreg_convergence.py +++ b/benchmarks/bench_rcv1_logreg_convergence.py @@ -3,14 +3,15 @@ # # License: BSD 3 clause -import matplotlib.pyplot as plt -from joblib import Memory -import numpy as np import gc import time -from sklearn.linear_model import LogisticRegression, SGDClassifier +import matplotlib.pyplot as plt +import numpy as np +from joblib import Memory + from sklearn.datasets import fetch_rcv1 +from sklearn.linear_model import LogisticRegression, SGDClassifier from sklearn.linear_model._sag import get_auto_step_size try: diff --git a/benchmarks/bench_saga.py b/benchmarks/bench_saga.py index 340549ef240e1..dc2ed093f11d0 100644 --- a/benchmarks/bench_saga.py +++ b/benchmarks/bench_saga.py @@ -4,24 +4,24 @@ in using multinomial logistic regression in term of learning time. 
""" import json -import time import os +import time -from sklearn.utils.parallel import delayed, Parallel import matplotlib.pyplot as plt import numpy as np from sklearn.datasets import ( + fetch_20newsgroups_vectorized, fetch_rcv1, - load_iris, load_digits, - fetch_20newsgroups_vectorized, + load_iris, ) from sklearn.linear_model import LogisticRegression from sklearn.metrics import log_loss from sklearn.model_selection import train_test_split from sklearn.preprocessing import LabelBinarizer, LabelEncoder from sklearn.utils.extmath import safe_sparse_dot, softmax +from sklearn.utils.parallel import Parallel, delayed def fit_single( diff --git a/benchmarks/bench_sample_without_replacement.py b/benchmarks/bench_sample_without_replacement.py index 10baad5a8495f..743292ca5fa61 100644 --- a/benchmarks/bench_sample_without_replacement.py +++ b/benchmarks/bench_sample_without_replacement.py @@ -3,14 +3,14 @@ """ import gc -import sys +import operator import optparse +import random +import sys from datetime import datetime -import operator import matplotlib.pyplot as plt import numpy as np -import random from sklearn.utils.random import sample_without_replacement diff --git a/benchmarks/bench_sgd_regression.py b/benchmarks/bench_sgd_regression.py index 47dd9e9fc758b..4b1b902795feb 100644 --- a/benchmarks/bench_sgd_regression.py +++ b/benchmarks/bench_sgd_regression.py @@ -1,16 +1,15 @@ # Author: Peter Prettenhofer # License: BSD 3 clause -import numpy as np -import matplotlib.pyplot as plt - import gc - from time import time -from sklearn.linear_model import Ridge, SGDRegressor, ElasticNet -from sklearn.metrics import mean_squared_error +import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import make_regression +from sklearn.linear_model import ElasticNet, Ridge, SGDRegressor +from sklearn.metrics import mean_squared_error """ Benchmark for SGD regression diff --git a/benchmarks/bench_sparsify.py b/benchmarks/bench_sparsify.py index f1aa482b8b732..1832ca40c6ddb 100644 --- a/benchmarks/bench_sparsify.py +++ b/benchmarks/bench_sparsify.py @@ -43,8 +43,9 @@ 60 300 381409 1271.4 97.1 clf.predict(X_test_sparse) """ -from scipy.sparse import csr_matrix import numpy as np +from scipy.sparse import csr_matrix + from sklearn.linear_model import SGDRegressor from sklearn.metrics import r2_score diff --git a/benchmarks/bench_text_vectorizers.py b/benchmarks/bench_text_vectorizers.py index 6d75d57658500..31d4141d1af97 100644 --- a/benchmarks/bench_text_vectorizers.py +++ b/benchmarks/bench_text_vectorizers.py @@ -8,8 +8,8 @@ * psutil (optional, but recommended) """ -import timeit import itertools +import timeit import numpy as np import pandas as pd @@ -18,8 +18,8 @@ from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import ( CountVectorizer, - TfidfVectorizer, HashingVectorizer, + TfidfVectorizer, ) n_repeat = 3 diff --git a/benchmarks/bench_tree.py b/benchmarks/bench_tree.py index c23ef627e237e..29cd7584432b7 100644 --- a/benchmarks/bench_tree.py +++ b/benchmarks/bench_tree.py @@ -13,11 +13,12 @@ training set, classify a sample and plot the time taken as a function of the number of dimensions. 
""" -import numpy as np -import matplotlib.pyplot as plt import gc from datetime import datetime +import matplotlib.pyplot as plt +import numpy as np + # to store the results scikit_classifier_results = [] scikit_regressor_results = [] diff --git a/benchmarks/bench_tsne_mnist.py b/benchmarks/bench_tsne_mnist.py index e399e891cb94e..39462b33d9655 100644 --- a/benchmarks/bench_tsne_mnist.py +++ b/benchmarks/bench_tsne_mnist.py @@ -7,18 +7,19 @@ # License: BSD 3 clause +import argparse +import json import os import os.path as op from time import time + import numpy as np -import json -import argparse from joblib import Memory from sklearn.datasets import fetch_openml +from sklearn.decomposition import PCA from sklearn.manifold import TSNE from sklearn.neighbors import NearestNeighbors -from sklearn.decomposition import PCA from sklearn.utils import check_array from sklearn.utils import shuffle as _shuffle from sklearn.utils._openmp_helpers import _openmp_effective_n_threads diff --git a/benchmarks/plot_tsne_mnist.py b/benchmarks/plot_tsne_mnist.py index d32e3dd769d6a..fff71eed0a26c 100644 --- a/benchmarks/plot_tsne_mnist.py +++ b/benchmarks/plot_tsne_mnist.py @@ -1,9 +1,8 @@ -import matplotlib.pyplot as plt -import numpy as np -import os.path as op - import argparse +import os.path as op +import matplotlib.pyplot as plt +import numpy as np LOG_DIR = "mnist_tsne_output" diff --git a/build_tools/azure/debian_atlas_32bit_lock.txt b/build_tools/azure/debian_atlas_32bit_lock.txt index 1a8c4eca7c291..5168bd7b96dc6 100644 --- a/build_tools/azure/debian_atlas_32bit_lock.txt +++ b/build_tools/azure/debian_atlas_32bit_lock.txt @@ -6,9 +6,9 @@ # attrs==23.1.0 # via pytest -coverage==7.2.7 +coverage==7.3.0 # via pytest-cov -cython==0.29.35 +cython==0.29.36 # via -r build_tools/azure/debian_atlas_32bit_requirements.txt iniconfig==2.0.0 # via pytest @@ -16,7 +16,7 @@ joblib==1.1.1 # via -r build_tools/azure/debian_atlas_32bit_requirements.txt packaging==23.1 # via pytest -pluggy==1.0.0 +pluggy==1.3.0 # via pytest py==1.11.0 # via pytest diff --git a/build_tools/azure/debian_atlas_32bit_requirements.txt b/build_tools/azure/debian_atlas_32bit_requirements.txt index 83baf09b14093..a77387862c025 100644 --- a/build_tools/azure/debian_atlas_32bit_requirements.txt +++ b/build_tools/azure/debian_atlas_32bit_requirements.txt @@ -1,7 +1,7 @@ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py -cython +cython<3.0.0 joblib==1.1.1 # min threadpoolctl==2.2.0 pytest==7.1.2 # min diff --git a/build_tools/azure/get_commit_message.py b/build_tools/azure/get_commit_message.py index 239da5b8c4498..0b1246b8d2724 100644 --- a/build_tools/azure/get_commit_message.py +++ b/build_tools/azure/get_commit_message.py @@ -1,6 +1,6 @@ +import argparse import os import subprocess -import argparse def get_commit_message(): diff --git a/build_tools/azure/install.sh b/build_tools/azure/install.sh index 5238cd1121d2e..675fedd5d9771 100755 --- a/build_tools/azure/install.sh +++ b/build_tools/azure/install.sh @@ -49,7 +49,10 @@ pre_python_environment_install() { python_environment_install_and_activate() { if [[ "$DISTRIB" == "conda"* ]]; then - conda update -n base conda -y + # Install/update conda with the libmamba solver because the legacy + # solver can be slow at installing a specific version of conda-lock. 
+ conda install -n base conda conda-libmamba-solver -y + conda config --set solver libmamba conda install -c conda-forge "$(get_dep conda-lock min)" -y conda-lock install --name $VIRTUALENV $LOCK_FILE source activate $VIRTUALENV @@ -67,7 +70,7 @@ python_environment_install_and_activate() { if [[ "$DISTRIB" == "conda-pip-scipy-dev" ]]; then echo "Installing development dependency wheels" - dev_anaconda_url=https://pypi.anaconda.org/scipy-wheels-nightly/simple + dev_anaconda_url=https://pypi.anaconda.org/scientific-python-nightly-wheels/simple pip install --pre --upgrade --timeout=60 --extra-index $dev_anaconda_url numpy pandas scipy echo "Installing Cython from latest sources" pip install https://github.com/cython/cython/archive/master.zip diff --git a/build_tools/azure/install_win.sh b/build_tools/azure/install_win.sh index ab559a1878971..b8f2cac096471 100755 --- a/build_tools/azure/install_win.sh +++ b/build_tools/azure/install_win.sh @@ -7,6 +7,10 @@ set -x source build_tools/shared.sh if [[ "$DISTRIB" == "conda" ]]; then + # Install/update conda with the libmamba solver because the legacy solver + # can be slow at installing a specific version of conda-lock. + conda install -n base conda conda-libmamba-solver -y + conda config --set solver libmamba conda install -c conda-forge "$(get_dep conda-lock min)" -y conda-lock install --name $VIRTUALENV $LOCK_FILE source activate $VIRTUALENV diff --git a/build_tools/azure/posix-docker.yml b/build_tools/azure/posix-docker.yml index af776c4c62f14..b00ca66c378ca 100644 --- a/build_tools/azure/posix-docker.yml +++ b/build_tools/azure/posix-docker.yml @@ -22,7 +22,6 @@ jobs: # Set in azure-pipelines.yml DISTRIB: '' DOCKER_CONTAINER: '' - SHOW_SHORT_SUMMARY: 'false' CREATE_ISSUE_ON_TRACKER: 'true' CCACHE_DIR: $(Pipeline.Workspace)/ccache CCACHE_COMPRESS: '1' diff --git a/build_tools/azure/posix.yml b/build_tools/azure/posix.yml index 2ee03daafd288..35e5165d22c83 100644 --- a/build_tools/azure/posix.yml +++ b/build_tools/azure/posix.yml @@ -22,7 +22,6 @@ jobs: PYTEST_XDIST_VERSION: 'latest' COVERAGE: 'true' CREATE_ISSUE_ON_TRACKER: 'true' - SHOW_SHORT_SUMMARY: 'false' strategy: matrix: ${{ insert }}: ${{ parameters.matrix }} diff --git a/build_tools/azure/py38_conda_defaults_openblas_environment.yml b/build_tools/azure/py38_conda_defaults_openblas_environment.yml index 7abb54f99d300..2493606135454 100644 --- a/build_tools/azure/py38_conda_defaults_openblas_environment.yml +++ b/build_tools/azure/py38_conda_defaults_openblas_environment.yml @@ -8,7 +8,7 @@ dependencies: - numpy=1.17.3 # min - blas[build=openblas] - scipy=1.5.0 # min - - cython + - cython<3.0.0 - joblib - threadpoolctl=2.2.0 - matplotlib=3.1.3 # min diff --git a/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock b/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock index 3a15776662079..bc261623f5757 100644 --- a/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock +++ b/build_tools/azure/py38_conda_defaults_openblas_linux-64_conda.lock @@ -1,10 +1,10 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: 79255228ac886c1c3fdbcda6a5d6e899b5ab035d633fa540a755b9ba633c2a2c +# input_hash: cc5492b4677e6d5132ab4ab70eda13c942bdf5f6dd53af977e801c42d5f48132 @EXPLICIT https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 https://repo.anaconda.com/pkgs/main/linux-64/blas-1.0-openblas.conda#9ddfcaef10d79366c90128f5dc444be8 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.01.10-h06a4308_0.conda#7704989a2ccf6c1f5a50c985509841c4 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.05.30-h06a4308_0.conda#979be8dd2368decd342b13e01540d297 https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b https://repo.anaconda.com/pkgs/main/linux-64/libgfortran4-7.5.0-ha8ba4b0_17.conda#e3883581cbf0a98672250c3e80d292bf https://repo.anaconda.com/pkgs/main/linux-64/libgfortran-ng-7.5.0-ha8ba4b0_17.conda#ecb35c8952579d5c8dc56c6e076ba948 @@ -12,7 +12,7 @@ https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b37 https://repo.anaconda.com/pkgs/main/linux-64/libstdcxx-ng-11.2.0-h1234567_1.conda#57623d10a70e09e1d048c2b2b6f4e2dd https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d281e9c2192cb3fa425655a8defb85 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 -https://repo.anaconda.com/pkgs/main/linux-64/expat-2.4.9-h6a678d5_0.conda#3a6139fbcd96384855f0e6037502bf28 +https://repo.anaconda.com/pkgs/main/linux-64/expat-2.5.0-h6a678d5_0.conda#9a21d99d49a0a556cf9590430dec8ec0 https://repo.anaconda.com/pkgs/main/linux-64/giflib-5.2.1-h5eee18b_3.conda#aa7d64adb3cd8a75d398167f8c29afc3 https://repo.anaconda.com/pkgs/main/linux-64/icu-58.2-he6710b0_3.conda#48cc14d5ad1a9bcd8dac17211a8deb8b https://repo.anaconda.com/pkgs/main/linux-64/jpeg-9e-h5eee18b_1.conda#ac373800fda872108412d1ccfe3fa572 @@ -26,74 +26,76 @@ https://repo.anaconda.com/pkgs/main/linux-64/libxcb-1.15-h7f8727e_0.conda#ada518 https://repo.anaconda.com/pkgs/main/linux-64/lz4-c-1.9.4-h6a678d5_0.conda#53915e9402180a7f22ea619c41089520 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c https://repo.anaconda.com/pkgs/main/linux-64/nspr-4.35-h6a678d5_0.conda#208fff5d60133bcff6998a70c9f5203b -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1t-h7f8727e_0.conda#0410db682c02665511bd4203ade48a32 +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.10-h7f8727e_2.conda#066a828cc9dcd120af8c503381d6a1b8 https://repo.anaconda.com/pkgs/main/linux-64/pcre-8.45-h295c915_0.conda#b32ccc24d1d9808618c1e898da60f68d https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.2-h5eee18b_0.conda#bcd31de48a0dcb44bc5b99675800c5cc https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e https://repo.anaconda.com/pkgs/main/linux-64/glib-2.69.1-he621ea3_2.conda#51cf1899782b3f3744aedd143fbc07f3 +https://repo.anaconda.com/pkgs/main/linux-64/libcups-2.4.2-h2d74bed_1.conda#3f265c2172a9e8c90a74037b6fa13685 https://repo.anaconda.com/pkgs/main/linux-64/libedit-3.1.20221030-h5eee18b_0.conda#7c724a17739aceaf9d1633ff06962137 -https://repo.anaconda.com/pkgs/main/linux-64/libevent-2.1.12-h8f2d780_0.conda#8de03cd4b6ee0ddeb0571a5199db5637 
+https://repo.anaconda.com/pkgs/main/linux-64/libevent-2.1.12-hdbd6064_1.conda#99312bf9d90f1ea14534b40afb61ce63 https://repo.anaconda.com/pkgs/main/linux-64/libllvm14-14.0.6-hdb19cb5_3.conda#aefea2b45cf32f12b4f1ffaa70aa3201 https://repo.anaconda.com/pkgs/main/linux-64/libpng-1.6.39-h5eee18b_0.conda#f6aee38184512eb05b06c2e94d39ab22 -https://repo.anaconda.com/pkgs/main/linux-64/libxml2-2.10.3-hcbfbd50_0.conda#95357588631b66da8f97ddbfbdf2e4e1 +https://repo.anaconda.com/pkgs/main/linux-64/libxml2-2.10.4-hcbfbd50_0.conda#c42cffdb0bc28d37a4eb33aed114f554 https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 https://repo.anaconda.com/pkgs/main/linux-64/zstd-1.5.5-hc292b87_0.conda#0f59d57dc21f585f4c282d60dfb46505 https://repo.anaconda.com/pkgs/main/linux-64/dbus-1.13.18-hb2f20db_0.conda#6a6a6f1391f807847404344489ef6cf4 https://repo.anaconda.com/pkgs/main/linux-64/freetype-2.12.1-h4a9f257_0.conda#bdc7b5952e9c5dca01bc2f4ccef2f974 https://repo.anaconda.com/pkgs/main/linux-64/gstreamer-1.14.1-h5eee18b_1.conda#f2f26e6f869b5d87f41bd059fae47c3e -https://repo.anaconda.com/pkgs/main/linux-64/krb5-1.19.4-h568e23c_0.conda#649816c5e24c76bd06e74a0eb671a82e +https://repo.anaconda.com/pkgs/main/linux-64/krb5-1.20.1-h143b758_1.conda#cf1accc86321fa25d6b978cc748039ae https://repo.anaconda.com/pkgs/main/linux-64/libclang13-14.0.6-default_he11475f_1.conda#44890feda1cf51639d9c94afbacce011 -https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.5.0-h6a678d5_2.conda#b3391ee6956636eb8ef159c1c454e3da +https://repo.anaconda.com/pkgs/main/linux-64/libtiff-4.5.1-h6a678d5_0.conda#235a671f74f0c4ecad9f9b3b107e3566 https://repo.anaconda.com/pkgs/main/linux-64/libxkbcommon-1.0.1-h5eee18b_1.conda#888b2e8f1bbf21017c503826e2d24b50 https://repo.anaconda.com/pkgs/main/linux-64/libxslt-1.1.37-h2085143_0.conda#680f9676bf55bdafd276eaa12fbb0f28 https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda#c7086c9ceb6cfe1c4c729a774a2d88a5 +https://repo.anaconda.com/pkgs/main/linux-64/cyrus-sasl-2.1.28-h52b45da_1.conda#d634af1577e4008f9228ae96ce671c44 https://repo.anaconda.com/pkgs/main/linux-64/fontconfig-2.14.1-h4c34cd2_2.conda#f0b472f5b544f8d57beb09ed4a2932e1 https://repo.anaconda.com/pkgs/main/linux-64/gst-plugins-base-1.14.1-h6a678d5_1.conda#afd9cbe949d670d24cc0a007aaec1fe1 https://repo.anaconda.com/pkgs/main/linux-64/lcms2-2.12-h3be6417_0.conda#719db47afba9f6586eecb5eacac70bff https://repo.anaconda.com/pkgs/main/linux-64/libclang-14.0.6-default_hc6dbbc7_1.conda#8f12583c4027b2861cff470f6b8837c4 -https://repo.anaconda.com/pkgs/main/linux-64/libpq-12.9-h16c4e8d_3.conda#0f127be216a734916faf456bb21404e9 +https://repo.anaconda.com/pkgs/main/linux-64/libpq-12.15-hdbd6064_1.conda#218227d255f6056b6f49f52dd0d1731f https://repo.anaconda.com/pkgs/main/linux-64/libwebp-1.2.4-h11a3e52_1.conda#9f9153b30e58e9ce896f74634622cbf1 https://repo.anaconda.com/pkgs/main/linux-64/nss-3.89.1-h6a678d5_0.conda#4d9d28fc3a0ca4916f281d2f5429ac50 -https://repo.anaconda.com/pkgs/main/linux-64/python-3.8.16-h7a1cb2a_3.conda#c11c0992727585f5f991760f5b18c968 -https://repo.anaconda.com/pkgs/main/linux-64/attrs-22.1.0-py38h06a4308_0.conda#51beb64c6f06b5a69529df7ecaccc3f9 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.8.17-h955ad1f_0.conda#f901f4fd76d24a2d598788a24e4d7246 https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab 
-https://repo.anaconda.com/pkgs/main/linux-64/cython-0.29.33-py38h6a678d5_0.conda#eb105388ba8bcf5ce82cf4cd5deeb5f9 +https://repo.anaconda.com/pkgs/main/linux-64/cython-0.29.36-py38h5eee18b_0.conda#0465e461450c86b395da9ccc3853d7dc https://repo.anaconda.com/pkgs/main/linux-64/exceptiongroup-1.0.4-py38h06a4308_0.conda#db954e73dca6076c64a1004d71b45784 https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513 https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507 https://repo.anaconda.com/pkgs/main/linux-64/joblib-1.2.0-py38h06a4308_0.conda#ee7f1f50ae15650057e5d5301900ae34 https://repo.anaconda.com/pkgs/main/linux-64/kiwisolver-1.4.4-py38h6a678d5_0.conda#7424aa335d22974192800ec19a68486e +https://repo.anaconda.com/pkgs/main/linux-64/mysql-5.7.24-h721c034_2.conda#dfc19ca2466d275c4c1f73b62c57f37b https://repo.anaconda.com/pkgs/main/linux-64/numpy-base-1.17.3-py38h2f8d375_0.conda#40edbb76ecacefb1e6ab639b514822b1 -https://repo.anaconda.com/pkgs/main/linux-64/packaging-23.0-py38h06a4308_0.conda#87dd3a3af0b6c6f5bbb99b7f205c2612 +https://repo.anaconda.com/pkgs/main/linux-64/packaging-23.1-py38h06a4308_0.conda#9ec9b6ee22dad7f49806c51218befd5b https://repo.anaconda.com/pkgs/main/linux-64/pillow-9.4.0-py38h6a678d5_0.conda#8afd1f4f8b23a1c44fca4975253b17f7 https://repo.anaconda.com/pkgs/main/linux-64/pluggy-1.0.0-py38h06a4308_1.conda#87bb1d3f6cf3e409a1dac38cee99918e https://repo.anaconda.com/pkgs/main/linux-64/ply-3.11-py38_0.conda#d6a69c576c6e4d19e3074eaae3d149f2 https://repo.anaconda.com/pkgs/main/noarch/py-1.11.0-pyhd3eb1b0_0.conda#7205a898ed2abbf6e9b903dff6abe08e https://repo.anaconda.com/pkgs/main/linux-64/pyparsing-3.0.9-py38h06a4308_0.conda#becbbf51d2b05de228eed968e20f963d https://repo.anaconda.com/pkgs/main/linux-64/pytz-2022.7-py38h06a4308_0.conda#19c9f6a24d5c6f779c645d00f646666b -https://repo.anaconda.com/pkgs/main/linux-64/qt-main-5.15.2-h8373d8f_8.conda#fd275fd09d648f31bfdb27aebb239eeb -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-67.8.0-py38h06a4308_0.conda#629ffd3b3738163d536a6c06e0b14164 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-68.0.0-py38h06a4308_0.conda#24f9c895455f3992d6b04957fd0e7546 https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0 https://repo.anaconda.com/pkgs/main/noarch/threadpoolctl-2.2.0-pyh0d69192_0.conda#bbfdbae4934150b902f97daaf287efe2 https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a https://repo.anaconda.com/pkgs/main/linux-64/tomli-2.0.1-py38h06a4308_0.conda#791cce9de9913e9587b0a85cd8419123 -https://repo.anaconda.com/pkgs/main/linux-64/tornado-6.2-py38h5eee18b_0.conda#db2f7ebc500d97a4af6889dfd0d03dbc +https://repo.anaconda.com/pkgs/main/linux-64/tornado-6.3.2-py38h5eee18b_0.conda#276225b0c9533f993c5d71ac01e72ddf https://repo.anaconda.com/pkgs/main/linux-64/coverage-7.2.2-py38h5eee18b_0.conda#a05c1732d4e67102d2aa8d7e56de778b https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.17.3-py38h7e8d029_0.conda#5f2b196b515f8fe6b37e3d224650577d -https://repo.anaconda.com/pkgs/main/linux-64/pytest-7.3.1-py38h06a4308_0.conda#456f5c7532523cc7bd098e0a87a199dc +https://repo.anaconda.com/pkgs/main/linux-64/pytest-7.4.0-py38h06a4308_0.conda#ba6c58ef1c6ba5247ccc17d41fdd71e5 https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda#211ee00320b08a1ac9fea6677649f6c9 
-https://repo.anaconda.com/pkgs/main/linux-64/qt-webengine-5.15.9-hbbf29b9_6.conda#9f2b3a9673e955f7ecc9e814d9afc9f5 +https://repo.anaconda.com/pkgs/main/linux-64/qt-main-5.15.2-h7358343_9.conda#d3eac069d7e4e93b866a07c2274c9ee7 https://repo.anaconda.com/pkgs/main/linux-64/sip-6.6.2-py38h6a678d5_0.conda#cb3f0d10f7f79870945f4dbbe0000f92 https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-base-3.1.3-py38hef1b27d_0.conda#a7ad7d097c25b7beeb76f370d51687a1 https://repo.anaconda.com/pkgs/main/linux-64/pandas-1.2.4-py38ha9443f7_0.conda#5bd3fd807a294f387feabc65821b75d0 https://repo.anaconda.com/pkgs/main/linux-64/pyqt5-sip-12.11.0-py38h6a678d5_1.conda#7bc403c7d55f1465e922964d293d2186 https://repo.anaconda.com/pkgs/main/linux-64/pytest-cov-4.0.0-py38h06a4308_0.conda#54035e39255f285f98ca1141b7f098e7 https://repo.anaconda.com/pkgs/main/noarch/pytest-forked-1.3.0-pyhd3eb1b0_0.tar.bz2#07970bffdc78f417d7f8f1c7e620f5c4 -https://repo.anaconda.com/pkgs/main/linux-64/qtwebkit-5.212-h3fafdc1_5.conda#e811bbc0456e3d3a02cab199492153ee +https://repo.anaconda.com/pkgs/main/linux-64/qt-webengine-5.15.9-h9ab4d14_7.conda#907aa480f11eabd16bd6c72c81720ef2 https://repo.anaconda.com/pkgs/main/linux-64/scipy-1.5.0-py38habc2bb6_0.conda#a27a97fc2377ab74cbd33ce22d3c3353 https://repo.anaconda.com/pkgs/main/linux-64/pyamg-4.2.3-py38h79cecc1_0.conda#6e7f4f94000b244396de8bf4e6ae8dc4 -https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.15.7-py38h6a678d5_1.conda#62232dc285be8e7e85ae9596d89b3b95 https://repo.anaconda.com/pkgs/main/noarch/pytest-xdist-2.5.0-pyhd3eb1b0_0.conda#d15cdc4207bcf8ca920822597f1d138d +https://repo.anaconda.com/pkgs/main/linux-64/qtwebkit-5.212-h3fafdc1_5.conda#e811bbc0456e3d3a02cab199492153ee +https://repo.anaconda.com/pkgs/main/linux-64/pyqt-5.15.7-py38h6a678d5_1.conda#62232dc285be8e7e85ae9596d89b3b95 https://repo.anaconda.com/pkgs/main/linux-64/matplotlib-3.1.3-py38_0.conda#70d5f6df438d469dc78f082389ada23d diff --git a/build_tools/azure/py38_conda_forge_mkl_environment.yml b/build_tools/azure/py38_conda_forge_mkl_environment.yml index 2a2955d523a97..e2034fd25ddc8 100644 --- a/build_tools/azure/py38_conda_forge_mkl_environment.yml +++ b/build_tools/azure/py38_conda_forge_mkl_environment.yml @@ -8,7 +8,7 @@ dependencies: - numpy - blas[build=mkl] - scipy - - cython + - cython<3.0.0 - joblib - threadpoolctl - matplotlib diff --git a/build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock b/build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock index 939830bc2a0a0..322a6d02f47c9 100644 --- a/build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock +++ b/build_tools/azure/py38_conda_forge_mkl_win-64_conda.lock @@ -1,126 +1,126 @@ # Generated by conda-lock. 
# platform: win-64 -# input_hash: e3af9571d95aff7d02e118db6e2ccbce90cd3cf3c663b4ed8a5e8c3fef5b1318 +# input_hash: 6608a61d0b91d8f5b96cf9aaa6a208586babc4fe18583a535509c82488bd1ebb @EXPLICIT -https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2023.5.7-h56e8100_0.conda#604212634bd8c4d6f20d44b946e8eedb -https://conda.anaconda.org/conda-forge/win-64/intel-openmp-2023.1.0-h57928b3_46319.conda#dbc4636f419722fbf3ab6501377228ba +https://conda.anaconda.org/conda-forge/win-64/ca-certificates-2023.7.22-h56e8100_0.conda#b1c2327b36f1a25d96f2039b0d3e3739 +https://conda.anaconda.org/conda-forge/win-64/intel-openmp-2023.2.0-h57928b3_49496.conda#f2e71622520883ffdbc379b13049534c https://conda.anaconda.org/conda-forge/win-64/mkl-include-2022.1.0-h6a75c08_874.tar.bz2#414f6ab96ad71e7a95bd00d990fa3473 https://conda.anaconda.org/conda-forge/win-64/msys2-conda-epoch-20160418-1.tar.bz2#b0309b72560df66f71a9d5e34a5efdfa https://conda.anaconda.org/conda-forge/win-64/python_abi-3.8-3_cp38.conda#c6df946723dadd4a5830a8ff8c6b9a20 https://conda.anaconda.org/conda-forge/win-64/ucrt-10.0.22621.0-h57928b3_0.tar.bz2#72608f6cd3e5898229c3ea16deb1ac43 https://conda.anaconda.org/conda-forge/win-64/m2w64-gmp-6.1.0-2.tar.bz2#53a1c73e1e3d185516d7e3af177596d9 https://conda.anaconda.org/conda-forge/win-64/m2w64-libwinpthread-git-5.0.0.4634.697f757-2.tar.bz2#774130a326dee16f1ceb05cc687ee4f0 -https://conda.anaconda.org/conda-forge/win-64/vc14_runtime-14.34.31931-h5081d32_16.conda#22125178654c6a8a393f9743d585704b +https://conda.anaconda.org/conda-forge/win-64/vc14_runtime-14.36.32532-hfdfe4a8_17.conda#91c1ecaf3996889532fc0456178b1058 https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-core-5.3.0-7.tar.bz2#4289d80fb4d272f1f3b56cfe87ac90bd -https://conda.anaconda.org/conda-forge/win-64/vc-14.3-hb25d44b_16.conda#ea326b37e3bd6d2616988e09f3a9396c -https://conda.anaconda.org/conda-forge/win-64/vs2015_runtime-14.34.31931-hed1258a_16.conda#0374eae69b6dbfb27c3dc27167109eb4 +https://conda.anaconda.org/conda-forge/win-64/vc-14.3-h64f974e_17.conda#67ff6791f235bb606659bf2a5c169191 +https://conda.anaconda.org/conda-forge/win-64/vs2015_runtime-14.36.32532-h05e6639_17.conda#4618046c39f7c81861e53ded842e738a https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h8ffe710_4.tar.bz2#7c03c66026944073040cb19a4f3ec3c9 https://conda.anaconda.org/conda-forge/win-64/icu-72.1-h63175ca_0.conda#a108731562663d787066bd17c9595114 https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h63175ca_0.tar.bz2#1900cb3cab5055833cfddb0ba233b074 -https://conda.anaconda.org/conda-forge/win-64/libbrotlicommon-1.0.9-hcfcfb64_8.tar.bz2#e8078e37208cd7d3e1eb5053f370ded8 +https://conda.anaconda.org/conda-forge/win-64/libbrotlicommon-1.1.0-hcfcfb64_0.conda#a10abcccb9339c0bfbf7655e2643f6ac https://conda.anaconda.org/conda-forge/win-64/libdeflate-1.18-hcfcfb64_0.conda#493acc14c556ef6f1d13ba00b099c679 https://conda.anaconda.org/conda-forge/win-64/libffi-3.4.2-h8ffe710_5.tar.bz2#2c96d1b6915b408893f9472569dee135 https://conda.anaconda.org/conda-forge/win-64/libiconv-1.17-h8ffe710_0.tar.bz2#050119977a86e4856f0416e2edcf81bb https://conda.anaconda.org/conda-forge/win-64/libjpeg-turbo-2.1.5.1-hcfcfb64_0.conda#f2fad2ae9f1365e343e4329fdb1e9d63 https://conda.anaconda.org/conda-forge/win-64/libogg-1.3.4-h8ffe710_1.tar.bz2#04286d905a0dcb7f7d4a12bdfe02516d -https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.42.0-hcfcfb64_0.conda#9a71d93deb99cc09d8939d5235b5909a 
-https://conda.anaconda.org/conda-forge/win-64/libwebp-base-1.3.0-hcfcfb64_0.conda#381a3645c51cbf478872899b16490318 -https://conda.anaconda.org/conda-forge/win-64/libzlib-1.2.13-hcfcfb64_4.tar.bz2#0cc5c5cc64ee1637f37f8540a175854c +https://conda.anaconda.org/conda-forge/win-64/libsqlite-3.43.0-hcfcfb64_0.conda#16c6f482e70cb3da41d0bee5d49c6bf3 +https://conda.anaconda.org/conda-forge/win-64/libwebp-base-1.3.1-hcfcfb64_0.conda#f89e765213cac556a8ed72ba8c1b5071 +https://conda.anaconda.org/conda-forge/win-64/libzlib-1.2.13-hcfcfb64_5.conda#5fdb9c6a113b6b6cb5e517fd972d5f41 https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libgfortran-5.3.0-6.tar.bz2#066552ac6b907ec6d72c0ddab29050dc -https://conda.anaconda.org/conda-forge/win-64/openssl-3.1.1-hcfcfb64_1.conda#1d913a5de46c6b2f7e4cfbd26b106b8b +https://conda.anaconda.org/conda-forge/win-64/openssl-3.1.2-hcfcfb64_0.conda#79b3f40f27cd80a265c276cea6714507 https://conda.anaconda.org/conda-forge/win-64/pthreads-win32-2.9.1-hfa6e2cd_3.tar.bz2#e2da8758d7d51ff6aa78a14dfb9dbed4 https://conda.anaconda.org/conda-forge/win-64/tk-8.6.12-h8ffe710_0.tar.bz2#c69a5047cc9291ae40afd4a1ad6f0c0f https://conda.anaconda.org/conda-forge/win-64/xz-5.2.6-h8d14728_0.tar.bz2#515d77642eaa3639413c6b1bc3f94219 https://conda.anaconda.org/conda-forge/win-64/gettext-0.21.1-h5728263_0.tar.bz2#299d4fd6798a45337042ff5a48219e5f -https://conda.anaconda.org/conda-forge/win-64/krb5-1.20.1-heb0366b_0.conda#a07b05ee8f451ab15698397185efe989 -https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.0.9-hcfcfb64_8.tar.bz2#99839d9d81f33afa173c0fa82a702038 -https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.0.9-hcfcfb64_8.tar.bz2#88e62627120c20289bf8982b15e0a6a1 -https://conda.anaconda.org/conda-forge/win-64/libclang13-15.0.7-default_h77d9078_2.conda#c2e1def32a19610ac26db453501760b6 +https://conda.anaconda.org/conda-forge/win-64/krb5-1.21.2-heb0366b_0.conda#6e8b0f22b4eef3b3cb3849bb4c3d47f9 +https://conda.anaconda.org/conda-forge/win-64/libbrotlidec-1.1.0-hcfcfb64_0.conda#b694da94a046204c19b25673de87f796 +https://conda.anaconda.org/conda-forge/win-64/libbrotlienc-1.1.0-hcfcfb64_0.conda#90b031a15d678424b100444e9959480b +https://conda.anaconda.org/conda-forge/win-64/libclang13-15.0.7-default_h77d9078_3.conda#ba26634d038b91466bb4242c8b5e0cfa https://conda.anaconda.org/conda-forge/win-64/libpng-1.6.39-h19919ed_0.conda#ab6febdb2dbd9c00803609079db4de71 https://conda.anaconda.org/conda-forge/win-64/libvorbis-1.3.7-h0e60522_0.tar.bz2#e1a22282de0169c93e4ffe6ce6acc212 -https://conda.anaconda.org/conda-forge/win-64/libxml2-2.11.4-hc3477c8_0.conda#586627982a63815637f871a6360fe3f9 +https://conda.anaconda.org/conda-forge/win-64/libxml2-2.11.5-hc3477c8_1.conda#27974f880a010b1441093d9f737a949f https://conda.anaconda.org/conda-forge/win-64/m2w64-gcc-libs-5.3.0-7.tar.bz2#fe759119b8b3bfa720b8762c6fdc35de https://conda.anaconda.org/conda-forge/win-64/pcre2-10.40-h17e33f8_0.tar.bz2#2519de0d9620dc2bc7e19caf6867136d -https://conda.anaconda.org/conda-forge/win-64/python-3.8.16-h4de0772_1_cpython.conda#461d9fc92cfde68f2ca7ef0988f6326a -https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.2-h12be248_6.conda#62826565682d013b3e2346aaf7bded0e -https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.0.9-hcfcfb64_8.tar.bz2#e18b70ed349d96086fd60a9c642b1b58 -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 
+https://conda.anaconda.org/conda-forge/win-64/python-3.8.17-h4de0772_0_cpython.conda#be2296eaf70eeb1cb83c4e95136e694a +https://conda.anaconda.org/conda-forge/win-64/zstd-1.5.5-h12be248_0.conda#792bb5da68bf0a6cac6a6072ecb8dbeb +https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.1.0-hcfcfb64_0.conda#8ea6b316fc21d9fb0b1b9a9671073c3c +https://conda.anaconda.org/conda-forge/win-64/brotli-python-1.1.0-py38hd3f51b4_0.conda#7e1596f0034ba2f0e31bc4c74c3bc701 +https://conda.anaconda.org/conda-forge/noarch/certifi-2023.7.22-pyhd8ed1ab_0.conda#7f3dbc9179b4dde7da98dfb151d0ad22 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.2.0-pyhd8ed1ab_0.conda#313516e9a4b08b12dfb1e1cd390a96e3 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/win-64/cython-0.29.35-py38hd3f51b4_0.conda#b4529ae0e6ffa88bd31dbfd25a733977 -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 +https://conda.anaconda.org/conda-forge/win-64/cython-0.29.36-py38hd3f51b4_0.conda#833ac4958386cca8167d9d9e83f7a427 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.3-pyhd8ed1ab_0.conda#e6518222753f519e911e83136d2158d9 +https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 https://conda.anaconda.org/conda-forge/win-64/freetype-2.12.1-h546665d_1.conda#1b513009cd012591f3fdc9e03a74ec0a https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.4-py38hb1fd069_1.tar.bz2#1dcc50e3241f9e4e59713eec2653abd5 -https://conda.anaconda.org/conda-forge/win-64/libclang-15.0.7-default_h77d9078_2.conda#70188b1b3e0b1716405adab9050894d1 -https://conda.anaconda.org/conda-forge/win-64/libglib-2.76.3-he8f3873_0.conda#4695e6acaf4790170161048d56cb51fc -https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.9.1-cpu_hadd60ae_5.conda#26867ad630a49c49fc123abfde634c7e -https://conda.anaconda.org/conda-forge/win-64/libtiff-4.5.0-h6c8260b_6.conda#12628df645fcf0f74922138858724831 +https://conda.anaconda.org/conda-forge/win-64/kiwisolver-1.4.5-py38hb1fd069_0.conda#81172de96829ef2bd4e00301d49787de +https://conda.anaconda.org/conda-forge/win-64/libclang-15.0.7-default_h77d9078_3.conda#71c8b6249c9e9e18b3aec705e95c1040 +https://conda.anaconda.org/conda-forge/win-64/libglib-2.76.4-he8f3873_0.conda#8c3f24acdc0403eeb3fb42ab75e8a659 +https://conda.anaconda.org/conda-forge/win-64/libhwloc-2.9.2-default_haede6df_1009.conda#90c9f598d15fc285c12f8c7d4c397f2e +https://conda.anaconda.org/conda-forge/win-64/libtiff-4.5.1-h6c8260b_1.conda#5faa8734cee2590b6d3615e06bfce4f8 https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 
+https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 https://conda.anaconda.org/conda-forge/win-64/pthread-stubs-0.4-hcd874cb_1001.tar.bz2#a1f820480193ea83582b13249a7e7bd9 https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/noarch/setuptools-68.1.2-pyhd8ed1ab_0.conda#4fe12573bf499ff85a0a364e00cc5c53 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/win-64/tornado-6.3.2-py38h91455d4_0.conda#3e625e06e8892112acb47695eaf22b47 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 +https://conda.anaconda.org/conda-forge/win-64/tornado-6.3.3-py38h91455d4_0.conda#317a39276a96a1aa8c96f174366c2f19 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.7.1-pyha770c72_0.conda#c39d6a09fe819de4951c2642629d9115 https://conda.anaconda.org/conda-forge/win-64/unicodedata2-15.0.0-py38h91455d4_0.tar.bz2#7a135e40d9f26c15419e5e82e1c436c0 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.40.0-pyhd8ed1ab_0.conda#49bb0d9e60ce1db25e151780331bb5f3 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.2-pyhd8ed1ab_0.conda#1ccd092478b3e0ee10d7a891adbf8a4f https://conda.anaconda.org/conda-forge/noarch/win_inet_pton-1.1.0-pyhd8ed1ab_6.tar.bz2#30878ecc4bd36e8deeea1e3c151b2e0b https://conda.anaconda.org/conda-forge/win-64/xorg-libxau-1.0.11-hcd874cb_0.conda#c46ba8712093cb0114404ae8a7582e1a https://conda.anaconda.org/conda-forge/win-64/xorg-libxdmcp-1.1.3-hcd874cb_0.tar.bz2#46878ebb6b9cbd8afcf8088d7ef00ece -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/win-64/brotli-1.0.9-hcfcfb64_8.tar.bz2#2e661f21e1741c11506bdc7226e6b0bc -https://conda.anaconda.org/conda-forge/win-64/coverage-7.2.7-py38h91455d4_0.conda#2fa3faef0a7b6a5da2bff0faddbfbc68 -https://conda.anaconda.org/conda-forge/win-64/glib-tools-2.76.3-h12be248_0.conda#3015483cb3ffa200d51aac3c691fcda0 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-5.12.0-pyhd8ed1ab_0.conda#e5fd2260a231ee63b6969f4801082f2b -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb +https://conda.anaconda.org/conda-forge/noarch/zipp-3.16.2-pyhd8ed1ab_0.conda#2da0451b54c4563c32490cb1b7cf68a1 
+https://conda.anaconda.org/conda-forge/win-64/brotli-1.1.0-hcfcfb64_0.conda#4a65489faacc6b6dbd3e0d0572a3a9b9 +https://conda.anaconda.org/conda-forge/win-64/coverage-7.3.0-py38h91455d4_0.conda#738b7e58d7a86a79d73841b2f88941ed +https://conda.anaconda.org/conda-forge/win-64/glib-tools-2.76.4-h12be248_0.conda#88237d3ddd338164196043cb7e927246 +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.0.1-pyhd8ed1ab_0.conda#d978c61aa5fc2c69380d53ad56b5ae86 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc https://conda.anaconda.org/conda-forge/win-64/lcms2-2.15-h3e3b177_1.conda#a76c36ad1b4b87f038d67890122d08ec https://conda.anaconda.org/conda-forge/win-64/libxcb-1.15-hcd874cb_0.conda#090d91b69396f14afef450c285f9758c https://conda.anaconda.org/conda-forge/win-64/openjpeg-2.5.0-ha2aaf27_2.conda#db0490689232e8e38c312281df6f31a2 -https://conda.anaconda.org/conda-forge/noarch/pip-23.1.2-pyhd8ed1ab_0.conda#7288da0d36821349cf1126e8670292df +https://conda.anaconda.org/conda-forge/noarch/pip-23.2.1-pyhd8ed1ab_0.conda#e2783aa3f9235225eec92f9081c5b801 https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyh0701188_6.tar.bz2#56cd9fe388baac0e90c7149cfac95b60 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.0-pyhd8ed1ab_0.conda#3cfe9b9e958e7238a386933c75d190db https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/win-64/sip-6.7.9-py38hd3f51b4_0.conda#b963e96205cfc5e98bc852a8e9349e22 -https://conda.anaconda.org/conda-forge/win-64/tbb-2021.9.0-h91493d7_0.conda#6aa3f1becefeaa00a4d2a79b2a478aee -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/win-64/fonttools-4.39.4-py38h91455d4_0.conda#9eb3fd3d1aed8bc15853dd978d9abcdb -https://conda.anaconda.org/conda-forge/win-64/glib-2.76.3-h12be248_0.conda#fa3f1af2dc70e0d00a755667a741fad3 -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-5.12.0-pyhd8ed1ab_0.conda#3544c818f0720c89eb16ae6940ab440b +https://conda.anaconda.org/conda-forge/win-64/sip-6.7.11-py38hd3f51b4_0.conda#c5ddea7d96c83d73f33c80e94791c904 +https://conda.anaconda.org/conda-forge/win-64/tbb-2021.10.0-h91493d7_0.conda#348275b42ff7638e7798ac61e073f864 +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.7.1-hd8ed1ab_0.conda#f96688577f1faa58096d06a45136afa2 +https://conda.anaconda.org/conda-forge/win-64/fonttools-4.42.1-py38h91455d4_0.conda#296f16446a00d56f5c6ee0dbdd1c397c +https://conda.anaconda.org/conda-forge/win-64/glib-2.76.4-h12be248_0.conda#4d7ae53ee4b7e08f3fbd1d3a7efd4812 +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.0.1-pyhd8ed1ab_0.conda#54661981fd331e20847d8a49543dd9af https://conda.anaconda.org/conda-forge/win-64/mkl-2022.1.0-h6a75c08_874.tar.bz2#2ff89a7337a9636029b4db9466e9f8e3 -https://conda.anaconda.org/conda-forge/win-64/pillow-9.5.0-py38ha7eb54a_1.conda#a7066629f65b5a301e76114e06a91096 -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/win-64/pyqt5-sip-12.11.0-py38hd3f51b4_3.conda#948a9d38ac004da975f9862194c25f68 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d 
-https://conda.anaconda.org/conda-forge/win-64/gstreamer-1.22.3-h6b5321d_1.conda#00afb31665a8028ca2ff9af61fea64e1 -https://conda.anaconda.org/conda-forge/win-64/libblas-3.9.0-17_win64_mkl.conda#9e42ac6b256b96bfaa19f829c25940e8 -https://conda.anaconda.org/conda-forge/win-64/mkl-devel-2022.1.0-h57928b3_875.tar.bz2#6319a06307af296c1dfae93687c283b2 +https://conda.anaconda.org/conda-forge/win-64/pillow-10.0.0-py38ha7eb54a_0.conda#7b0af6e39802b71b97b1688efb9d2d41 +https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.10.0-pyhd8ed1ab_0.conda#0809187ef9b89a3d94a5c24d13936236 +https://conda.anaconda.org/conda-forge/win-64/pyqt5-sip-12.12.2-py38hd3f51b4_4.conda#954d265587c13dd5b2cc7f62c1a0f082 https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.1.0-pyhd8ed1ab_0.conda#06eb685a3a0b146347a58dda979485da https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.4-pyhd8ed1ab_0.conda#18badd8fa3648d1beb1fcc7f2e0f756e +https://conda.anaconda.org/conda-forge/win-64/gstreamer-1.22.5-hb4038d2_0.conda#489c30d6f35bdbea9a79c369cde19c6f +https://conda.anaconda.org/conda-forge/win-64/libblas-3.9.0-17_win64_mkl.conda#9e42ac6b256b96bfaa19f829c25940e8 +https://conda.anaconda.org/conda-forge/win-64/mkl-devel-2022.1.0-h57928b3_875.tar.bz2#6319a06307af296c1dfae93687c283b2 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/win-64/gst-plugins-base-1.22.3-h001b923_1.conda#bd6347f397891bf4eb264c652221507c +https://conda.anaconda.org/conda-forge/win-64/gst-plugins-base-1.22.5-h001b923_0.conda#56cea78b747eaf6e8ec60cfbbf0416ff https://conda.anaconda.org/conda-forge/win-64/libcblas-3.9.0-17_win64_mkl.conda#768b2c3be666ecf9e62f939ea919f819 https://conda.anaconda.org/conda-forge/win-64/liblapack-3.9.0-17_win64_mkl.conda#278121fe8f0d65d496998aa290f36322 https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e https://conda.anaconda.org/conda-forge/win-64/liblapacke-3.9.0-17_win64_mkl.conda#6c98bb1c41479063f089459dcdedcecb -https://conda.anaconda.org/conda-forge/win-64/numpy-1.24.3-py38h1d91fd2_0.conda#2768aa0aa44da206dc5fc3d1ba6ad857 -https://conda.anaconda.org/conda-forge/win-64/qt-main-5.15.8-h2c8576c_13.conda#b00e4814feb5fa92b864ef031130c2cf +https://conda.anaconda.org/conda-forge/win-64/numpy-1.24.4-py38h1d91fd2_0.conda#bb13551a7913ff4de74df687f03ba14e +https://conda.anaconda.org/conda-forge/win-64/qt-main-5.15.8-h063a7da_15.conda#b18be6e14e118fdb0eb37de227d0f74b https://conda.anaconda.org/conda-forge/win-64/blas-devel-3.9.0-17_win64_mkl.conda#bfcbcc96906ca944d944eb4ae340371a -https://conda.anaconda.org/conda-forge/win-64/contourpy-1.0.7-py38hb1fd069_0.conda#6b53200dddcec578cdd90cac146eeadd -https://conda.anaconda.org/conda-forge/win-64/pyqt-5.15.7-py38hd6c051e_3.conda#9b17c0bbf19c6e265c3967e33df8770a +https://conda.anaconda.org/conda-forge/win-64/contourpy-1.1.0-py38hb1fd069_0.conda#a2353aab50907b4a3e86215ecdbca67c +https://conda.anaconda.org/conda-forge/win-64/pyqt-5.15.9-py38hd6c051e_4.conda#05296d250de47392005fc6f0a611ec54 
https://conda.anaconda.org/conda-forge/win-64/scipy-1.10.1-py38h1aea9ed_3.conda#1ed766b46170f86ead2ae6b9b8151191 https://conda.anaconda.org/conda-forge/win-64/blas-2.117-mkl.conda#a6b489be6ddbc3259df7cc8a440b8950 -https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.7.1-py38h528a6c7_0.conda#0aebccad15d74ec7f1bc3d62497ad1a8 -https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.7.1-py38haa244fe_0.conda#f41a8af387463a78ad87571c767d0d80 +https://conda.anaconda.org/conda-forge/win-64/matplotlib-base-3.7.2-py38h2d9580e_0.conda#7657929f7db31a6b95b96f2bf489e2b2 +https://conda.anaconda.org/conda-forge/win-64/matplotlib-3.7.2-py38haa244fe_0.conda#586ba23b670ae039be69070823f740bd diff --git a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_environment.yml b/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_environment.yml index bbbb3bb4cef6c..cfafd4c2bddec 100644 --- a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_environment.yml +++ b/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_environment.yml @@ -8,7 +8,7 @@ dependencies: - numpy - blas[build=openblas] - scipy - - cython + - cython<3.0.0 - joblib - threadpoolctl - matplotlib diff --git a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock b/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock index 83b59e621f828..99dea64ba839f 100644 --- a/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock +++ b/build_tools/azure/py38_conda_forge_openblas_ubuntu_2204_linux-64_conda.lock @@ -1,9 +1,9 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: d249329b78962bdba40d2f7d66c3a94b4caaced25b05b3bc95f39dda6c72aebe +# input_hash: 45129d4891600cbebed054af93f99df16f4911de4fc4048069748c4c59c09eef @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.7.22-hbcca054_0.conda#a73ecd2988327ad4c8f2c331482917f2 https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb @@ -17,7 +17,7 @@ https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.1.0-h69a702a_0 https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.8-h166bdaf_0.tar.bz2#be733e69048951df1e4b4b7bb8c7666f +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.9-hd590300_0.conda#a0c6f0e7e1a467f5678f94dea18c8aa7 https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 
https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 @@ -26,7 +26,7 @@ https://conda.anaconda.org/conda-forge/linux-64/icu-72.1-hcb278e6_0.conda#7c8d20 https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_0.conda#e805cbec4c29feb22e019245f7e47b6c https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 @@ -38,16 +38,15 @@ https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2# https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.23-pthreads_h80387f5_0.conda#9c5ea51ccb8ffae7d06c645869d24ce6 https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.1-hd590300_0.conda#82bf6f63eb15ef719b556b63feec3a77 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.31.3-hcb278e6_0.conda#141a126675b6d1a4eabb111a4a353898 https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.1-hd590300_1.conda#2e1d7b458ac8f1e3ca4e18b77add6277 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.2-hd590300_0.conda#e5ac5227582d6c83ccf247288c0eb095 https://conda.anaconda.org/conda-forge/linux-64/pixman-0.40.0-h36c2ea0_0.tar.bz2#660e72c82f2e75a6b3fe6a6e75c79f19 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.38-h0b41bf4_0.conda#9ac34337e5101a87e5d91da05d84aa48 https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 @@ -59,121 +58,122 @@ https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007 
https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-17_linux64_openblas.conda#57fb44770b1bc832fb2dbefa1bd502de -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.67-he9d0100_0.conda#d05556c80caffff164d17bdea0105a1a +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_0.conda#43017394a280a42b48d11d2a6e169901 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_0.conda#8e3e1cb77c4b355a3776bdfb74095bed +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d -https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.2-h27087fc_0.tar.bz2#7daf72d8e2a8e848e11d63ed6d1026e0 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.46-h620e276_0.conda#27e745f6f2e4b757e95dd7225fbe6bdb +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.47-h71f35ed_0.conda#c2097d0b46367996f09b4e8e4920384a https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.43.0-h2797004_0.conda#903fa782a9067d5934210df6d79220f6 https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.4-h0d562d8_0.conda#e46fad17d5fb57316b956f88dca765e4 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.32-hf1915f5_2.conda#cf4a8f520fdad3a63bb2bce74576cd2d +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.5-h0d562d8_0.conda#558ab736404275d7df61c473c1af35aa +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.33-hf1915f5_2.conda#a55ff0ed12efd86cf3a3dfb750adb950 https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.23-pthreads_h855a84d_0.conda#ba8810202f8879562f01b4f9957c1ada https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 
-https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_0.conda#aeafb07a327e3f14a796bf081ea07472 https://conda.anaconda.org/conda-forge/linux-64/ccache-4.8.1-h1fcd64f_0.conda#fd37a0c47d8b3667b73af0549037ce83 https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.20.1-h81ceb04_0.conda#89a41adce7106749573d883b2f657d78 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-17_linux64_openblas.conda#7ef0969b00fe3d6eef56a8151d3afb29 https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.76.3-hebfc3b9_0.conda#a64f11b244b2c112cd3fa1cbe9493999 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.76.4-hebfc3b9_0.conda#c6f951789c888f7bbd2dd6858eab69de https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-17_linux64_openblas.conda#a2103882c46492e26500fcb56c03de8b -https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_2.conda#5c0a511fa7d223d8661fefcf77b2a877 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.0-hb75c966_0.conda#c648d19cd9c8625898d5d370414de7c7 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-ha587672_6.conda#4e5ee4b062c21519efbee7e2ae608748 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.32-hca2cd23_2.conda#20b4708cd04bdc8138d03314ddd97885 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.89-he45b914_0.conda#2745719a58eeaab6657256a3f142f099 -https://conda.anaconda.org/conda-forge/linux-64/python-3.8.16-he550d4f_1_cpython.conda#9de84cccfbc5f8350a3667bb6ef6fc30 +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_3.conda#9efe82d44b76a7529a1d702e5a37752e +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hbc2eb40_0.conda#38f84d395629e48b7c7b48a8ca740341 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.1-h8b53f26_1.conda#5b09e13d732dda1a2bc9adc711164f4d +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.6-h4dfa4b3_0.conda#b096c85c415519259e731d8fb719a3ef +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.33-hca2cd23_2.conda#b2f09078f50b9e859aca3f0dc1cc8b7e +https://conda.anaconda.org/conda-forge/linux-64/nss-3.92-h1d7d5a4_0.conda#22c89a3d87828fe925b310b9cdf0f574 +https://conda.anaconda.org/conda-forge/linux-64/python-3.8.17-he550d4f_0_cpython.conda#72d038de0a228e4f0ef4011940641293 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 
https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.4-h8ee46fc_1.conda#52d09ea80a42c0466214609ef0a2d62d -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.6-h8ee46fc_0.conda#7590b76c3d11d21caa44f3fc38ac584a +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_0.conda#3db48055eab680e43a122e2c7494e7ae +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py38h17151c0_0.conda#5b332445993432e76df706fe1ebe776d +https://conda.anaconda.org/conda-forge/noarch/certifi-2023.7.22-pyhd8ed1ab_0.conda#7f3dbc9179b4dde7da98dfb151d0ad22 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.2.0-pyhd8ed1ab_0.conda#313516e9a4b08b12dfb1e1cd390a96e3 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.35-py38h17151c0_0.conda#551ebaa88e71c13dbede1b60a80acf7b +https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.36-py38h17151c0_0.conda#db0e2e64a90671e2b0821c785e1d90f2 https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.3-pyhd8ed1ab_0.conda#e6518222753f519e911e83136d2158d9 +https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.3-hfc55251_0.conda#8951eedf3cdf94dd733c1b5eee1f4880 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.4-hfc55251_0.conda#76ac435b8668f636a39fcb155c3543fd https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py38h43d8883_1.tar.bz2#41ca56d5cac7bfc7eb4fcdbee878eb84 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py38h7f3f72f_0.conda#eec56ac40315e360dd57c2de6604a325 https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-haa2dc70_1.conda#980d8aca0bc23ca73fa8caa3e7c84c28 -https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_2.conda#907344cee64101d44d806bbe0fccb01d 
-https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h36d4200_3.conda#c9f4416a34bc91e0eb029f912c68f81f +https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_3.conda#1720df000b48e31842500323cb7be18c +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-17_linux64_openblas.conda#949709aa6ee6a2dcdb3de6dd99147d17 -https://conda.anaconda.org/conda-forge/linux-64/libpq-15.3-hbcd7760_1.conda#8afb2a97d256ffde95b91a6283bc598c -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-253-h8c4010b_1.conda#9176b1e2cb8beca37a7510b0e801e38f +https://conda.anaconda.org/conda-forge/linux-64/libpq-15.4-hfc447b1_0.conda#b9ce311e7aba8b5fc3122254f0a6e97e +https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-254-h3516f8a_0.conda#df4b1cd0c91b4234fb02b5701a4cdddc https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.3-py38h59b608b_0.conda#5836e4ab0399136ede58446a4776b2ff +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.4-py38h59b608b_0.conda#8c3e050afeeb2b32575bdb8955cc67b2 https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.3-pyhd8ed1ab_0.conda#2590495f608a63625e165915fb4e2e34 https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/noarch/setuptools-68.1.2-pyhd8ed1ab_0.conda#4fe12573bf499ff85a0a364e00cc5c53 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py38h01eb140_0.conda#3db869202b0e523d606d13e81ca79ab6 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 
+https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py38h01eb140_0.conda#465bbfc0eb2022837d957d045b6b627a +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.7.1-pyha770c72_0.conda#c39d6a09fe819de4951c2642629d9115 https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.0.0-py38h0a891b7_0.tar.bz2#44421904760e9f5ae2035193e04360f0 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.39-hd590300_0.conda#d88c7fc8a11858fb14761832e4da1954 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.10-h7f98852_1003.tar.bz2#f59c1242cc1dd93e72c2ee2b360979eb -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/noarch/zipp-3.16.2-pyhd8ed1ab_0.conda#2da0451b54c4563c32490cb1b7cf68a1 https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-17_linux64_openblas.conda#fde382e41d77b65315fab79ab93a20ab https://conda.anaconda.org/conda-forge/linux-64/cairo-1.16.0-hbbf8b49_1016.conda#c1dd96500b9b1a75e9e511931f415cbc -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.7-py38hfbd4bf9_0.conda#638537863b298151635c05c762a997ab -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.39.4-py38h01eb140_0.conda#8eb5a370d618aa8a65dee377153a3451 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.76.3-hfc55251_0.conda#950e02f5665f5f4ff0437a6acba58798 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-5.12.0-pyhd8ed1ab_0.conda#e5fd2260a231ee63b6969f4801082f2b -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_2.conda#1a4fe5162abe4a19b5a9dedf158a0ff9 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.5.0-py38h885162f_1.conda#0eec8a20a17f17ec9e0b6839be466866 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.1.0-py38h7f3f72f_0.conda#0fdf3cc879156e0234e05962d55b6502 +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.42.1-py38h01eb140_0.conda#2badb9c3e1f9c3e51c0e69146f7be4d4 +https://conda.anaconda.org/conda-forge/linux-64/glib-2.76.4-hfc55251_0.conda#dbcec5fd9c6c8be24b23575048755a59 +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.0.1-pyhd8ed1ab_0.conda#d978c61aa5fc2c69380d53ad56b5ae86 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc +https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_3.conda#0922208521c0463e690bbaebba7eb551 +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.0.0-py38h885162f_0.conda#777c54134d5422a867aed7084cf5db5e https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_4.conda#8f349ca16d30950aa00870484d9d30c4 
+https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.0-pyhd8ed1ab_0.conda#3cfe9b9e958e7238a386933c75d190db https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.9-py38h17151c0_0.conda#6a54fd42b71a8b1c5f9c4a691270cdf1 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.11-py38h17151c0_0.conda#f05f0120127bac812e948b02997e4374 +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.7.1-hd8ed1ab_0.conda#f96688577f1faa58096d06a45136afa2 +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.4-pyhd8ed1ab_0.conda#18badd8fa3648d1beb1fcc7f2e0f756e https://conda.anaconda.org/conda-forge/linux-64/blas-2.117-openblas.conda#54b4b02b897156056f3056f992261d0c -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.3-h977cf35_1.conda#410ed3b168e5a139d12ebaf4143072cd +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.5-h98fc4e7_0.conda#2f45c1da3828ec2dc44d84b68916e3e7 https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-7.3.0-hdb3a94d_0.conda#765bc76c0dfaf24ff9d8a2935b2510df -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-5.12.0-pyhd8ed1ab_0.conda#3544c818f0720c89eb16ae6940ab440b -https://conda.anaconda.org/conda-forge/linux-64/pandas-2.0.2-py38h01efb38_0.conda#71066496987a1b50632526154e3d9711 -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.11.0-py38h8dc9893_3.conda#7bb0328b4a0f857aeb432426b9a5f908 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.0.1-pyhd8ed1ab_0.conda#54661981fd331e20847d8a49543dd9af +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.0.3-py38h01efb38_1.conda#01a2b6144e65631e2fe24e569d0738ee +https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.10.0-pyhd8ed1ab_0.conda#0809187ef9b89a3d94a5c24d13936236 +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py38h17151c0_4.conda#95447fd7bd5b420df7e7eb405f19f463 +https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.3-h938bd60_1.conda#1f317eb7f00db75f4112a07476345376 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.1-py38hd6c3c57_0.conda#3b8ba76acae09fbd4b2247c4ee4c0324 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.5-hf7dbed1_0.conda#ad8e8068208846032d6e9ce73d406cee +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.2-py38hf5b0b65_0.conda#e98a10bb5dd27b540d07a4d37214b85c https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e 
-https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h01ceb2d_13.conda#99ca83a166224f46a62c9545b8d66401 +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h7fe3ca9_15.conda#f09d307dd78e61e4eb2c6c2f81056d0e https://conda.anaconda.org/conda-forge/linux-64/scipy-1.10.1-py38h59b608b_3.conda#2f2a57462fcfbc67dfdbb0de6f7484c2 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.0-py38h757e2ef_0.conda#b935895fb7ba4717f07688f2b1f4f567 -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py38ha0d8c90_3.conda#e965dc172d67920d058ac2b3a0e27565 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.1-py38h578d9bd_0.conda#50ff9e0a3dd459a0ca365741072bf9a2 +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.1-py38h905acbe_0.conda#82480fd649a2e1ed61baac8b0e0387a2 +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py38hffdaa6c_4.conda#8a230666b1e346b9bc995a8eef0c732e +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.2-py38h578d9bd_0.conda#ab4f8f744029755778a4876559bec6ec diff --git a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock index 673981be3e05e..3a3040b5e68e7 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_linux-64_conda.lock @@ -3,7 +3,7 @@ # input_hash: 56e8dae95dcae13cac7ca1898bda12f1408bcea8a1aeb587ced409672f398a4b @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.7.22-hbcca054_0.conda#a73ecd2988327ad4c8f2c331482917f2 https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb @@ -45,7 +45,7 @@ https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318 https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.31.3-hcb278e6_0.conda#141a126675b6d1a4eabb111a4a353898 https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.1-hd590300_1.conda#2e1d7b458ac8f1e3ca4e18b77add6277 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.2-hd590300_0.conda#e5ac5227582d6c83ccf247288c0eb095 https://conda.anaconda.org/conda-forge/linux-64/pixman-0.40.0-h36c2ea0_0.tar.bz2#660e72c82f2e75a6b3fe6a6e75c79f19 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 https://conda.anaconda.org/conda-forge/linux-64/sleef-3.5.1-h9b69904_2.tar.bz2#6e016cf4c525d04a7bd038cee53ad3fd @@ -83,7 +83,7 @@ https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63 https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 https://conda.anaconda.org/conda-forge/linux-64/ccache-4.8.1-h1fcd64f_0.conda#fd37a0c47d8b3667b73af0549037ce83 
https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.20.1-h81ceb04_0.conda#89a41adce7106749573d883b2f657d78 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 https://conda.anaconda.org/conda-forge/linux-64/libglib-2.76.3-hebfc3b9_0.conda#a64f11b244b2c112cd3fa1cbe9493999 https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.9.1-cuda112_haf10fcf_5.conda#b8996ffa972161676ba6972af4c41384 @@ -103,18 +103,18 @@ https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.4-h8ee46fc_1.con https://conda.anaconda.org/conda-forge/noarch/array-api-compat-1.2-pyhd8ed1ab_0.conda#3d34f2f6987f8d098ab00198c170a77e https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.2.0-pyhd8ed1ab_0.conda#313516e9a4b08b12dfb1e1cd390a96e3 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.35-py311hb755f60_0.conda#17f4738a1ca6155a63d2a0cbd3e4a8b1 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.2-py311hb755f60_0.conda#81d4eacf7eb2d40beee33aa71e8f94ad https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.3-pyhd8ed1ab_0.conda#e6518222753f519e911e83136d2158d9 +https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.3-hfc55251_0.conda#8951eedf3cdf94dd733c1b5eee1f4880 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.4-hfc55251_0.conda#76ac435b8668f636a39fcb155c3543fd https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py311h4dd048b_1.tar.bz2#46d451f575392c01dc193069bd89766d +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py311h9547e67_0.conda#f53903649188b99e6b44c560c69f5b23 https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-haa2dc70_1.conda#980d8aca0bc23ca73fa8caa3e7c84c28 https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_2.conda#907344cee64101d44d806bbe0fccb01d 
https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h36d4200_3.conda#c9f4416a34bc91e0eb029f912c68f81f @@ -132,8 +132,8 @@ https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.3-pyhd8ed1ab_0. https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.9.0-hf52228f_0.conda#f495e42d3d2020b025705625edf35490 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.10.0-h00ab1b0_0.conda#9c82b1b389e46b64ec685ec487043e70 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py311h459d7ec_0.conda#12b1c374ee90a1aa11ea921858394dc8 @@ -181,4 +181,4 @@ https://conda.anaconda.org/conda-forge/linux-64/scipy-1.10.1-py311h64a7726_3.con https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.1-py311h8597a09_0.conda#70c3b734ffe82c16b6d121aaa11929a8 https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.0-py311hcb41070_0.conda#af2d6818c526791fb81686c554ab262b https://conda.anaconda.org/conda-forge/linux-64/pytorch-cpu-1.13.1-cpu_py311hdb170b5_1.conda#a805d5f103e493f207613283d8acbbe1 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.1-py311h38be061_0.conda#8fd462c8bcbba5a3affcb2d04e387476 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.2-py311h38be061_0.conda#c056ffab165096669389e5a4eea4dc4d diff --git a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock index e2252fa80607f..ca55037ef7936 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_no_coverage_linux-64_conda.lock @@ -1,9 +1,9 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: 28f25ea7bcf22e93278ac96747ca9700ada47330f6e3ed927edb73ab4a4c153e +# input_hash: 223cf367742008b437f38ff4642c0e70494f665cf9434d4da5c6483c757397fd @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.7.22-hbcca054_0.conda#a73ecd2988327ad4c8f2c331482917f2 https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb @@ -19,17 +19,16 @@ https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.1.0-h69a702a_0 https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.8-h166bdaf_0.tar.bz2#be733e69048951df1e4b4b7bb8c7666f +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.9-hd590300_0.conda#a0c6f0e7e1a467f5678f94dea18c8aa7 https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/cudatoolkit-11.8.0-h37601d7_11.conda#9d166760c8cfa83e2fc989928312da3d https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h58526e2_1001.tar.bz2#8c54672728e8ec6aa6db90cf2806d220 https://conda.anaconda.org/conda-forge/linux-64/icu-72.1-hcb278e6_0.conda#7c8d20d847bb45f56bd941578fcfa146 https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_0.conda#e805cbec4c29feb22e019245f7e47b6c https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 @@ -40,16 +39,15 @@ https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2# https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f 
https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.1-hd590300_0.conda#82bf6f63eb15ef719b556b63feec3a77 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.31.3-hcb278e6_0.conda#141a126675b6d1a4eabb111a4a353898 https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.1-hd590300_1.conda#2e1d7b458ac8f1e3ca4e18b77add6277 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.2-hd590300_0.conda#e5ac5227582d6c83ccf247288c0eb095 https://conda.anaconda.org/conda-forge/linux-64/pixman-0.40.0-h36c2ea0_0.tar.bz2#660e72c82f2e75a6b3fe6a6e75c79f19 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 -https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.38-h0b41bf4_0.conda#9ac34337e5101a87e5d91da05d84aa48 https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 @@ -60,122 +58,122 @@ https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f9 https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.67-he9d0100_0.conda#d05556c80caffff164d17bdea0105a1a +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_0.conda#43017394a280a42b48d11d2a6e169901 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_0.conda#8e3e1cb77c4b355a3776bdfb74095bed +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d -https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.2-h27087fc_0.tar.bz2#7daf72d8e2a8e848e11d63ed6d1026e0 
-https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.46-h620e276_0.conda#27e745f6f2e4b757e95dd7225fbe6bdb +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.47-h71f35ed_0.conda#c2097d0b46367996f09b4e8e4920384a https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.43.0-h2797004_0.conda#903fa782a9067d5934210df6d79220f6 https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.4-h0d562d8_0.conda#e46fad17d5fb57316b956f88dca765e4 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.32-hf1915f5_2.conda#cf4a8f520fdad3a63bb2bce74576cd2d +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.5-h0d562d8_0.conda#558ab736404275d7df61c473c1af35aa +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.33-hf1915f5_2.conda#a55ff0ed12efd86cf3a3dfb750adb950 https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_0.conda#aeafb07a327e3f14a796bf081ea07472 https://conda.anaconda.org/conda-forge/linux-64/ccache-4.8.1-h1fcd64f_0.conda#fd37a0c47d8b3667b73af0549037ce83 https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.20.1-h81ceb04_0.conda#89a41adce7106749573d883b2f657d78 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.76.3-hebfc3b9_0.conda#a64f11b244b2c112cd3fa1cbe9493999 -https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.9.1-cuda112_haf10fcf_5.conda#b8996ffa972161676ba6972af4c41384 -https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_2.conda#5c0a511fa7d223d8661fefcf77b2a877 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.0-hb75c966_0.conda#c648d19cd9c8625898d5d370414de7c7 
-https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-ha587672_6.conda#4e5ee4b062c21519efbee7e2ae608748 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.32-hca2cd23_2.conda#20b4708cd04bdc8138d03314ddd97885 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.89-he45b914_0.conda#2745719a58eeaab6657256a3f142f099 -https://conda.anaconda.org/conda-forge/linux-64/python-3.11.3-h2755cc3_0_cpython.conda#37005ea5f68df6a8a381b70cf4d4a160 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.76.4-hebfc3b9_0.conda#c6f951789c888f7bbd2dd6858eab69de +https://conda.anaconda.org/conda-forge/linux-64/libhwloc-2.9.2-default_h554bfaf_1009.conda#9369f407667517fe52b0e8ed6965ffeb +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_3.conda#9efe82d44b76a7529a1d702e5a37752e +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hbc2eb40_0.conda#38f84d395629e48b7c7b48a8ca740341 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.1-h8b53f26_1.conda#5b09e13d732dda1a2bc9adc711164f4d +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.6-h4dfa4b3_0.conda#b096c85c415519259e731d8fb719a3ef +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.33-hca2cd23_2.conda#b2f09078f50b9e859aca3f0dc1cc8b7e +https://conda.anaconda.org/conda-forge/linux-64/nss-3.92-h1d7d5a4_0.conda#22c89a3d87828fe925b310b9cdf0f574 +https://conda.anaconda.org/conda-forge/linux-64/python-3.11.5-hab00c5b_0_cpython.conda#f0288cb82594b1cbc71111d1cd3c5422 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.4-h8ee46fc_1.conda#52d09ea80a42c0466214609ef0a2d62d -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.6-h8ee46fc_0.conda#7590b76c3d11d21caa44f3fc38ac584a +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_0.conda#3db48055eab680e43a122e2c7494e7ae +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py311hb755f60_0.conda#b8128d083dbf6abd472b1a3e98b0b83d +https://conda.anaconda.org/conda-forge/noarch/certifi-2023.7.22-pyhd8ed1ab_0.conda#7f3dbc9179b4dde7da98dfb151d0ad22 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.2.0-pyhd8ed1ab_0.conda#313516e9a4b08b12dfb1e1cd390a96e3 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb 
-https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.35-py311hb755f60_0.conda#17f4738a1ca6155a63d2a0cbd3e4a8b1 +https://conda.anaconda.org/conda-forge/linux-64/cython-3.0.2-py311hb755f60_0.conda#81d4eacf7eb2d40beee33aa71e8f94ad https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.3-pyhd8ed1ab_0.conda#e6518222753f519e911e83136d2158d9 +https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.3-hfc55251_0.conda#8951eedf3cdf94dd733c1b5eee1f4880 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.4-hfc55251_0.conda#76ac435b8668f636a39fcb155c3543fd https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py311h4dd048b_1.tar.bz2#46d451f575392c01dc193069bd89766d +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py311h9547e67_0.conda#f53903649188b99e6b44c560c69f5b23 https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-haa2dc70_1.conda#980d8aca0bc23ca73fa8caa3e7c84c28 -https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_2.conda#907344cee64101d44d806bbe0fccb01d -https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h36d4200_3.conda#c9f4416a34bc91e0eb029f912c68f81f -https://conda.anaconda.org/conda-forge/linux-64/libpq-15.3-hbcd7760_1.conda#8afb2a97d256ffde95b91a6283bc598c -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-253-h8c4010b_1.conda#9176b1e2cb8beca37a7510b0e801e38f +https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_3.conda#1720df000b48e31842500323cb7be18c +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libpq-15.4-hfc447b1_0.conda#b9ce311e7aba8b5fc3122254f0a6e97e +https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-254-h3516f8a_0.conda#df4b1cd0c91b4234fb02b5701a4cdddc https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 
https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.3-pyhd8ed1ab_0.conda#2590495f608a63625e165915fb4e2e34 https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/noarch/setuptools-68.1.2-pyhd8ed1ab_0.conda#4fe12573bf499ff85a0a364e00cc5c53 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.9.0-hf52228f_0.conda#f495e42d3d2020b025705625edf35490 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/linux-64/tbb-2021.10.0-h00ab1b0_0.conda#9c82b1b389e46b64ec685ec487043e70 +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py311h459d7ec_0.conda#12b1c374ee90a1aa11ea921858394dc8 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py311h459d7ec_0.conda#7d9a31416c18704f55946ff7cf8da5dc +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.7.1-pyha770c72_0.conda#c39d6a09fe819de4951c2642629d9115 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.39-hd590300_0.conda#d88c7fc8a11858fb14761832e4da1954 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.10-h7f98852_1003.tar.bz2#f59c1242cc1dd93e72c2ee2b360979eb -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 https://conda.anaconda.org/conda-forge/linux-64/cairo-1.16.0-hbbf8b49_1016.conda#c1dd96500b9b1a75e9e511931f415cbc -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.39.4-py311h459d7ec_0.conda#ddd2cd004e10bc7a1e042283326cbf91 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.76.3-hfc55251_0.conda#950e02f5665f5f4ff0437a6acba58798 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_2.conda#1a4fe5162abe4a19b5a9dedf158a0ff9 +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.42.1-py311h459d7ec_0.conda#fc327c0ea015db3b6484eabb37d44e60 
+https://conda.anaconda.org/conda-forge/linux-64/glib-2.76.4-hfc55251_0.conda#dbcec5fd9c6c8be24b23575048755a59 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc +https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_3.conda#0922208521c0463e690bbaebba7eb551 +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b https://conda.anaconda.org/conda-forge/linux-64/mkl-2022.1.0-h84fe81f_915.tar.bz2#b9c8f925797a93dbff45e1626b025a6b -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.5.0-py311h0b84326_1.conda#6be2190fdbf26a6c1d3356a54d955237 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.0.0-py311h0b84326_0.conda#4b24acdc1fbbae9da03147e7d2cf8c8a https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_4.conda#8f349ca16d30950aa00870484d9d30c4 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.0-pyhd8ed1ab_0.conda#3cfe9b9e958e7238a386933c75d190db https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.9-py311hb755f60_0.conda#2b5430f2f1651f460c852e1fdd549184 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.3-h977cf35_1.conda#410ed3b168e5a139d12ebaf4143072cd +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.11-py311hb755f60_0.conda#17d25ab64a32872b349579fdb07bbdb2 +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.7.1-hd8ed1ab_0.conda#f96688577f1faa58096d06a45136afa2 +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.4-pyhd8ed1ab_0.conda#18badd8fa3648d1beb1fcc7f2e0f756e +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.5-h98fc4e7_0.conda#2f45c1da3828ec2dc44d84b68916e3e7 https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-7.3.0-hdb3a94d_0.conda#765bc76c0dfaf24ff9d8a2935b2510df https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-16_linux64_mkl.tar.bz2#85f61af03fd291dae33150ffe89dc09a https://conda.anaconda.org/conda-forge/linux-64/mkl-devel-2022.1.0-ha770c72_916.tar.bz2#69ba49e445f87aea2cba343a71a35ca2 -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.11.0-py311hcafe171_3.conda#0d79df2a96f6572fed2883374400b235 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 +https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.10.0-pyhd8ed1ab_0.conda#0809187ef9b89a3d94a5c24d13936236 +https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py311hb755f60_4.conda#3cff4c98f775ff6439b95bb7917702e9 +https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.3-h938bd60_1.conda#1f317eb7f00db75f4112a07476345376 +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.5-hf7dbed1_0.conda#ad8e8068208846032d6e9ce73d406cee 
https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-16_linux64_mkl.tar.bz2#361bf757b95488de76c4f123805742d3 https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-16_linux64_mkl.tar.bz2#a2f166748917d6d6e4707841ca1f519e https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-16_linux64_mkl.tar.bz2#44ccc4d4dca6a8d57fa17442bc64b5a1 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.3-py311h64a7726_0.conda#f1d507e1a5f1151845f7818ceb02ba9f https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h01ceb2d_13.conda#99ca83a166224f46a62c9545b8d66401 +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-16_linux64_mkl.tar.bz2#44ccc4d4dca6a8d57fa17442bc64b5a1 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.25.2-py311h64a7726_0.conda#71fd6f1734a0fa64d8f852ae7156ec45 +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h7fe3ca9_15.conda#f09d307dd78e61e4eb2c6c2f81056d0e https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-16_linux64_mkl.tar.bz2#3f92c1c9e1c0e183462c5071aa02cae1 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.7-py311ha3edf6b_0.conda#e7548e7f58965a2fe97a95950a5fedc6 -https://conda.anaconda.org/conda-forge/linux-64/pandas-2.0.2-py311h320fe9a_0.conda#509769b430266dc5c2f6a3eab0f23164 -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py311ha74522f_3.conda#ad6dd0bed0cdf5f2d4eb2b989d6253b3 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.10.1-py311h64a7726_3.conda#a01a3a7428e770db5a0c8c7ab5fce7f7 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.1.0-py311h9547e67_0.conda#daf3f23397ab2265d0cdfa339f3627ba +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.1.0-py311h320fe9a_0.conda#7f35501e126df510b250ad893482ef45 +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py311hf0fb5b6_4.conda#afe5363b88d2e97266063558a6599bd0 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.11.2-py311h64a7726_0.conda#18d094fb8e4ac52f93a4f4857a8f1e8f https://conda.anaconda.org/conda-forge/linux-64/blas-2.116-mkl.tar.bz2#c196a26abf6b4f132c88828ab7c2231c -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.1-py311h8597a09_0.conda#70c3b734ffe82c16b6d121aaa11929a8 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.0-py311hcb41070_0.conda#af2d6818c526791fb81686c554ab262b -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.1-py311h38be061_0.conda#8fd462c8bcbba5a3affcb2d04e387476 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.2-py311h54ef318_0.conda#2631a9e423855fb586c05f8a5ee8b177 +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.1-py311h92ebd52_0.conda#d38def4818ac51732a1258807752b0d5 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.2-py311h38be061_0.conda#c056ffab165096669389e5a4eea4dc4d diff --git a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock index eff7998346172..5bcdab7fcaced 100644 --- a/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock +++ b/build_tools/azure/pylatest_conda_forge_mkl_osx-64_conda.lock @@ -1,20 +1,21 @@ # Generated by conda-lock. 
# platform: osx-64 -# input_hash: b93f19a33e87617bd672a74b684ecbc39aba1924122ef1860af442118a396fbd +# input_hash: 02abef27514db5e5119c3cdc253e84a06374c1b308495298b46bdb14dcc52ae9 @EXPLICIT https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-h0d85af4_4.tar.bz2#37edc4e6304ca87316e160f5ca0bd1b5 -https://conda.anaconda.org/conda-forge/osx-64/ca-certificates-2023.5.7-h8857fd0_0.conda#b704e4b79ba0d887c4870b7b09d6a4df -https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.0.9-hb7f2c08_8.tar.bz2#37157d273eaf3bc7d6862104161d9ec9 -https://conda.anaconda.org/conda-forge/osx-64/libcxx-16.0.5-hd57cbcb_0.conda#d34eed0a4fb993f0d934db6394ba23ef +https://conda.anaconda.org/conda-forge/osx-64/ca-certificates-2023.7.22-h8857fd0_0.conda#bf2c54c18997bf3542af074c10191771 +https://conda.anaconda.org/conda-forge/osx-64/icu-73.2-hf5e326d_0.conda#5cc301d759ec03f28328428e28f65591 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlicommon-1.1.0-h0dc2134_0.conda#5bfb3e0e889d051590c97a3ad190efb8 +https://conda.anaconda.org/conda-forge/osx-64/libcxx-16.0.6-hd57cbcb_0.conda#7d6972792161077908b62971802f289a https://conda.anaconda.org/conda-forge/osx-64/libdeflate-1.18-hac1461d_0.conda#3d131584456b277ce0871e6481fde49b https://conda.anaconda.org/conda-forge/osx-64/libexpat-2.5.0-hf0c8a7f_1.conda#6c81cb022780ee33435cca0127dd43c9 https://conda.anaconda.org/conda-forge/osx-64/libffi-3.4.2-h0d85af4_5.tar.bz2#ccb34fb14960ad8b125962d3d79b31a9 -https://conda.anaconda.org/conda-forge/noarch/libgfortran-devel_osx-64-11.3.0-h824d247_31.conda#ea203ba0aca5cd594aa3b1a2b32e5978 +https://conda.anaconda.org/conda-forge/noarch/libgfortran-devel_osx-64-12.3.0-h0b6f5ec_1.conda#ecc03a145b87ed6b8806fb02dc0e13c4 https://conda.anaconda.org/conda-forge/osx-64/libiconv-1.17-hac89ed1_0.tar.bz2#691d103d11180486154af49c037b7ed9 https://conda.anaconda.org/conda-forge/osx-64/libjpeg-turbo-2.1.5.1-hb7f2c08_0.conda#d7309a152b9b79799063b8bb47e34a3a -https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.3.0-hb7f2c08_0.conda#18981e4c840126d6118d8952485fea51 -https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.2.13-hfd90126_4.tar.bz2#35eb3fce8d51ed3c1fd4122bad48250b -https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-16.0.5-hff08bdf_0.conda#af8df1a61e8137e3479b0f71d5bd0a49 +https://conda.anaconda.org/conda-forge/osx-64/libwebp-base-1.3.1-h0dc2134_0.conda#a25a41b5be3fed4b671a58b998dcf89b +https://conda.anaconda.org/conda-forge/osx-64/libzlib-1.2.13-h8a1eda9_5.conda#4a3ad23f6e16f99c04e166767193d700 +https://conda.anaconda.org/conda-forge/osx-64/llvm-openmp-16.0.6-hff08bdf_0.conda#39a5227d906f75102bf8586741690128 https://conda.anaconda.org/conda-forge/osx-64/mkl-include-2022.1.0-h6bab518_928.tar.bz2#67f8511a5eaf693a202486f74035b3f7 https://conda.anaconda.org/conda-forge/osx-64/ncurses-6.4-hf0c8a7f_0.conda#c3dbae2411164d9b02c69090a9a91857 https://conda.anaconda.org/conda-forge/osx-64/pthread-stubs-0.4-hc929b4f_1001.tar.bz2#addd19059de62181cd11ae8f4ef26084 @@ -26,106 +27,106 @@ https://conda.anaconda.org/conda-forge/osx-64/xz-5.2.6-h775f41a_0.tar.bz2#a72f9d https://conda.anaconda.org/conda-forge/osx-64/gmp-6.2.1-h2e338ed_0.tar.bz2#dedc96914428dae572a39e69ee2a392f https://conda.anaconda.org/conda-forge/osx-64/isl-0.25-hb486fe8_0.tar.bz2#45a9a46c78c0ea5c275b535f7923bde3 https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hb486fe8_0.tar.bz2#f9d6a4c82889d5ecedec1d90eb673c55 -https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.0.9-hb7f2c08_8.tar.bz2#7f952a036d9014b4dab96c6ea0f8c2a7 
-https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.0.9-hb7f2c08_8.tar.bz2#b36a3bfe866d9127f25f286506982166 -https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-12.2.0-he409387_31.conda#5a544130e584b1f204ac896ff071d5b3 -https://conda.anaconda.org/conda-forge/osx-64/libllvm14-14.0.6-hc8e404f_3.conda#a6433d7252b49c2195f8aa70ad898104 +https://conda.anaconda.org/conda-forge/osx-64/libbrotlidec-1.1.0-h0dc2134_0.conda#a59106eac55636d52f3a40b860864fca +https://conda.anaconda.org/conda-forge/osx-64/libbrotlienc-1.1.0-h0dc2134_0.conda#f95dd7223f586874ace2320d9fcd89bf +https://conda.anaconda.org/conda-forge/osx-64/libgfortran5-12.3.0-hbd3c1fe_1.conda#209e462211f65827cdc01a0d7a72286f https://conda.anaconda.org/conda-forge/osx-64/libpng-1.6.39-ha978bb4_0.conda#35e4928794c5391aec14ffdf1deaaee5 -https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.42.0-h58db7d2_0.conda#a7d3b44b7b0c9901ac7813b7a0462893 +https://conda.anaconda.org/conda-forge/osx-64/libsqlite-3.43.0-h58db7d2_0.conda#e2195038e85e49e26fbeb7efc0ad38c4 https://conda.anaconda.org/conda-forge/osx-64/libxcb-1.15-hb7f2c08_0.conda#5513f57e0238c87c12dffedbcc9c1a4a -https://conda.anaconda.org/conda-forge/osx-64/openssl-3.1.1-h8a1eda9_1.conda#c7822d6ee74e34af1fd74365cfd18983 +https://conda.anaconda.org/conda-forge/osx-64/libxml2-2.11.5-h3346baf_1.conda#7584dee6af7de378aed0ae49aebedb8a +https://conda.anaconda.org/conda-forge/osx-64/openssl-3.1.2-h8a1eda9_0.conda#85d5377436d19183c8ac5afbb8e713a1 https://conda.anaconda.org/conda-forge/osx-64/readline-8.2-h9e318b2_1.conda#f17f77f2acf4d344734bda76829ce14e https://conda.anaconda.org/conda-forge/osx-64/tapi-1100.0.11-h9ce4665_0.tar.bz2#f9ff42ccf809a21ba6f8607f8de36108 -https://conda.anaconda.org/conda-forge/osx-64/tbb-2021.9.0-hb8565cd_0.conda#6aedf8fdcdf5f2d7b4db21853a7d42ed +https://conda.anaconda.org/conda-forge/osx-64/tbb-2021.10.0-h1c7c39f_0.conda#7d866c2f94d867282a403460cfa8b3f8 https://conda.anaconda.org/conda-forge/osx-64/tk-8.6.12-h5dbffcc_0.tar.bz2#8e9480d9c47061db2ed1b4ecce519a7f -https://conda.anaconda.org/conda-forge/osx-64/zlib-1.2.13-hfd90126_4.tar.bz2#be90e6223c74ea253080abae19b3bdb1 -https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.2-hbc0c0cd_6.conda#40a188783d3c425bdccc9ae9104acbb8 -https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.0.9-hb7f2c08_8.tar.bz2#aac5ad0d8f747ef7f871508146df75d9 +https://conda.anaconda.org/conda-forge/osx-64/zlib-1.2.13-h8a1eda9_5.conda#75a8a98b1c4671c5d2897975731da42d +https://conda.anaconda.org/conda-forge/osx-64/zstd-1.5.5-h829000d_0.conda#80abc41d0c48b82fe0f04e7f42f5cb7e +https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.1.0-h0dc2134_0.conda#fdff11974d36d586c4e5aeae3b9a9a79 https://conda.anaconda.org/conda-forge/osx-64/freetype-2.12.1-h3f81eb7_1.conda#852224ea3e8991a8342228eab274840e -https://conda.anaconda.org/conda-forge/osx-64/libclang-cpp14-14.0.6-default_hdb78580_1.conda#9a235664bf087994aa3acc1a60614964 -https://conda.anaconda.org/conda-forge/osx-64/libgfortran-5.0.0-11_3_0_h97931a8_31.conda#97451338600bd9c5b535eb224ef6c471 -https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.5.0-hedf67fa_6.conda#800b810c1aa3eb4a08106698441871bb -https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-14.0.6-hc8e404f_3.conda#3bebd091daab84c54f91205bb4d4a9c3 +https://conda.anaconda.org/conda-forge/osx-64/libgfortran-5.0.0-12_3_0_h97931a8_1.conda#3dfbc4ce09c598763cffcc667686f412 +https://conda.anaconda.org/conda-forge/osx-64/libllvm15-15.0.7-he4b1e75_3.conda#ecc6df80c4b0445ac0de9cabae189db3 
+https://conda.anaconda.org/conda-forge/osx-64/libtiff-4.5.1-hf955e92_1.conda#3436c5763732687918ce258b0184c7c9 https://conda.anaconda.org/conda-forge/osx-64/mkl-2022.1.0-h860c996_928.tar.bz2#98a4d58de0ba6e61ce46620b775c19ce https://conda.anaconda.org/conda-forge/osx-64/mpfr-4.2.0-h4f9bd69_0.conda#f48a2f4515be334c5cfeed82517b96e0 -https://conda.anaconda.org/conda-forge/osx-64/python-3.11.3-h99528f9_0_cpython.conda#c3291f9411424fc587d53a2ea57fb075 +https://conda.anaconda.org/conda-forge/osx-64/python-3.11.5-h30d4d87_0_cpython.conda#ef2b263b5b02d2acf00908bb07c14b12 https://conda.anaconda.org/conda-forge/osx-64/sigtool-0.1.3-h88f4db0_0.tar.bz2#fbfb84b9de9a6939cb165c02c69b1865 -https://conda.anaconda.org/conda-forge/osx-64/brotli-1.0.9-hb7f2c08_8.tar.bz2#55f612fe4a9b5f6ac76348b6de94aaeb -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 -https://conda.anaconda.org/conda-forge/osx-64/clang-14-14.0.6-default_hdb78580_1.conda#ce19ccaee311132f299ffd0eec9c4581 +https://conda.anaconda.org/conda-forge/osx-64/brotli-1.1.0-h0dc2134_0.conda#5dcb7899e967959932fda0a921016d35 +https://conda.anaconda.org/conda-forge/osx-64/brotli-python-1.1.0-py311hdf8f085_0.conda#ee00aa713ef0dffaebf0b6a8316cc9a2 +https://conda.anaconda.org/conda-forge/noarch/certifi-2023.7.22-pyhd8ed1ab_0.conda#7f3dbc9179b4dde7da98dfb151d0ad22 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.2.0-pyhd8ed1ab_0.conda#313516e9a4b08b12dfb1e1cd390a96e3 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/osx-64/cython-0.29.35-py311hdf8f085_0.conda#29e8e9b57704e153d6a5ffced82262da -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 +https://conda.anaconda.org/conda-forge/osx-64/cython-3.0.2-py311hdf8f085_0.conda#80da6addfe0c2ca2ac2b3498f8ee0e71 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.3-pyhd8ed1ab_0.conda#e6518222753f519e911e83136d2158d9 +https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.4.4-py311hd2070f0_1.tar.bz2#5219e72a43e53e8f6af4fdf76a0f90ef +https://conda.anaconda.org/conda-forge/osx-64/kiwisolver-1.4.5-py311h5fe6e05_0.conda#1cdd04e72bf6f6dc4152bda4e45dd6b9 https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.15-h2dcdeff_1.conda#f1df9b0c2d9fbe985e62f4b24773a9e4 -https://conda.anaconda.org/conda-forge/osx-64/ld64_osx-64-609-hfd63004_13.conda#58fcda6a84fb42f51c6c2d6d175b435d +https://conda.anaconda.org/conda-forge/osx-64/ld64_osx-64-609-h0fd476b_14.conda#3aa0a91888b5c1771630b86f42c9d2da https://conda.anaconda.org/conda-forge/osx-64/libblas-3.9.0-17_osx64_mkl.conda#e5d4b69958f8eb30b932828880b847f3 
+https://conda.anaconda.org/conda-forge/osx-64/libclang-cpp15-15.0.7-default_hdb78580_3.conda#73639154fe4a7ca500d1361eef58fb65 https://conda.anaconda.org/conda-forge/osx-64/libhiredis-1.0.2-h2beb688_0.tar.bz2#524282b2c46c9dedf051b3bc2ae05494 +https://conda.anaconda.org/conda-forge/osx-64/llvm-tools-15.0.7-he4b1e75_3.conda#7177e9334a86af1b1581f14607ced61c https://conda.anaconda.org/conda-forge/osx-64/mkl-devel-2022.1.0-h694c41f_929.tar.bz2#041ceef009fe6d29cbd2555907c23ab3 https://conda.anaconda.org/conda-forge/osx-64/mpc-1.3.1-h81bd1dd_0.conda#c752c0eb6c250919559172c011e5f65b https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 https://conda.anaconda.org/conda-forge/osx-64/openjpeg-2.5.0-h13ac156_2.conda#299a29af9ac9f550ad459d655739280b https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.3-pyhd8ed1ab_0.conda#2590495f608a63625e165915fb4e2e34 https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/noarch/setuptools-68.1.2-pyhd8ed1ab_0.conda#4fe12573bf499ff85a0a364e00cc5c53 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/osx-64/tornado-6.3.2-py311h2725bcf_0.conda#276fe4341e39dcd9d9d33ca18140d2e7 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf +https://conda.anaconda.org/conda-forge/osx-64/tornado-6.3.3-py311h2725bcf_0.conda#2e29e821b0448e8e8ab627f202554575 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.7.1-pyha770c72_0.conda#c39d6a09fe819de4951c2642629d9115 https://conda.anaconda.org/conda-forge/osx-64/ccache-4.8.1-h28e096f_0.conda#dcc8cc97fdab7a5fad9e1a6bbad9ed0e -https://conda.anaconda.org/conda-forge/osx-64/cctools_osx-64-973.0.1-hcc6d90d_13.conda#76e5fa849e2042cd657d9eec96095680 -https://conda.anaconda.org/conda-forge/osx-64/clang-14.0.6-h694c41f_1.conda#1305da4c85c7eaa2e90fa14efc35f591 
-https://conda.anaconda.org/conda-forge/osx-64/coverage-7.2.7-py311h2725bcf_0.conda#afba3a3f74c5f71ebd9f400871e8c4de -https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.39.4-py311h2725bcf_0.conda#250388f6d2c5a20066a95cf872e22495 -https://conda.anaconda.org/conda-forge/osx-64/gfortran_impl_osx-64-11.3.0-h1f927f5_31.conda#926da9259d77f6a95d60c5a956425c2f -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/osx-64/ld64-609-hc6ad406_13.conda#5d7676eee44dfa3e48bf21700e044aa9 +https://conda.anaconda.org/conda-forge/osx-64/cctools_osx-64-973.0.1-habff3f6_14.conda#f08fbd275d1df88503af3ad946c08582 +https://conda.anaconda.org/conda-forge/osx-64/clang-15-15.0.7-default_hdb78580_3.conda#688d6b9e178cb7786a07e3cfca2a8f09 +https://conda.anaconda.org/conda-forge/osx-64/coverage-7.3.0-py311h2725bcf_0.conda#0144292bfb9973f2cce32e1a4a15d8f7 +https://conda.anaconda.org/conda-forge/osx-64/fonttools-4.42.1-py311h2725bcf_0.conda#e3840e7b277a04726126900dbafd3036 +https://conda.anaconda.org/conda-forge/osx-64/gfortran_impl_osx-64-12.3.0-h54fd467_1.conda#5f4d40236e204c6e62cd0a316244f316 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc +https://conda.anaconda.org/conda-forge/osx-64/ld64-609-ha91a046_14.conda#ec7082eb79ea5db88c97b7bcad3db986 https://conda.anaconda.org/conda-forge/osx-64/libcblas-3.9.0-17_osx64_mkl.conda#5adcad22978f80fa101047022e79d9eb https://conda.anaconda.org/conda-forge/osx-64/liblapack-3.9.0-17_osx64_mkl.conda#5557060dea295fcbb224be17b3947d16 -https://conda.anaconda.org/conda-forge/osx-64/pillow-9.5.0-py311h7cb0e2d_1.conda#bf4feca7fd63e619c39ab32eac625edf +https://conda.anaconda.org/conda-forge/osx-64/pillow-10.0.0-py311h7cb0e2d_0.conda#042cee47581520be03136d16e8cc0969 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.0-pyhd8ed1ab_0.conda#3cfe9b9e958e7238a386933c75d190db https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/osx-64/cctools-973.0.1-h76f1dac_13.conda#802cae917abdc5a7cdfa699ff02da42d -https://conda.anaconda.org/conda-forge/osx-64/clangxx-14.0.6-default_hdb78580_1.conda#cc2ac1c5c838cb0edd65258da7c38294 +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.7.1-hd8ed1ab_0.conda#f96688577f1faa58096d06a45136afa2 +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.4-pyhd8ed1ab_0.conda#18badd8fa3648d1beb1fcc7f2e0f756e +https://conda.anaconda.org/conda-forge/osx-64/cctools-973.0.1-hd9ad811_14.conda#2fbb98b0cef591025f5f1b17730a0b5b +https://conda.anaconda.org/conda-forge/osx-64/clang-15.0.7-h694c41f_3.conda#8a48d466e519b8db7dda7c5d27cc1d31 https://conda.anaconda.org/conda-forge/osx-64/liblapacke-3.9.0-17_osx64_mkl.conda#678af3918e54ac46249290a05e7e69b1 -https://conda.anaconda.org/conda-forge/osx-64/numpy-1.24.3-py311hc44ba51_0.conda#6c4b3bbdc10013352324d4cc366edb17 -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 
-https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 +https://conda.anaconda.org/conda-forge/osx-64/numpy-1.25.2-py311hc44ba51_0.conda#e45d265a53efa94a8e8e94392fab71e0 +https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.10.0-pyhd8ed1ab_0.conda#0809187ef9b89a3d94a5c24d13936236 +https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.1.0-pyhd8ed1ab_0.conda#06eb685a3a0b146347a58dda979485da +https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b https://conda.anaconda.org/conda-forge/osx-64/blas-devel-3.9.0-17_osx64_mkl.conda#b40b415e2be4d0d2a8d05d0f805240b7 -https://conda.anaconda.org/conda-forge/noarch/compiler-rt_osx-64-14.0.6-hab78ec2_0.tar.bz2#4fdde3f4ed31722a1c811723f5db82f0 -https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.0.7-py311hd2070f0_0.conda#d78f75103409d2c7a8774c873821ae9a -https://conda.anaconda.org/conda-forge/osx-64/pandas-2.0.2-py311hab14417_0.conda#a490b12cf9ba39a6968000e93826c283 +https://conda.anaconda.org/conda-forge/osx-64/clangxx-15.0.7-default_hdb78580_3.conda#58df9ff86fefc7684670be729b41412f +https://conda.anaconda.org/conda-forge/osx-64/contourpy-1.1.0-py311h5fe6e05_0.conda#1969042c846644a15c25ea78f487459c +https://conda.anaconda.org/conda-forge/osx-64/pandas-2.1.0-py311hab14417_0.conda#e081bf78c37d2671a7d442a56c3a8728 https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-cov-4.1.0-pyhd8ed1ab_0.conda#06eb685a3a0b146347a58dda979485da -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/osx-64/blas-2.117-mkl.conda#4c921079b5298ce08bb336fc025b96d7 -https://conda.anaconda.org/conda-forge/osx-64/compiler-rt-14.0.6-h613da45_0.tar.bz2#b44e0625319f9933e584dc3b96f5baf7 -https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.7.1-py311h2bf763f_0.conda#d67ac9c9b834ae77ff7b2c59f702803c https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/osx-64/scipy-1.10.1-py311h16c3c4d_3.conda#a3ba8e96a7511ef8c3b61d28a68da6ed -https://conda.anaconda.org/conda-forge/osx-64/clang_osx-64-14.0.6-h3113cd8_6.conda#1b191288877fac1564184b28ce07de84 -https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.7.1-py311h6eed73b_0.conda#c112be16f02d1c68de63ae3ec6fc7db4 -https://conda.anaconda.org/conda-forge/osx-64/pyamg-5.0.0-py311h349b758_0.conda#a6c92bfaa34aa9c3211ede51e683c43f -https://conda.anaconda.org/conda-forge/osx-64/c-compiler-1.5.2-hbf74d83_0.conda#c1413ef5a20d658923e12dd3b566d8f3 -https://conda.anaconda.org/conda-forge/osx-64/clangxx_osx-64-14.0.6-h6f97653_6.conda#3989d08f74e7d987e94d9003cea30080 -https://conda.anaconda.org/conda-forge/osx-64/gfortran_osx-64-11.3.0-h18f7dce_1.conda#4e066d81dd3b86556b723021980f4ed8 -https://conda.anaconda.org/conda-forge/osx-64/cxx-compiler-1.5.2-hb8565cd_0.conda#349ae14723b98f76ea0fcb8e532b2ead -https://conda.anaconda.org/conda-forge/osx-64/gfortran-12.2.0-h2c809b3_1.conda#4a5cb3bf02a98991321a1f8ec4d8c817 -https://conda.anaconda.org/conda-forge/osx-64/fortran-compiler-1.5.2-haad3a49_0.conda#649a324b13eb77c6d5e98d36ea0c59f4 
-https://conda.anaconda.org/conda-forge/osx-64/compilers-1.5.2-h694c41f_0.conda#1fdd3bc173dad6e7a0439962c7764ab8 +https://conda.anaconda.org/conda-forge/osx-64/blas-2.117-mkl.conda#4c921079b5298ce08bb336fc025b96d7 +https://conda.anaconda.org/conda-forge/noarch/compiler-rt_osx-64-15.0.7-he1888fc_1.conda#e1f93ea86259a549f2dcbfd245bf0422 +https://conda.anaconda.org/conda-forge/osx-64/matplotlib-base-3.7.2-py311haff9b01_0.conda#bd9520e9015e70f3de839ce48c9061ea +https://conda.anaconda.org/conda-forge/osx-64/scipy-1.11.2-py311h16c3c4d_0.conda#67361fcbfef51366e72588d9ff6c4a5a +https://conda.anaconda.org/conda-forge/osx-64/compiler-rt-15.0.7-he1888fc_1.conda#8ec296a4b097aeb2d85eafaf745c770a +https://conda.anaconda.org/conda-forge/osx-64/matplotlib-3.7.2-py311h6eed73b_0.conda#e32f9e5a192246ee550157ac8ffca102 +https://conda.anaconda.org/conda-forge/osx-64/pyamg-5.0.1-py311hd5c4f45_0.conda#e6d0a4faf158e01603bb1cef76fa10ab +https://conda.anaconda.org/conda-forge/osx-64/clang_osx-64-15.0.7-h03d6864_3.conda#9dfd4e8cbc51c07a7b1ad59ad8415fad +https://conda.anaconda.org/conda-forge/osx-64/c-compiler-1.6.0-h63c33a9_0.conda#d7f3b8d3a85b4e7eded31adb611bb665 +https://conda.anaconda.org/conda-forge/osx-64/clangxx_osx-64-15.0.7-h2133e9c_3.conda#2ff16b86a981da4b1a2658423db664bb +https://conda.anaconda.org/conda-forge/osx-64/gfortran_osx-64-12.3.0-h18f7dce_1.conda#436af2384c47aedb94af78a128e174f1 +https://conda.anaconda.org/conda-forge/osx-64/cxx-compiler-1.6.0-h1c7c39f_0.conda#9adaf7c9d4e1e15e70a8dd46befbbab2 +https://conda.anaconda.org/conda-forge/osx-64/gfortran-12.3.0-h2c809b3_1.conda#c48adbaa8944234b80ef287c37e329b0 +https://conda.anaconda.org/conda-forge/osx-64/fortran-compiler-1.6.0-h932d759_0.conda#d2bc049eae716dd6879079ddd209ffc3 +https://conda.anaconda.org/conda-forge/osx-64/compilers-1.6.0-h694c41f_0.conda#d4c66ca84aa87a6c63f4c8a6498052d9 diff --git a/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml b/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml index e32b4adc6ea3e..64a33fe7d7522 100644 --- a/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml +++ b/build_tools/azure/pylatest_conda_mkl_no_openmp_environment.yml @@ -5,7 +5,7 @@ channels: - defaults dependencies: - python - - numpy + - numpy<1.25 - blas[build=mkl] - scipy - cython diff --git a/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock b/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock index 1e1ae5e4ff3e6..0e987a3ed7de9 100644 --- a/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock +++ b/build_tools/azure/pylatest_conda_mkl_no_openmp_osx-64_conda.lock @@ -1,10 +1,10 @@ # Generated by conda-lock. 
# platform: osx-64 -# input_hash: 808a9ca502dcdd93d1b689ad8ff08d74228790f74a1f707c0054ee97dad6a742 +# input_hash: 03f7604aefb9752d2367c457bdf4e4923158be96db35ac0dd1d5dc60a9981cd1 @EXPLICIT https://repo.anaconda.com/pkgs/main/osx-64/blas-1.0-mkl.conda#cb2c87e85ac8e0ceae776d26d4214c8a https://repo.anaconda.com/pkgs/main/osx-64/bzip2-1.0.8-h1de35cc_0.conda#19fcb113b170fe2a0be96b47801fed7d -https://repo.anaconda.com/pkgs/main/osx-64/ca-certificates-2023.01.10-hecd8cb5_0.conda#4544150389480f19dd67c20b3bb12d61 +https://repo.anaconda.com/pkgs/main/osx-64/ca-certificates-2023.05.30-hecd8cb5_0.conda#1ab4b0336a311064d05fdd2484df5336 https://repo.anaconda.com/pkgs/main/osx-64/giflib-5.2.1-h6c40b1e_3.conda#a5ab49bdb6fdc875fb965221241e3bcf https://repo.anaconda.com/pkgs/main/osx-64/jpeg-9e-h6c40b1e_1.conda#fc3e61fa41309946c9283fe8737d7f41 https://repo.anaconda.com/pkgs/main/osx-64/libbrotlicommon-1.0.9-hca72f7f_7.conda#6c865b9e76fa2fad0c8ac32aa0f01f75 @@ -25,28 +25,23 @@ https://repo.anaconda.com/pkgs/main/osx-64/libbrotlienc-1.0.9-hca72f7f_7.conda#e https://repo.anaconda.com/pkgs/main/osx-64/libgfortran5-11.3.0-h9dfd629_28.conda#1fa1a27ee100b1918c3021dbfa3895a3 https://repo.anaconda.com/pkgs/main/osx-64/libpng-1.6.39-h6c40b1e_0.conda#a3c824835f53ad27aeb86d2b55e47804 https://repo.anaconda.com/pkgs/main/osx-64/lz4-c-1.9.4-hcec6c5f_0.conda#44291e9e6920cfff30caf1299f48db38 -https://repo.anaconda.com/pkgs/main/osx-64/openssl-1.1.1t-hca72f7f_0.conda#5027baac278975d148ee3887b3f4e911 +https://repo.anaconda.com/pkgs/main/osx-64/openssl-3.0.10-hca72f7f_2.conda#2915c036fba59aa6bf4faaf36473619a https://repo.anaconda.com/pkgs/main/osx-64/readline-8.2-hca72f7f_0.conda#971667436260e523f6f7355fdfa238bf https://repo.anaconda.com/pkgs/main/osx-64/tbb-2021.8.0-ha357a0b_0.conda#fb48530a3eea681c11dafb95b3387c0f https://repo.anaconda.com/pkgs/main/osx-64/tk-8.6.12-h5d9f67b_0.conda#047f0af5486d19163e37fd7f8ae3d29f https://repo.anaconda.com/pkgs/main/osx-64/brotli-bin-1.0.9-hca72f7f_7.conda#110bdca1a20710820e61f7fa3047f737 https://repo.anaconda.com/pkgs/main/osx-64/freetype-2.12.1-hd8bbffd_0.conda#1f276af321375ee7fe8056843044fa76 https://repo.anaconda.com/pkgs/main/osx-64/libgfortran-5.0.0-11_3_0_hecd8cb5_28.conda#2eb13b680803f1064e53873ae0aaafb3 -https://repo.anaconda.com/pkgs/main/osx-64/mkl-2023.1.0-h59209a4_43558.conda#898a058caf42cf8b706034be6e5b2d50 +https://repo.anaconda.com/pkgs/main/osx-64/mkl-2023.1.0-h8e150cf_43559.conda#f5a09d45a003f817d5c43935e20ca0c8 https://repo.anaconda.com/pkgs/main/osx-64/sqlite-3.41.2-h6c40b1e_0.conda#6947a501943529c7536b7e4ba53802c1 https://repo.anaconda.com/pkgs/main/osx-64/zstd-1.5.5-hc035e20_0.conda#5e0b7ddb1b7dc6b630e1f9a03499c19c https://repo.anaconda.com/pkgs/main/osx-64/brotli-1.0.9-hca72f7f_7.conda#68e54d12ec67591deb2ffd70348fb00f -https://repo.anaconda.com/pkgs/main/osx-64/libtiff-4.5.0-hcec6c5f_2.conda#f0b033a82af1bd028f112cdecef1fe0a -https://repo.anaconda.com/pkgs/main/osx-64/python-3.11.3-h1fd4e5f_0.conda#df6f985ea9100007789662afeca11311 -https://repo.anaconda.com/pkgs/main/noarch/appdirs-1.4.4-pyhd3eb1b0_0.conda#5673d98d06171cb6eed03a6736845c4d -https://repo.anaconda.com/pkgs/main/osx-64/attrs-22.1.0-py311hecd8cb5_0.conda#d87b931f00c25263ede3d7ec691389af -https://repo.anaconda.com/pkgs/main/osx-64/certifi-2023.5.7-py311hecd8cb5_0.conda#c7cb5a9de1041b8b59f92089bd9aa55e -https://repo.anaconda.com/pkgs/main/noarch/charset-normalizer-2.0.4-pyhd3eb1b0_0.conda#e7a441d94234b2b5fafee06e25dbf076 
+https://repo.anaconda.com/pkgs/main/osx-64/libtiff-4.5.1-hcec6c5f_0.conda#e127a800ffd9d300ed7d5e1b026944ec +https://repo.anaconda.com/pkgs/main/osx-64/python-3.11.4-hf27a42d_0.conda#7ad1265574193e18e9beaa16879734dc https://repo.anaconda.com/pkgs/main/osx-64/coverage-7.2.2-py311h6c40b1e_0.conda#e15605553450156cf75c3ae38a920475 https://repo.anaconda.com/pkgs/main/noarch/cycler-0.11.0-pyhd3eb1b0_0.conda#f5e365d2cdb66d547eb8c3ab93843aab -https://repo.anaconda.com/pkgs/main/osx-64/cython-0.29.33-py311hcec6c5f_0.conda#9865281df3b2e61f46dc189ae46c5abc +https://repo.anaconda.com/pkgs/main/osx-64/cython-3.0.0-py311h6c40b1e_0.conda#f1831f4c643b4653ecb777477763f9cc https://repo.anaconda.com/pkgs/main/noarch/execnet-1.9.0-pyhd3eb1b0_0.conda#f895937671af67cebb8af617494b3513 -https://repo.anaconda.com/pkgs/main/osx-64/idna-3.4-py311hecd8cb5_0.conda#48ab3e9b53e5607abe86a920cd37e13a https://repo.anaconda.com/pkgs/main/noarch/iniconfig-1.1.1-pyhd3eb1b0_0.tar.bz2#e40edff2c5708f342cef43c7f280c507 https://repo.anaconda.com/pkgs/main/osx-64/joblib-1.2.0-py311hecd8cb5_0.conda#af8c1fcd4e8e0c6fa2a4f4ecda261dc9 https://repo.anaconda.com/pkgs/main/osx-64/kiwisolver-1.4.4-py311hcec6c5f_0.conda#f2cf31e2a762f071fd6bc4d74ea2bfc8 @@ -54,34 +49,25 @@ https://repo.anaconda.com/pkgs/main/osx-64/lcms2-2.12-hf1fd2bf_0.conda#697aba7a3 https://repo.anaconda.com/pkgs/main/osx-64/libwebp-1.2.4-hf6ce154_1.conda#07d0981c3847293d4aea5778298a12d3 https://repo.anaconda.com/pkgs/main/osx-64/mkl-service-2.4.0-py311h6c40b1e_1.conda#f709b80c57a0fcc577319920d1b7228b https://repo.anaconda.com/pkgs/main/noarch/munkres-1.1.4-py_0.conda#148362ba07f92abab76999a680c80084 -https://repo.anaconda.com/pkgs/main/osx-64/packaging-23.0-py311hecd8cb5_0.conda#456989f87701680b35cab3edc49e223d +https://repo.anaconda.com/pkgs/main/osx-64/packaging-23.1-py311hecd8cb5_0.conda#4f5c491cd2de9d61f61c0ea3340ab46a https://repo.anaconda.com/pkgs/main/osx-64/pluggy-1.0.0-py311hecd8cb5_1.conda#98e4da64cd934965a0caf4136280ff35 https://repo.anaconda.com/pkgs/main/noarch/py-1.11.0-pyhd3eb1b0_0.conda#7205a898ed2abbf6e9b903dff6abe08e -https://repo.anaconda.com/pkgs/main/noarch/pycparser-2.21-pyhd3eb1b0_0.conda#135a72ff2a31150a3a3ff0b1edd41ca9 https://repo.anaconda.com/pkgs/main/osx-64/pyparsing-3.0.9-py311hecd8cb5_0.conda#a4262f849ecc82af69f58da0cbcaaf04 -https://repo.anaconda.com/pkgs/main/osx-64/pysocks-1.7.1-py311hecd8cb5_0.conda#6a9c1a311e30a9776b3297fe1480fa38 +https://repo.anaconda.com/pkgs/main/noarch/python-tzdata-2023.3-pyhd3eb1b0_0.conda#479c037de0186d114b9911158427624e https://repo.anaconda.com/pkgs/main/osx-64/pytz-2022.7-py311hecd8cb5_0.conda#87c5590ad0bdf9c5c76feb22b7fbd5ba -https://repo.anaconda.com/pkgs/main/osx-64/setuptools-67.8.0-py311hecd8cb5_0.conda#9a01cd68b3c26dbdb25f31ee5b32819f +https://repo.anaconda.com/pkgs/main/osx-64/setuptools-68.0.0-py311hecd8cb5_0.conda#ad594daf4f91ef9b89b10b0f4b2c9e10 https://repo.anaconda.com/pkgs/main/noarch/six-1.16.0-pyhd3eb1b0_1.conda#34586824d411d36af2fa40e799c172d0 https://repo.anaconda.com/pkgs/main/noarch/threadpoolctl-2.2.0-pyh0d69192_0.conda#bbfdbae4934150b902f97daaf287efe2 https://repo.anaconda.com/pkgs/main/noarch/toml-0.10.2-pyhd3eb1b0_0.conda#cda05f5f6d8509529d1a2743288d197a -https://repo.anaconda.com/pkgs/main/osx-64/tomli-2.0.1-py311hecd8cb5_0.conda#d69dd2914a3eb8cf856a14455dd3f458 -https://repo.anaconda.com/pkgs/main/osx-64/tornado-6.2-py311h6c40b1e_0.conda#04ec029d2ac86baa6140fd0a36c971b6 -https://repo.anaconda.com/pkgs/main/osx-64/cffi-1.15.1-py311h6c40b1e_3.conda#5eb14a7a7187a7593f09dafc7a26ff23 
+https://repo.anaconda.com/pkgs/main/osx-64/tornado-6.3.2-py311h6c40b1e_0.conda#0dcfb37496c5564e896428ae56ab3e95 https://repo.anaconda.com/pkgs/main/noarch/fonttools-4.25.0-pyhd3eb1b0_0.conda#bb9c5b5a6d892fca5efe4bf0203b6a48 https://repo.anaconda.com/pkgs/main/osx-64/numpy-base-1.24.3-py311h53bf9ac_1.conda#1b1957e3823208a006d0699999335c7d https://repo.anaconda.com/pkgs/main/osx-64/pillow-9.4.0-py311hcec6c5f_0.conda#fccbb731e918b59d44372354ff2e24f9 -https://repo.anaconda.com/pkgs/main/osx-64/pytest-7.3.1-py311hecd8cb5_0.conda#0247a6236ee44b38f6f0dc54ca3cbe7a +https://repo.anaconda.com/pkgs/main/osx-64/pytest-7.4.0-py311hecd8cb5_0.conda#8c5496a4a1f36160ac5556495faa4a24 https://repo.anaconda.com/pkgs/main/noarch/python-dateutil-2.8.2-pyhd3eb1b0_0.conda#211ee00320b08a1ac9fea6677649f6c9 -https://repo.anaconda.com/pkgs/main/osx-64/brotlipy-0.7.0-py311h6c40b1e_1002.conda#214a3acdf6f828a764263d430826688b -https://repo.anaconda.com/pkgs/main/osx-64/cryptography-39.0.1-py311hf6deb26_0.conda#baf00061474e2c639029b0208d3eaf2e https://repo.anaconda.com/pkgs/main/osx-64/pytest-cov-4.0.0-py311hecd8cb5_0.conda#c63893569d344f4297f2ae08e0387ccf https://repo.anaconda.com/pkgs/main/noarch/pytest-forked-1.3.0-pyhd3eb1b0_0.tar.bz2#07970bffdc78f417d7f8f1c7e620f5c4 -https://repo.anaconda.com/pkgs/main/osx-64/pyopenssl-23.0.0-py311hecd8cb5_0.conda#d034f753f088967f765030dc5742c1d7 https://repo.anaconda.com/pkgs/main/noarch/pytest-xdist-2.5.0-pyhd3eb1b0_0.conda#d15cdc4207bcf8ca920822597f1d138d -https://repo.anaconda.com/pkgs/main/osx-64/urllib3-1.26.15-py311hecd8cb5_0.conda#2ce7c8e3fe61096e275f3d078485f7b6 -https://repo.anaconda.com/pkgs/main/osx-64/requests-2.29.0-py311hecd8cb5_0.conda#5ea75ca544f2a7b0a2660368bf886006 -https://repo.anaconda.com/pkgs/main/noarch/pooch-1.4.0-pyhd3eb1b0_0.conda#69ec83cb3d152f9e854115555004f368 https://repo.anaconda.com/pkgs/main/osx-64/bottleneck-1.3.5-py311hb9e55a9_0.conda#5aa1b58b421d4608b16184f8468253ef https://repo.anaconda.com/pkgs/main/osx-64/contourpy-1.0.5-py311ha357a0b_0.conda#a130f83ba4b5d008e0c134c73e10b8fb https://repo.anaconda.com/pkgs/main/osx-64/matplotlib-3.7.1-py311hecd8cb5_1.conda#6ec92c9f01ff593b177da73ab17e9f54 @@ -90,6 +76,6 @@ https://repo.anaconda.com/pkgs/main/osx-64/mkl_fft-1.3.6-py311hdb55bb0_1.conda#d https://repo.anaconda.com/pkgs/main/osx-64/mkl_random-1.2.2-py311hdb55bb0_1.conda#9b1de8f6e280fb8e74f186007a0b4ca4 https://repo.anaconda.com/pkgs/main/osx-64/numpy-1.24.3-py311h728a8a3_1.conda#68069c79ebb0cdd2561026a909a57183 https://repo.anaconda.com/pkgs/main/osx-64/numexpr-2.8.4-py311h728a8a3_1.conda#be9facbd68b7476262684afb69fd2841 -https://repo.anaconda.com/pkgs/main/osx-64/scipy-1.10.1-py311h224febf_1.conda#a3ae336a401d47b73b17c3b5d780de78 -https://repo.anaconda.com/pkgs/main/osx-64/pandas-1.5.3-py311hc5848a5_0.conda#4111406bad69018aa5e1cb04561a4374 +https://repo.anaconda.com/pkgs/main/osx-64/scipy-1.11.1-py311h224febf_0.conda#ddae5ebff1fd56d3900250861cd2d4b9 +https://repo.anaconda.com/pkgs/main/osx-64/pandas-2.0.3-py311hdb55bb0_0.conda#d23c89f98d12773c3cc8d16bd6206711 https://repo.anaconda.com/pkgs/main/osx-64/pyamg-4.2.3-py311h37a6a59_0.conda#5fca7d043dc68c1d7acc22aa03a24918 diff --git a/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock b/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock index eab6dc087f26d..865546a447b7b 100644 --- a/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock +++ b/build_tools/azure/pylatest_pip_openblas_pandas_linux-64_conda.lock @@ -1,9 +1,9 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: 61862ec58344ddfaad255f4687ca311eb7e2e61001e209d63f0cc92f97178848 +# input_hash: d01d23bd27bcd50d2b3643492f966c8e390822d72b69f31bf66c2fe98a265a4c @EXPLICIT https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.01.10-h06a4308_0.conda#7704989a2ccf6c1f5a50c985509841c4 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.05.30-h06a4308_0.conda#979be8dd2368decd342b13e01540d297 https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b https://repo.anaconda.com/pkgs/main/noarch/tzdata-2023c-h04d1e81_0.conda#29db02adf8808f7c64642cead3e28acd https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd @@ -12,78 +12,77 @@ https://repo.anaconda.com/pkgs/main/linux-64/_openmp_mutex-5.1-1_gnu.conda#71d28 https://repo.anaconda.com/pkgs/main/linux-64/libgcc-ng-11.2.0-h1234567_1.conda#a87728dabf3151fb9cfa990bd2eb0464 https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda#06e288f9250abef59b9a367d151fc339 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1t-h7f8727e_0.conda#0410db682c02665511bd4203ade48a32 +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.10-h7f8727e_2.conda#066a828cc9dcd120af8c503381d6a1b8 https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.2-h5eee18b_0.conda#bcd31de48a0dcb44bc5b99675800c5cc https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda#c7086c9ceb6cfe1c4c729a774a2d88a5 -https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.16-h7a1cb2a_2.conda#6b4f255f11b3facb3fa17061757b8cc2 -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-67.8.0-py39h06a4308_0.conda#3d40bf5ad5f24b0c96624efd2cff1c80 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.9.17-h955ad1f_0.conda#73d1906010828e497fe284a2ed9fd2cb +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-68.0.0-py39h06a4308_0.conda#0af0f107fd4cfe0b3b46ce9fe0471873 https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.38.4-py39h06a4308_0.conda#83e731cfecb3797a0f2865615177f433 -https://repo.anaconda.com/pkgs/main/linux-64/pip-23.0.1-py39h06a4308_0.conda#e36d76b4611ca9b5d8bd180232aecbac +https://repo.anaconda.com/pkgs/main/linux-64/pip-23.2.1-py39h06a4308_0.conda#e337f9c2951f8ad2a1276b91c04d47e4 # pip alabaster @ https://files.pythonhosted.org/packages/64/88/c7083fc61120ab661c5d0b82cb77079fc1429d3f913a456c1c82cf4658f7/alabaster-0.7.13-py3-none-any.whl#sha256=1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3 # pip babel @ https://files.pythonhosted.org/packages/df/c4/1088865e0246d7ecf56d819a233ab2b72f7d6ab043965ef327d0731b5434/Babel-2.12.1-py3-none-any.whl#sha256=b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610 -# pip certifi @ 
https://files.pythonhosted.org/packages/9d/19/59961b522e6757f0c9097e4493fa906031b95b3ebe9360b2c3083561a6b4/certifi-2023.5.7-py3-none-any.whl#sha256=c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716 -# pip charset-normalizer @ https://files.pythonhosted.org/packages/33/97/9967fb2d364a9da38557e4af323abcd58cc05bdd8f77e9fd5ae4882772cc/charset_normalizer-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=21fa558996782fc226b529fdd2ed7866c2c6ec91cee82735c98a197fae39f706 +# pip certifi @ https://files.pythonhosted.org/packages/4c/dd/2234eab22353ffc7d94e8d13177aaa050113286e93e7b40eae01fbf7c3d9/certifi-2023.7.22-py3-none-any.whl#sha256=92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 +# pip charset-normalizer @ https://files.pythonhosted.org/packages/f9/0d/514be8597d7a96243e5467a37d337b9399cec117a513fcf9328405d911c0/charset_normalizer-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=8700f06d0ce6f128de3ccdbc1acaea1ee264d2caa9ca05daaf492fde7c2a7200 # pip cycler @ https://files.pythonhosted.org/packages/5c/f9/695d6bedebd747e5eb0fe8fad57b72fdf25411273a39791cde838d5a8f51/cycler-0.11.0-py3-none-any.whl#sha256=3a27e95f763a428a739d2add979fa7494c912a32c17c4c38c4d5f082cad165a3 -# pip cython @ https://files.pythonhosted.org/packages/01/fd/5e489abe8ee99a52366b5ae99518b64f6024c6dd331b4d75a6a9ac48f429/Cython-0.29.35-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl#sha256=c4cd7de707938b8385cd1f88e1446228fbfe09af7822fa13877a4374c4881198 +# pip cython @ https://files.pythonhosted.org/packages/18/f1/c3918a7a367a17d5c07d8e576c51ba78fc807f214f748026876352f8b0c2/Cython-3.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=08d67c7225a09eeb77e090c8d4f60677165b052ccf76e3a57d8237064e5c2de2 # pip docutils @ https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl#sha256=96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 -# pip exceptiongroup @ https://files.pythonhosted.org/packages/61/97/17ed81b7a8d24d8f69b62c0db37abbd8c0042d4b3fc429c73dab986e7483/exceptiongroup-1.1.1-py3-none-any.whl#sha256=232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e -# pip execnet @ https://files.pythonhosted.org/packages/81/c0/3072ecc23f4c5e0a1af35e3a222855cfd9c80a1a105ca67be3b6172637dd/execnet-1.9.0-py2.py3-none-any.whl#sha256=a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142 -# pip fonttools @ https://files.pythonhosted.org/packages/ad/5f/20da4f41e33e77723b0100ded6539529bd159319ed49d6459a4647cdc7ee/fonttools-4.39.4-py3-none-any.whl#sha256=106caf6167c4597556b31a8d9175a3fdc0356fdcd70ab19973c3b0d4c893c461 +# pip exceptiongroup @ https://files.pythonhosted.org/packages/ad/83/b71e58666f156a39fb29417e4c8ca4bc7400c0dd4ed9e8842ab54dc8c344/exceptiongroup-1.1.3-py3-none-any.whl#sha256=343280667a4585d195ca1cf9cef84a4e178c4b6cf2274caef9859782b567d5e3 +# pip execnet @ https://files.pythonhosted.org/packages/e8/9c/a079946da30fac4924d92dbc617e5367d454954494cf1e71567bcc4e00ee/execnet-2.0.2-py3-none-any.whl#sha256=88256416ae766bc9e8895c76a87928c0012183da3cc4fc18016e6f050e025f41 +# pip fonttools @ https://files.pythonhosted.org/packages/49/50/2e31753c088d364756daa5bed0dab6a5928ebfd6e6d26f975c8b6d6f754a/fonttools-4.42.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=7cc7d685b8eeca7ae69dc6416833fbfea61660684b7089bca666067cb2937dcf # pip idna @ 
https://files.pythonhosted.org/packages/fc/34/3030de6f1370931b9dbb4dad48f6ab1015ab1d32447850b9fc94e60097be/idna-3.4-py3-none-any.whl#sha256=90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 # pip imagesize @ https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b # pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 -# pip joblib @ https://files.pythonhosted.org/packages/91/d4/3b4c8e5a30604df4c7518c562d4bf0502f2fa29221459226e140cf846512/joblib-1.2.0-py3-none-any.whl#sha256=091138ed78f800342968c523bdde947e7a305b8594b910a0fea2ab83c3c6d385 -# pip kiwisolver @ https://files.pythonhosted.org/packages/a4/36/c414d75be311ce97ef7248edcc4fc05afae2998641bf6b592d43a9dee581/kiwisolver-1.4.4-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=7c43e1e1206cd421cd92e6b3280d4385d41d7166b3ed577ac20444b6995a445f -# pip lazy-loader @ https://files.pythonhosted.org/packages/a1/a8/c41f46b47a381bd60a40c0ef00d2fd1722b743b178f9c1cec0da949043de/lazy_loader-0.2-py3-none-any.whl#sha256=c35875f815c340f823ce3271ed645045397213f961b40ad0c0d395c3f5218eeb +# pip joblib @ https://files.pythonhosted.org/packages/10/40/d551139c85db202f1f384ba8bcf96aca2f329440a844f924c8a0040b6d02/joblib-1.3.2-py3-none-any.whl#sha256=ef4331c65f239985f3f2220ecc87db222f08fd22097a3dd5698f693875f8cbb9 +# pip kiwisolver @ https://files.pythonhosted.org/packages/c0/a8/841594f11d0b88d8aeb26991bc4dac38baa909dc58d0c4262a4f7893bcbf/kiwisolver-1.4.5-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=6c3bd3cde54cafb87d74d8db50b909705c62b17c2099b8f2e25b461882e544ff +# pip lazy-loader @ https://files.pythonhosted.org/packages/a1/c3/65b3814e155836acacf720e5be3b5757130346670ac454fee29d3eda1381/lazy_loader-0.3-py3-none-any.whl#sha256=1e9e76ee8631e264c62ce10006718e80b2cfc74340d17d1031e0f84af7478554 # pip markupsafe @ https://files.pythonhosted.org/packages/de/63/cb7e71984e9159ec5f45b5e81e896c8bdd0e45fe3fc6ce02ab497f0d790e/MarkupSafe-2.1.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e # pip networkx @ https://files.pythonhosted.org/packages/a8/05/9d4f9b78ead6b2661d6e8ea772e111fc4a9fbd866ad0c81906c11206b55e/networkx-3.1-py3-none-any.whl#sha256=4f33f68cb2afcf86f28a45f43efc27a9386b535d567d2127f8f61d51dec58d36 -# pip numpy @ https://files.pythonhosted.org/packages/83/be/de078ac5e4ff572b1bdac1808b77cea2013b2c6286282f89b1de3e951273/numpy-1.24.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=210461d87fb02a84ef243cac5e814aad2b7f4be953b32cb53327bb49fd77fbb4 +# pip numpy @ https://files.pythonhosted.org/packages/69/1f/c95b1108a9972a52d7b1b63ed8ca70466b59b8c1811bd121f1e667cc45d8/numpy-1.25.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=d7806500e4f5bdd04095e849265e55de20d8cc4b661b038957354327f6d9b295 # pip packaging @ https://files.pythonhosted.org/packages/ab/c3/57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121/packaging-23.1-py3-none-any.whl#sha256=994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61 -# pip pillow @ 
https://files.pythonhosted.org/packages/ff/fc/48a51c0fe2a00d5def57b9981a1e0f8339b516351da7a51500383d833bc8/Pillow-9.5.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=608488bdcbdb4ba7837461442b90ea6f3079397ddc968c31265c1e056964f1ef -# pip pluggy @ https://files.pythonhosted.org/packages/9e/01/f38e2ff29715251cf25532b9082a1589ab7e4f571ced434f98d0139336dc/pluggy-1.0.0-py2.py3-none-any.whl#sha256=74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 +# pip pillow @ https://files.pythonhosted.org/packages/eb/3a/023761d323f51b932ba8aa70bfe9c987f5fa094ffbaba9cd9295b8eee429/Pillow-10.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=9f7c16705f44e0504a3a2a14197c1f0b32a95731d251777dcb060aa83022cb2d +# pip pluggy @ https://files.pythonhosted.org/packages/05/b8/42ed91898d4784546c5f06c60506400548db3f7a4b3fb441cba4e5c17952/pluggy-1.3.0-py3-none-any.whl#sha256=d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7 # pip py @ https://files.pythonhosted.org/packages/f6/f0/10642828a8dfb741e5f3fbaac830550a518a775c7fff6f04a007259b0548/py-1.11.0-py2.py3-none-any.whl#sha256=607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 -# pip pygments @ https://files.pythonhosted.org/packages/34/a7/37c8d68532ba71549db4212cb036dbd6161b40e463aba336770e80c72f84/Pygments-2.15.1-py3-none-any.whl#sha256=db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1 +# pip pygments @ https://files.pythonhosted.org/packages/43/88/29adf0b44ba6ac85045e63734ae0997d3c58d8b1a91c914d240828d0d73d/Pygments-2.16.1-py3-none-any.whl#sha256=13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692 # pip pyparsing @ https://files.pythonhosted.org/packages/6c/10/a7d0fa5baea8fe7b50f448ab742f26f52b80bfca85ac2be9d35cdd9a3246/pyparsing-3.0.9-py3-none-any.whl#sha256=5026bae9a10eeaefb61dab2f09052b9f4307d44aee4eda64b309723d8d206bbc # pip pytz @ https://files.pythonhosted.org/packages/7f/99/ad6bd37e748257dd70d6f85d916cafe79c0b0f5e2e95b11f7fbc82bf3110/pytz-2023.3-py2.py3-none-any.whl#sha256=a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb # pip six @ https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl#sha256=8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # pip snowballstemmer @ https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl#sha256=c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a -# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/06/c1/5e2cafbd03105ce50d8500f9b4e8a6e8d02e22d0475b574c3b3e9451a15f/sphinxcontrib_applehelp-1.0.4-py3-none-any.whl#sha256=29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228 -# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl#sha256=8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e -# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/6e/ee/a1f5e39046cbb5f8bc8fba87d1ddf1c6643fbc9194e58d26e606de4b9074/sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl#sha256=c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903 # pip sphinxcontrib-jsmath @ 
https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl#sha256=2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178 -# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl#sha256=bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6 -# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl#sha256=352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd -# pip threadpoolctl @ https://files.pythonhosted.org/packages/61/cf/6e354304bcb9c6413c4e02a747b600061c21d38ba51e7e544ac7bc66aecc/threadpoolctl-3.1.0-py3-none-any.whl#sha256=8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b +# pip threadpoolctl @ https://files.pythonhosted.org/packages/81/12/fd4dea011af9d69e1cad05c75f3f7202cdcbeac9b712eea58ca779a72865/threadpoolctl-3.2.0-py3-none-any.whl#sha256=2b7818516e423bdaebb97c723f86a7c6b0a83d3f3b0970328d66f4d9104dc032 # pip tomli @ https://files.pythonhosted.org/packages/97/75/10a9ebee3fd790d20926a90a2547f0bf78f371b2f13aa822c759680ca7b9/tomli-2.0.1-py3-none-any.whl#sha256=939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc -# pip typing-extensions @ https://files.pythonhosted.org/packages/5f/86/d9b1518d8e75b346a33eb59fa31bdbbee11459a7e2cc5be502fa779e96c5/typing_extensions-4.6.3-py3-none-any.whl#sha256=88a4153d8505aabbb4e13aacb7c486c2b4a33ca3b3f807914a9b4c844c471c26 +# pip typing-extensions @ https://files.pythonhosted.org/packages/ec/6b/63cc3df74987c36fe26157ee12e09e8f9db4de771e0f3404263117e75b95/typing_extensions-4.7.1-py3-none-any.whl#sha256=440d5dd3af93b060174bf433bccd69b0babc3b15b1a8dca43789fd7f61514b36 # pip tzdata @ https://files.pythonhosted.org/packages/d5/fb/a79efcab32b8a1f1ddca7f35109a50e4a80d42ac1c9187ab46522b2407d7/tzdata-2023.3-py2.py3-none-any.whl#sha256=7e65763eef3120314099b6939b5546db7adce1e7d6f2e179e3df563c70511eda -# pip urllib3 @ https://files.pythonhosted.org/packages/4b/1d/f8383ef593114755429c307449e7717b87044b3bcd5f7860b89b1f759e34/urllib3-2.0.2-py3-none-any.whl#sha256=d055c2f9d38dc53c808f6fdc8eab7360b6fdbbde02340ed25cfbcd817c62469e -# pip zipp @ https://files.pythonhosted.org/packages/5b/fa/c9e82bbe1af6266adf08afb563905eb87cab83fde00a0a08963510621047/zipp-3.15.0-py3-none-any.whl#sha256=48904fc76a60e542af151aded95726c1a5c34ed43ab4134b597665c86d7ad556 -# pip contourpy @ https://files.pythonhosted.org/packages/c7/97/ba9ace011734cd01b63eb7d39b2cf97afbfa985b0239ab0db85bafa9b207/contourpy-1.0.7-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=e7281244c99fd7c6f27c1c6bfafba878517b0b62925a09b586d88ce750a016d2 -# pip coverage @ https://files.pythonhosted.org/packages/fe/57/e4f8ad64d84ca9e759d783a052795f62a9f9111585e46068845b1cb52c2b/coverage-7.2.7-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=6f48351d66575f535669306aa7d6d6f71bc43372473b54a832222803eb956fd1 -# pip imageio @ https://files.pythonhosted.org/packages/f7/9d/47d0a9d0f267e9155963db8608ffbc448f2b5d4e5414d8e608309f422094/imageio-2.31.0-py3-none-any.whl#sha256=141bbd97910fad105c179a6b344ae4e7fef0dd85411303c63cd925b4c6163bee -# pip importlib-metadata @ 
https://files.pythonhosted.org/packages/30/bb/bf2944b8b88c65b797acc2c6a2cb0fb817f7364debf0675792e034013858/importlib_metadata-6.6.0-py3-none-any.whl#sha256=43dd286a2cd8995d5eaef7fee2066340423b818ed3fd70adf0bad5f1fac53fed -# pip importlib-resources @ https://files.pythonhosted.org/packages/38/71/c13ea695a4393639830bf96baea956538ba7a9d06fcce7cef10bfff20f72/importlib_resources-5.12.0-py3-none-any.whl#sha256=7b1deeebbf351c7578e09bf2f63fa2ce8b5ffec296e0d349139d43cca061a81a +# pip urllib3 @ https://files.pythonhosted.org/packages/9b/81/62fd61001fa4b9d0df6e31d47ff49cfa9de4af03adecf339c7bc30656b37/urllib3-2.0.4-py3-none-any.whl#sha256=de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4 +# pip zipp @ https://files.pythonhosted.org/packages/8c/08/d3006317aefe25ea79d3b76c9650afabaf6d63d1c8443b236e7405447503/zipp-3.16.2-py3-none-any.whl#sha256=679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0 +# pip contourpy @ https://files.pythonhosted.org/packages/38/6f/5382bdff9dda60cb17cef6dfa2bad3e6edacffd5c2243e282e851c63f721/contourpy-1.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=e94bef2580e25b5fdb183bf98a2faa2adc5b638736b2c0a4da98691da641316a +# pip coverage @ https://files.pythonhosted.org/packages/3b/5c/f4e217d026d0e1faef27dc0b1c7a89798bf5d4b8b013f5b7cceda85efc83/coverage-7.3.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=37d5576d35fcb765fca05654f66aa71e2808d4237d026e64ac8b397ffa66a56a +# pip imageio @ https://files.pythonhosted.org/packages/7b/88/59411e1a652ac3338d348901ffa5a73daf1f67fcb3f97d750237d4fa0821/imageio-2.31.2-py3-none-any.whl#sha256=a78fbcb33432042a4d6993c87f3ea1f136d908318ce7dda857846ccff73294de +# pip importlib-metadata @ https://files.pythonhosted.org/packages/cc/37/db7ba97e676af155f5fcb1a35466f446eadc9104e25b83366e8088c9c926/importlib_metadata-6.8.0-py3-none-any.whl#sha256=3ebb78df84a805d7698245025b975d9d67053cd94c79245ba4b3eb694abe68bb +# pip importlib-resources @ https://files.pythonhosted.org/packages/25/d4/592f53ce2f8dde8be5720851bd0ab71cc2e76c55978e4163ef1ab7e389bb/importlib_resources-6.0.1-py3-none-any.whl#sha256=134832a506243891221b88b4ae1213327eea96ceb4e407a00d790bb0626f45cf # pip jinja2 @ https://files.pythonhosted.org/packages/bc/c3/f068337a370801f372f2f8f6bad74a5c140f6fda3d9de154052708dd3c65/Jinja2-3.1.2-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 -# pip pytest @ https://files.pythonhosted.org/packages/1b/d1/72df649a705af1e3a09ffe14b0c7d3be1fd730da6b98beb4a2ed26b8a023/pytest-7.3.1-py3-none-any.whl#sha256=3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362 +# pip pytest @ https://files.pythonhosted.org/packages/33/b2/741130cbcf2bbfa852ed95a60dc311c9e232c7ed25bac3d9b8880a8df4ae/pytest-7.4.0-py3-none-any.whl#sha256=78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32 # pip python-dateutil @ https://files.pythonhosted.org/packages/36/7a/87837f39d0296e723bb9b62bbb257d0355c7f6128853c78955f57342a56d/python_dateutil-2.8.2-py2.py3-none-any.whl#sha256=961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 # pip pywavelets @ https://files.pythonhosted.org/packages/5a/98/4549479a32972bdfdd5e75e168219e97f4dfaee535a8308efef7291e8398/PyWavelets-1.4.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=71ab30f51ee4470741bb55fc6b197b4a2b612232e30f6ac069106f0156342356 # pip requests @ 
https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl#sha256=58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f -# pip scipy @ https://files.pythonhosted.org/packages/5d/30/b2a2a5bf1a3beefb7609fb871dcc6aef7217c69cef19a4631b7ab5622a8a/scipy-1.10.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=1b4735d6c28aad3cdcf52117e0e91d6b39acd4272f3f5cd9907c24ee931ad601 +# pip scipy @ https://files.pythonhosted.org/packages/a3/d3/f88285098505c8e5d141678a24bb9620d902c683f11edc1eb9532b02624e/scipy-1.11.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=10eb6af2f751aa3424762948e5352f707b0dece77288206f227864ddf675aca0 # pip setuptools-scm @ https://files.pythonhosted.org/packages/1d/66/8f42c941be949ef2b22fe905d850c794e7c170a526023612aad5f3a121ad/setuptools_scm-7.1.0-py3-none-any.whl#sha256=73988b6d848709e2af142aa48c986ea29592bbcfca5375678064708205253d8e -# pip tifffile @ https://files.pythonhosted.org/packages/93/86/2ed10947a1891ceb86b084153fac06877fdec38a5ed69bd9286eefab3d44/tifffile-2023.4.12-py3-none-any.whl#sha256=3161954746fe32c4f4244d0fb2eb0a272f3a3760b78882a42faa83ac5e6e0b74 -# pip matplotlib @ https://files.pythonhosted.org/packages/9f/77/0cd22f92f7103383cb1ce3b3efc77411b9cc3a495242c8f2a623b498f586/matplotlib-3.7.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=f883a22a56a84dba3b588696a2b8a1ab0d2c3d41be53264115c71b0a942d8fdb -# pip pandas @ https://files.pythonhosted.org/packages/9f/cc/cc8135de2a574fd87940b1d41c9c52d226d3ebc9fc8f6e9f18a7b0a81b57/pandas-2.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=cf3f0c361a4270185baa89ec7ab92ecaa355fe783791457077473f974f654df5 -# pip pyamg @ https://files.pythonhosted.org/packages/1f/fe/a5d365335e9ab2b90ac55552b90779889559b1af01cdbd264f82ee5678bf/pyamg-5.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=2373a0ef913c272c8b6a6d8c2dfcf9a1681a1c6806a5b13b668bcb5125bb46b2 +# pip tifffile @ https://files.pythonhosted.org/packages/12/3e/89513f44a10c625121b7d5bc54390d7ac7f2c92a19755c052888febf9730/tifffile-2023.8.30-py3-none-any.whl#sha256=62364eef35a6fdcc7bc2ad6f97dd270f577efb01b31260ff800af76a66c1e145 +# pip lightgbm @ https://files.pythonhosted.org/packages/d8/61/4165b1caf07d661c4f0241534bbc18748e49e1ddb849fd9908c36c1d622c/lightgbm-4.0.0.tar.gz#sha256=03d1b3903aa51cd9a5e3965941236f2a7bf5a69d7a76059dbf68fd9b4fc92d8f +# pip matplotlib @ https://files.pythonhosted.org/packages/47/b9/6c0daa9b953a80b4e6933bf6a11a2d0633f257e84ee5995c5fd35de564c9/matplotlib-3.7.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=318c89edde72ff95d8df67d82aca03861240512994a597a435a1011ba18dbc7f +# pip pandas @ https://files.pythonhosted.org/packages/83/f0/2765daac3c58165460b127df5c0ef7b3a039f3bfe7ea7a51f3d20b01371b/pandas-2.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=d99e678180bc59b0c9443314297bddce4ad35727a1a2656dbe585fd78710b3b9 +# pip pyamg @ https://files.pythonhosted.org/packages/35/1c/8b2aa6fbb2bae258ab6cdb35b09635bf50865ac2bcdaf220db3d972cc0d8/pyamg-5.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=1332acec6d5ede9440c8ced0ef20952f5b766387116f254b79880ce29fdecee7 # pip pytest-cov @ https://files.pythonhosted.org/packages/a7/4b/8b78d126e275efa2379b1c2e09dc52cf70df16fc3b90613ef82531499d73/pytest_cov-4.1.0-py3-none-any.whl#sha256=6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a # pip pytest-forked @ 
https://files.pythonhosted.org/packages/f4/af/9c0bda43e486a3c9bf1e0f876d0f241bc3f229d7d65d09331a0868db9629/pytest_forked-1.6.0-py3-none-any.whl#sha256=810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0 # pip scikit-image @ https://files.pythonhosted.org/packages/19/bd/a53569a0a698d925eb46dbea0bd3b6b62e7287a9ec88b5a03efa8ebd5b14/scikit_image-0.21.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=78b1e96c59cab640ca5c5b22c501524cfaf34cbe0cb51ba73bd9a9ede3fb6e1d -# pip scikit-learn @ https://files.pythonhosted.org/packages/81/84/756be2b975959a5f94124d5584ead75d7ca99184f2d16664a0157b274b9a/scikit_learn-1.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=ea061bf0283bf9a9f36ea3c5d3231ba2176221bbd430abd2603b1c3b2ed85c89 -# pip sphinx @ https://files.pythonhosted.org/packages/4b/a9/9760e8373a11a62f5ef66684771b0a5b2c4a699bf0dbbc650ca2b75cec36/sphinx-7.0.1-py3-none-any.whl#sha256=60c5e04756c1709a98845ed27a2eed7a556af3993afb66e77fec48189f742616 -# pip lightgbm @ https://files.pythonhosted.org/packages/38/5c/d9773cf0ea7938f3b777eaacc6f9d58f69ca76a667771364ffefed9095b4/lightgbm-3.3.5-py3-none-manylinux1_x86_64.whl#sha256=044f65664c1a32c98cb619bafa97d8cd9d93c2c2d5053376aadfe509a3a3e7fa -# pip numpydoc @ https://files.pythonhosted.org/packages/c4/81/ad9b8837442ff451eca82515b41ac425f87acff7e2fc016fd1bda13fc01a/numpydoc-1.5.0-py3-none-any.whl#sha256=c997759fb6fc32662801cece76491eedbc0ec619b514932ffd2b270ae89c07f9 # pip pytest-xdist @ https://files.pythonhosted.org/packages/21/08/b1945d4b4986eb1aa10cf84efc5293bba39da80a2f95db3573dd90678408/pytest_xdist-2.5.0-py3-none-any.whl#sha256=6fe5c74fec98906deb8f2d2b616b5c782022744978e7bd4695d39c8f42d0ce65 +# pip numpydoc @ https://files.pythonhosted.org/packages/c4/81/ad9b8837442ff451eca82515b41ac425f87acff7e2fc016fd1bda13fc01a/numpydoc-1.5.0-py3-none-any.whl#sha256=c997759fb6fc32662801cece76491eedbc0ec619b514932ffd2b270ae89c07f9 +# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/c0/0c/261c0949083c0ac635853528bb0070c89e927841d4e533ba0b5563365c06/sphinxcontrib_applehelp-1.0.7-py3-none-any.whl#sha256=094c4d56209d1734e7d252f6e0b3ccc090bd52ee56807a5d9315b19c122ab15d +# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/c0/03/010ac733ec7b7f71c1dc88e7115743ee466560d6d85373b56fb9916e4586/sphinxcontrib_devhelp-1.0.5-py3-none-any.whl#sha256=fe8009aed765188f08fcaadbb3ea0d90ce8ae2d76710b7e29ea7d047177dae2f +# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/28/7a/958f8e3e6abe8219d0d1f1224886de847ab227b218f4a07b61bc337f64be/sphinxcontrib_htmlhelp-2.0.4-py3-none-any.whl#sha256=8001661c077a73c29beaf4a79968d0726103c5605e27db92b9ebed8bab1359e9 +# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/1f/e5/1850f3f118e95581c1e30b57028ac979badee1eb29e70ee72b0241f5a185/sphinxcontrib_qthelp-1.0.6-py3-none-any.whl#sha256=bf76886ee7470b934e363da7a954ea2825650013d367728588732c7350f49ea4 +# pip sphinx @ https://files.pythonhosted.org/packages/a6/54/f4fcf7113eb051a46476ecce9485c463f58dbc3887c06dbfe1e67a8ce7c0/sphinx-7.2.5-py3-none-any.whl#sha256=9269f9ed2821c9ebd30e4204f5c2339f5d4980e377bc89cb2cb6f9b17409c20a +# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/95/d6/2e0bda62b2a808070ac922d21a950aa2cb5e4fcfb87e5ff5f86bc43a2201/sphinxcontrib_serializinghtml-1.1.9-py3-none-any.whl#sha256=9b36e503703ff04f20e9675771df105e58aa029cfcbc23b8ed716019b7416ae1 diff --git a/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock 
b/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock index 7861c2e43cf99..c6435269a4d70 100644 --- a/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock +++ b/build_tools/azure/pylatest_pip_scipy_dev_linux-64_conda.lock @@ -1,9 +1,9 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: d7687370ba8c822d5b621703d51324b6767f15f0fc49177381f2a0a81a756684 +# input_hash: 5ead5ba5f8c8179fdc95f486e5db1fa2283387358360c89b1d62ffb6b1018cf0 @EXPLICIT https://repo.anaconda.com/pkgs/main/linux-64/_libgcc_mutex-0.1-main.conda#c3473ff8bdb3d124ed5ff11ec380d6f9 -https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.01.10-h06a4308_0.conda#7704989a2ccf6c1f5a50c985509841c4 +https://repo.anaconda.com/pkgs/main/linux-64/ca-certificates-2023.05.30-h06a4308_0.conda#979be8dd2368decd342b13e01540d297 https://repo.anaconda.com/pkgs/main/linux-64/ld_impl_linux-64-2.38-h1181459_1.conda#68eedfd9c06f2b0e6888d8db345b7f5b https://repo.anaconda.com/pkgs/main/noarch/tzdata-2023c-h04d1e81_0.conda#29db02adf8808f7c64642cead3e28acd https://repo.anaconda.com/pkgs/main/linux-64/libgomp-11.2.0-h1234567_1.conda#b372c0eea9b60732fdae4b817a63c8cd @@ -14,50 +14,50 @@ https://repo.anaconda.com/pkgs/main/linux-64/bzip2-1.0.8-h7b6447c_0.conda#9303f4 https://repo.anaconda.com/pkgs/main/linux-64/libffi-3.4.4-h6a678d5_0.conda#06e288f9250abef59b9a367d151fc339 https://repo.anaconda.com/pkgs/main/linux-64/libuuid-1.41.5-h5eee18b_0.conda#4a6a2354414c9080327274aa514e5299 https://repo.anaconda.com/pkgs/main/linux-64/ncurses-6.4-h6a678d5_0.conda#5558eec6e2191741a92f832ea826251c -https://repo.anaconda.com/pkgs/main/linux-64/openssl-1.1.1t-h7f8727e_0.conda#0410db682c02665511bd4203ade48a32 +https://repo.anaconda.com/pkgs/main/linux-64/openssl-3.0.10-h7f8727e_2.conda#066a828cc9dcd120af8c503381d6a1b8 https://repo.anaconda.com/pkgs/main/linux-64/xz-5.4.2-h5eee18b_0.conda#bcd31de48a0dcb44bc5b99675800c5cc https://repo.anaconda.com/pkgs/main/linux-64/zlib-1.2.13-h5eee18b_0.conda#333e31fbfbb5057c92fa845ad6adef93 https://repo.anaconda.com/pkgs/main/linux-64/ccache-3.7.9-hfe4627d_0.conda#bef6fc681c273bb7bd0c67d1a591365e https://repo.anaconda.com/pkgs/main/linux-64/readline-8.2-h5eee18b_0.conda#be42180685cce6e6b0329201d9f48efb https://repo.anaconda.com/pkgs/main/linux-64/tk-8.6.12-h1ccaba5_0.conda#fa10ff4aa631fa4aa090a6234d7770b9 https://repo.anaconda.com/pkgs/main/linux-64/sqlite-3.41.2-h5eee18b_0.conda#c7086c9ceb6cfe1c4c729a774a2d88a5 -https://repo.anaconda.com/pkgs/main/linux-64/python-3.11.3-h7a1cb2a_0.conda#d4474259a2525cc6fb272f02ca02873e -https://repo.anaconda.com/pkgs/main/linux-64/setuptools-67.8.0-py311h06a4308_0.conda#b65f6b9c4547f1fd81af11d4e8b649c4 +https://repo.anaconda.com/pkgs/main/linux-64/python-3.11.4-h955ad1f_0.conda#3c8fb4ea9fdf9a4d27d16cd5fb6fdd16 +https://repo.anaconda.com/pkgs/main/linux-64/setuptools-68.0.0-py311h06a4308_0.conda#eae51c7be37e9fc2b6f708114b9f2e8d https://repo.anaconda.com/pkgs/main/linux-64/wheel-0.38.4-py311h06a4308_0.conda#b3d14884810655c572ea9a91df7de205 -https://repo.anaconda.com/pkgs/main/linux-64/pip-23.0.1-py311h06a4308_0.conda#06ec6690fc9814ab769a62dfeeb26419 +https://repo.anaconda.com/pkgs/main/linux-64/pip-23.2.1-py311h06a4308_0.conda#112b1357a2afeacaa247b21c01dc9a06 # pip alabaster @ https://files.pythonhosted.org/packages/64/88/c7083fc61120ab661c5d0b82cb77079fc1429d3f913a456c1c82cf4658f7/alabaster-0.7.13-py3-none-any.whl#sha256=1ee19aca801bbabb5ba3f5f258e4422dfa86f82f3e9cefb0859b283cdd7f62a3 # pip babel @ 
https://files.pythonhosted.org/packages/df/c4/1088865e0246d7ecf56d819a233ab2b72f7d6ab043965ef327d0731b5434/Babel-2.12.1-py3-none-any.whl#sha256=b4246fb7677d3b98f501a39d43396d3cafdc8eadb045f4a31be01863f655c610 -# pip certifi @ https://files.pythonhosted.org/packages/9d/19/59961b522e6757f0c9097e4493fa906031b95b3ebe9360b2c3083561a6b4/certifi-2023.5.7-py3-none-any.whl#sha256=c6c2e98f5c7869efca1f8916fed228dd91539f9f1b444c314c06eef02980c716 -# pip charset-normalizer @ https://files.pythonhosted.org/packages/18/36/7ae10a3dd7f9117b61180671f8d1e4802080cca88ad40aaabd3dad8bab0e/charset_normalizer-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=0ca564606d2caafb0abe6d1b5311c2649e8071eb241b2d64e75a0d0065107e62 -# pip coverage @ https://files.pythonhosted.org/packages/a7/cd/3ce94ad9d407a052dc2a74fbeb1c7947f442155b28264eb467ee78dea812/coverage-7.2.7-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=63426706118b7f5cf6bb6c895dc215d8a418d5952544042c8a2d9fe87fcf09cb +# pip certifi @ https://files.pythonhosted.org/packages/4c/dd/2234eab22353ffc7d94e8d13177aaa050113286e93e7b40eae01fbf7c3d9/certifi-2023.7.22-py3-none-any.whl#sha256=92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 +# pip charset-normalizer @ https://files.pythonhosted.org/packages/bc/85/ef25d4ba14c7653c3020a1c6e1a7413e6791ef36a0ac177efa605fc2c737/charset_normalizer-3.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=246de67b99b6851627d945db38147d1b209a899311b1305dd84916f2b88526c6 +# pip coverage @ https://files.pythonhosted.org/packages/55/63/f2dcc8f7f1587ae54bf8cc1c3b08e07e442633a953537dfaf658a0cbac2c/coverage-7.3.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=fac440c43e9b479d1241fe9d768645e7ccec3fb65dc3a5f6e90675e75c3f3e3a # pip docutils @ https://files.pythonhosted.org/packages/26/87/f238c0670b94533ac0353a4e2a1a771a0cc73277b88bff23d3ae35a256c1/docutils-0.20.1-py3-none-any.whl#sha256=96f387a2c5562db4476f09f13bbab2192e764cac08ebbf3a34a95d9b1e4a59d6 -# pip execnet @ https://files.pythonhosted.org/packages/81/c0/3072ecc23f4c5e0a1af35e3a222855cfd9c80a1a105ca67be3b6172637dd/execnet-1.9.0-py2.py3-none-any.whl#sha256=a295f7cc774947aac58dde7fdc85f4aa00c42adf5d8f5468fc630c1acf30a142 +# pip execnet @ https://files.pythonhosted.org/packages/e8/9c/a079946da30fac4924d92dbc617e5367d454954494cf1e71567bcc4e00ee/execnet-2.0.2-py3-none-any.whl#sha256=88256416ae766bc9e8895c76a87928c0012183da3cc4fc18016e6f050e025f41 # pip idna @ https://files.pythonhosted.org/packages/fc/34/3030de6f1370931b9dbb4dad48f6ab1015ab1d32447850b9fc94e60097be/idna-3.4-py3-none-any.whl#sha256=90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2 # pip imagesize @ https://files.pythonhosted.org/packages/ff/62/85c4c919272577931d407be5ba5d71c20f0b616d31a0befe0ae45bb79abd/imagesize-1.4.1-py2.py3-none-any.whl#sha256=0d8d18d08f840c19d0ee7ca1fd82490fdc3729b7ac93f49870406ddde8ef8d8b # pip iniconfig @ https://files.pythonhosted.org/packages/ef/a6/62565a6e1cf69e10f5727360368e451d4b7f58beeac6173dc9db836a5b46/iniconfig-2.0.0-py3-none-any.whl#sha256=b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 # pip markupsafe @ https://files.pythonhosted.org/packages/fe/21/2eff1de472ca6c99ec3993eab11308787b9879af9ca8bbceb4868cf4f2ca/MarkupSafe-2.1.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=bfce63a9e7834b12b87c64d6b155fdd9b3b96191b6bd334bf37db7ff1fe457f2 # pip packaging @ 
https://files.pythonhosted.org/packages/ab/c3/57f0601a2d4fe15de7a553c00adbc901425661bf048f2a22dfc500caf121/packaging-23.1-py3-none-any.whl#sha256=994793af429502c4ea2ebf6bf664629d07c1a9fe974af92966e4b8d2df7edc61 -# pip platformdirs @ https://files.pythonhosted.org/packages/89/7e/c6ff9ddcf93b9b36c90d88111c4db354afab7f9a58c7ac3257fa717f1268/platformdirs-3.5.1-py3-none-any.whl#sha256=e2378146f1964972c03c085bb5662ae80b2b8c06226c54b2ff4aa9483e8a13a5 -# pip pluggy @ https://files.pythonhosted.org/packages/9e/01/f38e2ff29715251cf25532b9082a1589ab7e4f571ced434f98d0139336dc/pluggy-1.0.0-py2.py3-none-any.whl#sha256=74134bbf457f031a36d68416e1509f34bd5ccc019f0bcc952c7b909d06b37bd3 +# pip platformdirs @ https://files.pythonhosted.org/packages/14/51/fe5a0d6ea589f0d4a1b97824fb518962ad48b27cd346dcdfa2405187997a/platformdirs-3.10.0-py3-none-any.whl#sha256=d7c24979f292f916dc9cbf8648319032f551ea8c49a4c9bf2fb556a02070ec1d +# pip pluggy @ https://files.pythonhosted.org/packages/05/b8/42ed91898d4784546c5f06c60506400548db3f7a4b3fb441cba4e5c17952/pluggy-1.3.0-py3-none-any.whl#sha256=d89c696a773f8bd377d18e5ecda92b7a3793cbe66c87060a6fb58c7b6e1061f7 # pip py @ https://files.pythonhosted.org/packages/f6/f0/10642828a8dfb741e5f3fbaac830550a518a775c7fff6f04a007259b0548/py-1.11.0-py2.py3-none-any.whl#sha256=607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378 -# pip pygments @ https://files.pythonhosted.org/packages/34/a7/37c8d68532ba71549db4212cb036dbd6161b40e463aba336770e80c72f84/Pygments-2.15.1-py3-none-any.whl#sha256=db2db3deb4b4179f399a09054b023b6a586b76499d36965813c71aa8ed7b5fd1 +# pip pygments @ https://files.pythonhosted.org/packages/43/88/29adf0b44ba6ac85045e63734ae0997d3c58d8b1a91c914d240828d0d73d/Pygments-2.16.1-py3-none-any.whl#sha256=13fc09fa63bc8d8671a6d247e1eb303c4b343eaee81d861f3404db2935653692 # pip six @ https://files.pythonhosted.org/packages/d9/5a/e7c31adbe875f2abbb91bd84cf2dc52d792b5a01506781dbcf25c91daf11/six-1.16.0-py2.py3-none-any.whl#sha256=8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 # pip snowballstemmer @ https://files.pythonhosted.org/packages/ed/dc/c02e01294f7265e63a7315fe086dd1df7dacb9f840a804da846b96d01b96/snowballstemmer-2.2.0-py2.py3-none-any.whl#sha256=c8e1716e83cc398ae16824e5572ae04e0d9fc2c6b985fb0f900f5f0c96ecba1a -# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/06/c1/5e2cafbd03105ce50d8500f9b4e8a6e8d02e22d0475b574c3b3e9451a15f/sphinxcontrib_applehelp-1.0.4-py3-none-any.whl#sha256=29d341f67fb0f6f586b23ad80e072c8e6ad0b48417db2bde114a4c9746feb228 -# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/c5/09/5de5ed43a521387f18bdf5f5af31d099605c992fd25372b2b9b825ce48ee/sphinxcontrib_devhelp-1.0.2-py2.py3-none-any.whl#sha256=8165223f9a335cc1af7ffe1ed31d2871f325254c0423bc0c4c7cd1c1e4734a2e -# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/6e/ee/a1f5e39046cbb5f8bc8fba87d1ddf1c6643fbc9194e58d26e606de4b9074/sphinxcontrib_htmlhelp-2.0.1-py3-none-any.whl#sha256=c38cb46dccf316c79de6e5515e1770414b797162b23cd3d06e67020e1d2a6903 # pip sphinxcontrib-jsmath @ https://files.pythonhosted.org/packages/c2/42/4c8646762ee83602e3fb3fbe774c2fac12f317deb0b5dbeeedd2d3ba4b77/sphinxcontrib_jsmath-1.0.1-py2.py3-none-any.whl#sha256=2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178 -# pip sphinxcontrib-qthelp @ 
https://files.pythonhosted.org/packages/2b/14/05f9206cf4e9cfca1afb5fd224c7cd434dcc3a433d6d9e4e0264d29c6cdb/sphinxcontrib_qthelp-1.0.3-py2.py3-none-any.whl#sha256=bd9fc24bcb748a8d51fd4ecaade681350aa63009a347a8c14e637895444dfab6 -# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/c6/77/5464ec50dd0f1c1037e3c93249b040c8fc8078fdda97530eeb02424b6eea/sphinxcontrib_serializinghtml-1.1.5-py2.py3-none-any.whl#sha256=352a9a00ae864471d3a7ead8d7d79f5fc0b57e8b3f95e9867eb9eb28999b92fd -# pip threadpoolctl @ https://files.pythonhosted.org/packages/61/cf/6e354304bcb9c6413c4e02a747b600061c21d38ba51e7e544ac7bc66aecc/threadpoolctl-3.1.0-py3-none-any.whl#sha256=8b99adda265feb6773280df41eece7b2e6561b772d21ffd52e372f999024907b -# pip urllib3 @ https://files.pythonhosted.org/packages/4b/1d/f8383ef593114755429c307449e7717b87044b3bcd5f7860b89b1f759e34/urllib3-2.0.2-py3-none-any.whl#sha256=d055c2f9d38dc53c808f6fdc8eab7360b6fdbbde02340ed25cfbcd817c62469e +# pip threadpoolctl @ https://files.pythonhosted.org/packages/81/12/fd4dea011af9d69e1cad05c75f3f7202cdcbeac9b712eea58ca779a72865/threadpoolctl-3.2.0-py3-none-any.whl#sha256=2b7818516e423bdaebb97c723f86a7c6b0a83d3f3b0970328d66f4d9104dc032 +# pip urllib3 @ https://files.pythonhosted.org/packages/9b/81/62fd61001fa4b9d0df6e31d47ff49cfa9de4af03adecf339c7bc30656b37/urllib3-2.0.4-py3-none-any.whl#sha256=de7df1803967d2c2a98e4b11bb7d6bd9210474c46e8a0401514e3a42a75ebde4 # pip jinja2 @ https://files.pythonhosted.org/packages/bc/c3/f068337a370801f372f2f8f6bad74a5c140f6fda3d9de154052708dd3c65/Jinja2-3.1.2-py3-none-any.whl#sha256=6088930bfe239f0e6710546ab9c19c9ef35e29792895fed6e6e31a023a182a61 -# pip pytest @ https://files.pythonhosted.org/packages/1b/d1/72df649a705af1e3a09ffe14b0c7d3be1fd730da6b98beb4a2ed26b8a023/pytest-7.3.1-py3-none-any.whl#sha256=3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362 +# pip pytest @ https://files.pythonhosted.org/packages/33/b2/741130cbcf2bbfa852ed95a60dc311c9e232c7ed25bac3d9b8880a8df4ae/pytest-7.4.0-py3-none-any.whl#sha256=78bf16451a2eb8c7a2ea98e32dc119fd2aa758f1d5d66dbf0a59d69a3969df32 # pip python-dateutil @ https://files.pythonhosted.org/packages/36/7a/87837f39d0296e723bb9b62bbb257d0355c7f6128853c78955f57342a56d/python_dateutil-2.8.2-py2.py3-none-any.whl#sha256=961d03dc3453ebbc59dbdea9e4e11c5651520a876d0f4db161e8674aae935da9 # pip requests @ https://files.pythonhosted.org/packages/70/8e/0e2d847013cb52cd35b38c009bb167a1a26b2ce6cd6965bf26b47bc0bf44/requests-2.31.0-py3-none-any.whl#sha256=58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f # pip pooch @ https://files.pythonhosted.org/packages/84/8c/4da580db7fb4cfce8f5ed78e7d2aa542e6f201edd69d3d8a96917a8ff63c/pooch-1.7.0-py3-none-any.whl#sha256=74258224fc33d58f53113cf955e8d51bf01386b91492927d0d1b6b341a765ad7 # pip pytest-cov @ https://files.pythonhosted.org/packages/a7/4b/8b78d126e275efa2379b1c2e09dc52cf70df16fc3b90613ef82531499d73/pytest_cov-4.1.0-py3-none-any.whl#sha256=6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a # pip pytest-forked @ https://files.pythonhosted.org/packages/f4/af/9c0bda43e486a3c9bf1e0f876d0f241bc3f229d7d65d09331a0868db9629/pytest_forked-1.6.0-py3-none-any.whl#sha256=810958f66a91afb1a1e2ae83089d8dc1cd2437ac96b12963042fbb9fb4d16af0 -# pip sphinx @ https://files.pythonhosted.org/packages/4b/a9/9760e8373a11a62f5ef66684771b0a5b2c4a699bf0dbbc650ca2b75cec36/sphinx-7.0.1-py3-none-any.whl#sha256=60c5e04756c1709a98845ed27a2eed7a556af3993afb66e77fec48189f742616 -# pip numpydoc @ 
https://files.pythonhosted.org/packages/c4/81/ad9b8837442ff451eca82515b41ac425f87acff7e2fc016fd1bda13fc01a/numpydoc-1.5.0-py3-none-any.whl#sha256=c997759fb6fc32662801cece76491eedbc0ec619b514932ffd2b270ae89c07f9 # pip pytest-xdist @ https://files.pythonhosted.org/packages/21/08/b1945d4b4986eb1aa10cf84efc5293bba39da80a2f95db3573dd90678408/pytest_xdist-2.5.0-py3-none-any.whl#sha256=6fe5c74fec98906deb8f2d2b616b5c782022744978e7bd4695d39c8f42d0ce65 +# pip numpydoc @ https://files.pythonhosted.org/packages/c4/81/ad9b8837442ff451eca82515b41ac425f87acff7e2fc016fd1bda13fc01a/numpydoc-1.5.0-py3-none-any.whl#sha256=c997759fb6fc32662801cece76491eedbc0ec619b514932ffd2b270ae89c07f9 +# pip sphinxcontrib-applehelp @ https://files.pythonhosted.org/packages/c0/0c/261c0949083c0ac635853528bb0070c89e927841d4e533ba0b5563365c06/sphinxcontrib_applehelp-1.0.7-py3-none-any.whl#sha256=094c4d56209d1734e7d252f6e0b3ccc090bd52ee56807a5d9315b19c122ab15d +# pip sphinxcontrib-devhelp @ https://files.pythonhosted.org/packages/c0/03/010ac733ec7b7f71c1dc88e7115743ee466560d6d85373b56fb9916e4586/sphinxcontrib_devhelp-1.0.5-py3-none-any.whl#sha256=fe8009aed765188f08fcaadbb3ea0d90ce8ae2d76710b7e29ea7d047177dae2f +# pip sphinxcontrib-htmlhelp @ https://files.pythonhosted.org/packages/28/7a/958f8e3e6abe8219d0d1f1224886de847ab227b218f4a07b61bc337f64be/sphinxcontrib_htmlhelp-2.0.4-py3-none-any.whl#sha256=8001661c077a73c29beaf4a79968d0726103c5605e27db92b9ebed8bab1359e9 +# pip sphinxcontrib-qthelp @ https://files.pythonhosted.org/packages/1f/e5/1850f3f118e95581c1e30b57028ac979badee1eb29e70ee72b0241f5a185/sphinxcontrib_qthelp-1.0.6-py3-none-any.whl#sha256=bf76886ee7470b934e363da7a954ea2825650013d367728588732c7350f49ea4 +# pip sphinx @ https://files.pythonhosted.org/packages/a6/54/f4fcf7113eb051a46476ecce9485c463f58dbc3887c06dbfe1e67a8ce7c0/sphinx-7.2.5-py3-none-any.whl#sha256=9269f9ed2821c9ebd30e4204f5c2339f5d4980e377bc89cb2cb6f9b17409c20a +# pip sphinxcontrib-serializinghtml @ https://files.pythonhosted.org/packages/95/d6/2e0bda62b2a808070ac922d21a950aa2cb5e4fcfb87e5ff5f86bc43a2201/sphinxcontrib_serializinghtml-1.1.9-py3-none-any.whl#sha256=9b36e503703ff04f20e9675771df105e58aa029cfcbc23b8ed716019b7416ae1 diff --git a/build_tools/azure/pypy3_environment.yml b/build_tools/azure/pypy3_environment.yml index d4f0d22e96042..4b330811b3b89 100644 --- a/build_tools/azure/pypy3_environment.yml +++ b/build_tools/azure/pypy3_environment.yml @@ -9,7 +9,7 @@ dependencies: - numpy - blas[build=openblas] - scipy - - cython + - cython<3.0.0 - joblib - threadpoolctl - matplotlib diff --git a/build_tools/azure/pypy3_linux-64_conda.lock b/build_tools/azure/pypy3_linux-64_conda.lock index 5cadf0f58de2f..a54db261e6488 100644 --- a/build_tools/azure/pypy3_linux-64_conda.lock +++ b/build_tools/azure/pypy3_linux-64_conda.lock @@ -1,9 +1,9 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: 35e4a4f1db15219fa4cb71af7b54acc24ec7c3b3610c479f979c6c44cbd93db7 +# input_hash: b497bbdf0c852acb64f6a7e5ce3e83c2e19f6b37acfe89d6f731809c375bf6bb @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.7.22-hbcca054_0.conda#a73ecd2988327ad4c8f2c331482917f2 https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.1.0-h15d22d2_0.conda#afb656a334c409dd9805508af1c89c7a https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.conda#067bcc23164642f4c226da631f2a2e1d https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-3_pypy39_pp73.conda#6f23be0f8f1e4871998437b188425ea3 @@ -13,17 +13,17 @@ https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_0.conda#e805cbec4c29feb22e019245f7e47b6c https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 https://conda.anaconda.org/conda-forge/linux-64/libhiredis-1.0.2-h2cc385e_0.tar.bz2#b34907d3a81a3cd8095ee83d174c074a https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-2.1.5.1-h0b41bf4_0.conda#1edd9e67bdb90d78cea97733ff6b54e6 https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.23-pthreads_h80387f5_0.conda#9c5ea51ccb8ffae7d06c645869d24ce6 -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.1-hd590300_0.conda#82bf6f63eb15ef719b556b63feec3a77 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.1-hd590300_1.conda#2e1d7b458ac8f1e3ca4e18b77add6277 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.2-hd590300_0.conda#e5ac5227582d6c83ccf247288c0eb095 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 @@ -33,77 +33,77 @@ 
https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007 https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-17_linux64_openblas.conda#57fb44770b1bc832fb2dbefa1bd502de -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_0.conda#43017394a280a42b48d11d2a6e169901 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_0.conda#8e3e1cb77c4b355a3776bdfb74095bed https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.43.0-h2797004_0.conda#903fa782a9067d5934210df6d79220f6 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.23-pthreads_h855a84d_0.conda#ba8810202f8879562f01b4f9957c1ada https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_0.conda#aeafb07a327e3f14a796bf081ea07472 https://conda.anaconda.org/conda-forge/linux-64/ccache-4.8.1-h1fcd64f_0.conda#fd37a0c47d8b3667b73af0549037ce83 https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 https://conda.anaconda.org/conda-forge/linux-64/gdbm-1.18-h0a1914f_2.tar.bz2#b77bc399b07a19c00fe12fdc95ee0297 https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-17_linux64_openblas.conda#7ef0969b00fe3d6eef56a8151d3afb29 https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-17_linux64_openblas.conda#a2103882c46492e26500fcb56c03de8b -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-ha587672_6.conda#4e5ee4b062c21519efbee7e2ae608748 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.42.0-h2c6b66d_0.conda#1192f6ec654a5bc4ee1d64bdc4a3e5cc -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.4-h8ee46fc_1.conda#52d09ea80a42c0466214609ef0a2d62d -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 
+https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.1-h8b53f26_1.conda#5b09e13d732dda1a2bc9adc711164f4d +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.6-h4dfa4b3_0.conda#b096c85c415519259e731d8fb719a3ef +https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.43.0-h2c6b66d_0.conda#713f9eac95d051abe14c3774376854fe +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.6-h8ee46fc_0.conda#7590b76c3d11d21caa44f3fc38ac584a +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_0.conda#3db48055eab680e43a122e2c7494e7ae https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-haa2dc70_1.conda#980d8aca0bc23ca73fa8caa3e7c84c28 https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-17_linux64_openblas.conda#949709aa6ee6a2dcdb3de6dd99147d17 https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea -https://conda.anaconda.org/conda-forge/linux-64/pypy3.9-7.3.11-h9557127_1.conda#c5fe8c8aaecf7dd44dc3042789f95987 +https://conda.anaconda.org/conda-forge/linux-64/pypy3.9-7.3.12-h9557127_3.conda#3afdeabc635c518f3b385ebdc2c9b4fd https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-17_linux64_openblas.conda#fde382e41d77b65315fab79ab93a20ab -https://conda.anaconda.org/conda-forge/linux-64/python-3.9.16-0_73_pypy.conda#16eebd2564f86026ea0abe5b8e446438 +https://conda.anaconda.org/conda-forge/linux-64/python-3.9.17-0_73_pypy.conda#8c4b6b109d966a4e9f0df96c464358bf https://conda.anaconda.org/conda-forge/linux-64/blas-2.117-openblas.conda#54b4b02b897156056f3056f992261d0c -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39hc10206b_0.conda#dbf9c9549cfb4e7788b895d48d4eea08 +https://conda.anaconda.org/conda-forge/noarch/certifi-2023.7.22-pyhd8ed1ab_0.conda#7f3dbc9179b4dde7da98dfb151d0ad22 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.2.0-pyhd8ed1ab_0.conda#313516e9a4b08b12dfb1e1cd390a96e3 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.35-py39hc10206b_0.conda#9e7ab7c9dfff3ea8c3df6f68c657436b -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 +https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.36-py39hc10206b_0.conda#f4286c793e0f1a4bb6a04231bf01a080 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.3-pyhd8ed1ab_0.conda#e6518222753f519e911e83136d2158d9 +https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py39h2865249_1.tar.bz2#6b7e75ba141872a00154f312d43d9a8c 
+https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39ha90811c_0.conda#09ac3560eeeab8a527d65c1458f3008b https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.3-py39h129f8d9_0.conda#0021613f91e64bd6fa3aece9e5b68f34 +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.25.2-py39h129f8d9_0.conda#134b81b5cb145952044ebbb5c70bb306 https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.5.0-py39hb514683_1.conda#beec7faed9dff6b30e8a1a1c22f9f039 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.0.0-py39hb514683_0.conda#6e21ef6b7b5618ad333edc3602ae4e56 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc -https://conda.anaconda.org/conda-forge/noarch/pypy-7.3.11-0_pypy39.conda#059800e8aa07f99d31e3dd0bf553a3f6 +https://conda.anaconda.org/conda-forge/noarch/pypy-7.3.12-0_pypy39.conda#a6ee4ccd8c88b663f57fd30c18b3bed3 https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/noarch/setuptools-68.1.2-pyhd8ed1ab_0.conda#4fe12573bf499ff85a0a364e00cc5c53 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py39hf860d4a_0.conda#f3adae0ec927d6c139ef9557bda43fd0 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py39hf860d4a_0.conda#b5129f42740bdd8ce9829d1f57168885 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.7.1-pyha770c72_0.conda#c39d6a09fe819de4951c2642629d9115 https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.0.0-py39h4d8b378_0.tar.bz2#44eea5be274d005065d87df9cf2a9234 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.7-py39haa83c70_0.conda#77595fa3e3dfca46289e3722cb97b29b -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.39.4-py39hf860d4a_0.conda#fd4b05a718ebd4fabc806466f7f3ed8f -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-5.12.0-pyhd8ed1ab_0.conda#e5fd2260a231ee63b6969f4801082f2b 
-https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb +https://conda.anaconda.org/conda-forge/noarch/zipp-3.16.2-pyhd8ed1ab_0.conda#2da0451b54c4563c32490cb1b7cf68a1 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.1.0-py39ha90811c_0.conda#9ff9b1b02301cf75a8585895856fff0f +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.42.1-py39hf860d4a_0.conda#7f0f8b1c54e8da1eeaaaae3e33cd038f +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.0.1-pyhd8ed1ab_0.conda#d978c61aa5fc2c69380d53ad56b5ae86 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.0-pyhd8ed1ab_0.conda#3cfe9b9e958e7238a386933c75d190db https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-5.12.0-pyhd8ed1ab_0.conda#3544c818f0720c89eb16ae6940ab440b -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.7.1-hd8ed1ab_0.conda#f96688577f1faa58096d06a45136afa2 +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.4-pyhd8ed1ab_0.conda#18badd8fa3648d1beb1fcc7f2e0f756e +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.0.1-pyhd8ed1ab_0.conda#54661981fd331e20847d8a49543dd9af +https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.10.0-pyhd8ed1ab_0.conda#0809187ef9b89a3d94a5c24d13936236 +https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.1-py39h3a8b213_0.conda#9e1009635ea6b7924f827d6022d0ade6 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.2-py39hfcdda22_0.conda#27f24b3402229299a3d5fdeee0b417b9 https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.1-py39h4162558_0.conda#b6ca076a90a7f2a8d7ff976d243dd4c5 https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.10.1-py39h129f8d9_3.conda#ccc3e84894f1a2b3fea200b4e8946903 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.0-py39h6728ab1_0.conda#ee14077fae1c48e0ca5154f5a5427521 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.2-py39h4162558_0.conda#839abb578897996f795648dc0b621b30 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.11.2-py39h129f8d9_0.conda#296411437a9b4e75d088c7200bbc6500 +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.1-py39h00faaa6_0.conda#d5b46d053aa61bb420230a041afcb136 diff --git 
a/build_tools/azure/python_nogil_lock.txt b/build_tools/azure/python_nogil_lock.txt index cd44de206adb4..70a9e4fb69e82 100644 --- a/build_tools/azure/python_nogil_lock.txt +++ b/build_tools/azure/python_nogil_lock.txt @@ -8,20 +8,22 @@ --extra-index-url https://pypi.org/simple contourpy==1.0.7 - # via matplotlib + # via + # -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt + # matplotlib cycler==0.11.0 # via matplotlib cython==0.29.33 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt -exceptiongroup==1.1.1 +exceptiongroup==1.1.2 # via pytest -execnet==1.9.0 +execnet==2.0.2 # via pytest-xdist -fonttools==4.39.4 +fonttools==4.42.0 # via matplotlib iniconfig==2.0.0 # via pytest -joblib==1.2.0 +joblib==1.3.1 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt kiwisolver==1.4.4 # via matplotlib @@ -39,15 +41,15 @@ packaging==23.1 # pytest pillow==9.5.0 # via matplotlib -pluggy==1.0.0 +pluggy==1.2.0 # via pytest -pyparsing==3.0.9 +pyparsing==3.1.1 # via matplotlib -pytest==7.3.1 +pytest==7.4.0 # via # -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt # pytest-xdist -pytest-xdist==3.3.0 +pytest-xdist==3.3.1 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt python-dateutil==2.8.2 # via matplotlib @@ -55,7 +57,7 @@ scipy==1.9.3 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt six==1.16.0 # via python-dateutil -threadpoolctl==3.1.0 +threadpoolctl==3.2.0 # via -r /scikit-learn/build_tools/azure/python_nogil_requirements.txt tomli==2.0.1 # via pytest diff --git a/build_tools/azure/python_nogil_requirements.txt b/build_tools/azure/python_nogil_requirements.txt index 970059ede81aa..de7e729f77263 100644 --- a/build_tools/azure/python_nogil_requirements.txt +++ b/build_tools/azure/python_nogil_requirements.txt @@ -6,6 +6,9 @@ # the latest cython will be picked up from PyPI, rather than the one from the # python-nogil index matplotlib +# 2023-07-15 contourpy 1.1 needs meson which needs a recent pybind version +# which is not available for python/nogil ... 
+contourpy<1.1 numpy scipy cython diff --git a/build_tools/azure/test_script.sh b/build_tools/azure/test_script.sh index 98ac2e797b73c..5117473ea6366 100755 --- a/build_tools/azure/test_script.sh +++ b/build_tools/azure/test_script.sh @@ -49,7 +49,7 @@ if [[ "$COVERAGE" == "true" ]]; then fi if [[ -n "$CHECK_WARNINGS" ]]; then - TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning -Werror::numpy.VisibleDeprecationWarning" + TEST_CMD="$TEST_CMD -Werror::DeprecationWarning -Werror::FutureWarning -Werror::sklearn.utils.fixes.VisibleDeprecationWarning" # numpy's 1.19.0's tostring() deprecation is ignored until scipy and joblib # removes its usage @@ -75,10 +75,6 @@ if [[ "$PYTEST_XDIST_VERSION" != "none" ]]; then TEST_CMD="$TEST_CMD -n$XDIST_WORKERS" fi -if [[ "$SHOW_SHORT_SUMMARY" == "true" ]]; then - TEST_CMD="$TEST_CMD -ra" -fi - if [[ -n "$SELECTED_TESTS" ]]; then TEST_CMD="$TEST_CMD -k $SELECTED_TESTS" diff --git a/build_tools/azure/ubuntu_atlas_lock.txt b/build_tools/azure/ubuntu_atlas_lock.txt index 255d037ccbaee..1e90eaf311711 100644 --- a/build_tools/azure/ubuntu_atlas_lock.txt +++ b/build_tools/azure/ubuntu_atlas_lock.txt @@ -4,11 +4,11 @@ # # pip-compile --output-file=build_tools/azure/ubuntu_atlas_lock.txt build_tools/azure/ubuntu_atlas_requirements.txt # -cython==0.29.35 +cython==0.29.36 # via -r build_tools/azure/ubuntu_atlas_requirements.txt -exceptiongroup==1.1.1 +exceptiongroup==1.1.3 # via pytest -execnet==1.9.0 +execnet==2.0.2 # via pytest-xdist iniconfig==2.0.0 # via pytest @@ -16,11 +16,11 @@ joblib==1.1.1 # via -r build_tools/azure/ubuntu_atlas_requirements.txt packaging==23.1 # via pytest -pluggy==1.0.0 +pluggy==1.3.0 # via pytest py==1.11.0 # via pytest-forked -pytest==7.3.1 +pytest==7.4.0 # via # -r build_tools/azure/ubuntu_atlas_requirements.txt # pytest-forked diff --git a/build_tools/azure/ubuntu_atlas_requirements.txt b/build_tools/azure/ubuntu_atlas_requirements.txt index 57413851e5329..97b6bd8a83a26 100644 --- a/build_tools/azure/ubuntu_atlas_requirements.txt +++ b/build_tools/azure/ubuntu_atlas_requirements.txt @@ -1,7 +1,7 @@ # DO NOT EDIT: this file is generated from the specification found in the # following script to centralize the configuration for CI builds: # build_tools/update_environments_and_lock_files.py -cython +cython<3.0.0 joblib==1.1.1 # min threadpoolctl==2.0.0 # min pytest diff --git a/build_tools/circle/build_doc.sh b/build_tools/circle/build_doc.sh index 13be474ef4e28..35fee3ae50b65 100755 --- a/build_tools/circle/build_doc.sh +++ b/build_tools/circle/build_doc.sh @@ -148,8 +148,6 @@ else make_args=html fi -make_args="SPHINXOPTS=-T $make_args" # show full traceback on exception - # Installing required system packages to support the rendering of math # notation in the HTML documentation and to optimize the image files sudo -E apt-get -yq update --allow-releaseinfo-change diff --git a/build_tools/circle/doc_environment.yml b/build_tools/circle/doc_environment.yml index 84be13dfa5218..af00f57a3dc51 100644 --- a/build_tools/circle/doc_environment.yml +++ b/build_tools/circle/doc_environment.yml @@ -8,7 +8,7 @@ dependencies: - numpy - blas - scipy - - cython + - cython<3.0.0 - joblib - threadpoolctl - matplotlib @@ -24,6 +24,7 @@ dependencies: - compilers - sphinx=6.0.0 - sphinx-gallery + - sphinx-copybutton - numpydoc - sphinx-prompt - plotly diff --git a/build_tools/circle/doc_linux-64_conda.lock b/build_tools/circle/doc_linux-64_conda.lock index 76113302d2a0f..96176da5e65fb 100644 --- 
a/build_tools/circle/doc_linux-64_conda.lock +++ b/build_tools/circle/doc_linux-64_conda.lock @@ -1,72 +1,69 @@ # Generated by conda-lock. # platform: linux-64 -# input_hash: 936006a8395a70f77e3b4ebe07bd10d013d2e2d13b6042ce96f73632d466d840 +# input_hash: b41f095377c6c64519b9c000301e61a7405086d523d7eec1238cdf4f8b896bb2 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.7.22-hbcca054_0.conda#a73ecd2988327ad4c8f2c331482917f2 https://conda.anaconda.org/conda-forge/noarch/font-ttf-dejavu-sans-mono-2.37-hab24e00_0.tar.bz2#0c96522c6bdaed4b1566d11387caaf45 https://conda.anaconda.org/conda-forge/noarch/font-ttf-inconsolata-3.000-h77eed37_0.tar.bz2#34893075a5c9e55cdafac56607368fc6 https://conda.anaconda.org/conda-forge/noarch/font-ttf-source-code-pro-2.038-h77eed37_0.tar.bz2#4d59c254e01d9cde7957100457e2d5fb https://conda.anaconda.org/conda-forge/noarch/font-ttf-ubuntu-0.83-hab24e00_0.tar.bz2#19410c3df09dfb12d1206132a1d357c5 -https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_15.tar.bz2#5dd5127afd710f91f6a75821bac0a4f0 -https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.39-hcc3a1bd_1.conda#737be0d34c22d24432049ab7a3214de4 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-devel_linux-64-11.3.0-h210ce93_19.tar.bz2#9b7bdb0b42ce4e4670d32bfe0532b56a -https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.1.0-h15d22d2_0.conda#afb656a334c409dd9805508af1c89c7a -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-devel_linux-64-11.3.0-h210ce93_19.tar.bz2#8aee006c0662f551f3acef9a7077a5b9 -https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.conda#067bcc23164642f4c226da631f2a2e1d +https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_16.conda#7ca122655873935e02c91279c5b03c8c +https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.40-h41732ed_0.conda#7aca3059a1729aa76c597603f10b0dd3 +https://conda.anaconda.org/conda-forge/linux-64/libgcc-devel_linux-64-12.3.0-h8bca6fd_1.conda#3cc063c1c517cc9a2a95ff5b46d00474 +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-devel_linux-64-12.3.0-h8bca6fd_1.conda#6b100b3dbd31f05781aaef6d4a8886af +https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.2.0-h7e041cc_1.conda#acfb4817400db5804030a3a7ef7909a1 https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.9-3_cp39.conda#0dd193187d54e585cac7eab942a8847e https://conda.anaconda.org/conda-forge/noarch/tzdata-2023c-h71feb2d_0.conda#939e3e74d8be4dac89ce83b20de2492a https://conda.anaconda.org/conda-forge/noarch/fonts-conda-forge-1-0.tar.bz2#f766549260d6815b0c52253f1fb1bb29 -https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.1.0-h69a702a_0.conda#506dc07710dd5b0ba63cbf134897fc10 -https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.1.0-he5830b7_0.conda#56ca14d57ac29a75d23a39eb3ee0ddeb -https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_15.tar.bz2#66c192522eacf5bb763568b4e415d133 -https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.39-he00db2b_1.conda#3d726e8b51a1f5bfd66892a2b7d9db2d +https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h807b86a_1.conda#8bb001683321dcbde117a7337b5aace7 
+https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_16.conda#071ea8dceff4d30ac511f4a2f8437cd1 +https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.40-hf600244_0.conda#33084421a8c0af6aef1b439707f7662a https://conda.anaconda.org/conda-forge/noarch/fonts-conda-ecosystem-1-0.tar.bz2#fee5683a3f04bd15cbd8318b096a27ab -https://conda.anaconda.org/conda-forge/linux-64/binutils-2.39-hdd6e379_1.conda#1276c18b0a562739185dbf5bd14b57b2 -https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.39-h5fc0e48_13.conda#7f25a524665e4e2f8a5f86522f8d0e31 +https://conda.anaconda.org/conda-forge/linux-64/binutils-2.40-hdd6e379_0.conda#ccc940fddbc3fcd3d79cd4c654c4b5c4 +https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.40-hbdbef99_2.conda#adfebae9fdc63a598495dfe3b006973a https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_kmp_llvm.tar.bz2#562b26ba2e19059551a811e72ab7f793 -https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.1.0-he5830b7_0.conda#cd93f779ff018dd85c7544c015c9db3c -https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.8-h166bdaf_0.tar.bz2#be733e69048951df1e4b4b7bb8c7666f +https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h807b86a_1.conda#ff8999574b465089ba0aa25a5e865bd0 +https://conda.anaconda.org/conda-forge/linux-64/alsa-lib-1.2.10-hd590300_0.conda#75dae9a4201732aa78a530b826ee5fe0 https://conda.anaconda.org/conda-forge/linux-64/aom-3.5.0-h27087fc_0.tar.bz2#a08150fd2298460cd1fcccf626305642 https://conda.anaconda.org/conda-forge/linux-64/attr-2.5.1-h166bdaf_1.tar.bz2#d9c69a24ad678ffce24c6543a0176b00 https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-h7f98852_4.tar.bz2#a1fd65c7ccbf10880423d82bca54eb54 -https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.19.1-hd590300_0.conda#e8c18d865be43e2fb3f7a145b6adf1f5 https://conda.anaconda.org/conda-forge/linux-64/charls-2.4.2-h59595ed_0.conda#4336bd67920dd504cd8c6761d6a99645 https://conda.anaconda.org/conda-forge/linux-64/dav1d-1.2.1-hd590300_0.conda#418c6ca5929a611cbd69204907a83995 https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz2#14947d8770185e5153fdd04d4673ed37 https://conda.anaconda.org/conda-forge/linux-64/giflib-5.2.1-h0b41bf4_3.conda#96f3b11872ef6fad973eac856cd2624f https://conda.anaconda.org/conda-forge/linux-64/graphite2-1.3.13-h58526e2_1001.tar.bz2#8c54672728e8ec6aa6db90cf2806d220 -https://conda.anaconda.org/conda-forge/linux-64/icu-72.1-hcb278e6_0.conda#7c8d20d847bb45f56bd941578fcfa146 +https://conda.anaconda.org/conda-forge/linux-64/icu-73.2-h59595ed_0.conda#cc47e1facc155f91abd89b11e48e72ff https://conda.anaconda.org/conda-forge/linux-64/jxrlib-1.1-h7f98852_2.tar.bz2#8e787b08fe19986d99d034b839df2961 https://conda.anaconda.org/conda-forge/linux-64/keyutils-1.6.1-h166bdaf_0.tar.bz2#30186d27e2c9fa62b45fb1476b7200e3 https://conda.anaconda.org/conda-forge/linux-64/lame-3.100-h166bdaf_1003.tar.bz2#a8832b479f93521a9e7b5b743803be51 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f https://conda.anaconda.org/conda-forge/linux-64/libaec-1.0.6-hcb278e6_1.conda#0f683578378cddb223e7fd24f785ab2a -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 -https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.18-h0b41bf4_0.conda#6aa9c9de5542ecb07fdda9ca626252d8 -https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-h516909a_1.tar.bz2#6f8720dff19e17ce5d48cfe7f3d2f0a3 
+https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.1.0-hd590300_0.conda#e805cbec4c29feb22e019245f7e47b6c +https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.19-hd590300_0.conda#1635570038840ee3f9c71d22aa5b8b6d https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.2-h7f98852_5.tar.bz2#d645c6d2ac96843a2bfaccd2d62b3ac3 +https://conda.anaconda.org/conda-forge/linux-64/libgfortran5-13.2.0-ha4646dd_1.conda#a0d27fd5c6f05aa45e9602b1db49581c https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d -https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-2.1.5.1-h0b41bf4_0.conda#1edd9e67bdb90d78cea97733ff6b54e6 +https://conda.anaconda.org/conda-forge/linux-64/libjpeg-turbo-2.1.5.1-hd590300_1.conda#323e90742f0f48fc22bea908735f55e6 https://conda.anaconda.org/conda-forge/linux-64/libnsl-2.0.0-h7f98852_0.tar.bz2#39b1328babf85c7c3a61636d9cd50206 https://conda.anaconda.org/conda-forge/linux-64/libogg-1.3.4-h7f98852_1.tar.bz2#6e8cc2173440d77708196c5b93771680 -https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.23-pthreads_h80387f5_0.conda#9c5ea51ccb8ffae7d06c645869d24ce6 https://conda.anaconda.org/conda-forge/linux-64/libopus-1.3.1-h7f98852_1.tar.bz2#15345e56d527b330e1cacbdf58676e8f -https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-11.3.0-h239ccf8_19.tar.bz2#d17fd55aed84ab6592c5419b6600501c +https://conda.anaconda.org/conda-forge/linux-64/libsanitizer-12.3.0-h0f45ef3_1.conda#9127a4e7cbf6ffff9f8356653b971d4e https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 +https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.2-hd590300_0.conda#30de3fd9b3b602f7473f30e684eeea8c +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad https://conda.anaconda.org/conda-forge/linux-64/libzopfli-1.0.3-h9c3ff4c_0.tar.bz2#c66fe2d123249af7651ebde8984c51c2 https://conda.anaconda.org/conda-forge/linux-64/lz4-c-1.9.4-hcb278e6_0.conda#318b08df404f9c9be5712aaa5a6f0bb0 https://conda.anaconda.org/conda-forge/linux-64/mpg123-1.31.3-hcb278e6_0.conda#141a126675b6d1a4eabb111a4a353898 https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.1-hd590300_1.conda#2e1d7b458ac8f1e3ca4e18b77add6277 +https://conda.anaconda.org/conda-forge/linux-64/openssl-3.1.2-hd590300_0.conda#e5ac5227582d6c83ccf247288c0eb095 https://conda.anaconda.org/conda-forge/linux-64/pixman-0.40.0-h36c2ea0_0.tar.bz2#660e72c82f2e75a6b3fe6a6e75c79f19 https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 +https://conda.anaconda.org/conda-forge/linux-64/rav1e-0.6.6-he8a937b_2.conda#77d9955b4abddb811cb8ab1aa7d743e4 https://conda.anaconda.org/conda-forge/linux-64/snappy-1.1.10-h9fff704_0.conda#e6d228cd0bb74a51dd18f5bfce0b4115 
-https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.38-h0b41bf4_0.conda#9ac34337e5101a87e5d91da05d84aa48 +https://conda.anaconda.org/conda-forge/linux-64/svt-av1-1.7.0-h59595ed_0.conda#b6e0b4f1edc2740d1cf87669195c39d4 https://conda.anaconda.org/conda-forge/linux-64/xorg-kbproto-1.0.7-h7f98852_1002.tar.bz2#4b230e8381279d76131116660f5a241a https://conda.anaconda.org/conda-forge/linux-64/xorg-libice-1.1.1-hd590300_0.conda#b462a33c0be1421532f28bfe8f4a7514 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 @@ -76,242 +73,237 @@ https://conda.anaconda.org/conda-forge/linux-64/xorg-xextproto-7.3.0-h0b41bf4_10 https://conda.anaconda.org/conda-forge/linux-64/xorg-xf86vidmodeproto-2.3.1-h7f98852_1002.tar.bz2#3ceea9668625c18f19530de98b15d5b0 https://conda.anaconda.org/conda-forge/linux-64/xorg-xproto-7.0.31-h7f98852_1007.tar.bz2#b4a4381d54784606820704f7b5f05a15 https://conda.anaconda.org/conda-forge/linux-64/xz-5.2.6-h166bdaf_0.tar.bz2#2161070d867d1b1204ea749c8eec4ef0 -https://conda.anaconda.org/conda-forge/linux-64/yaml-0.2.5-h7f98852_2.tar.bz2#4cb3ad778ec2d5a7acbdf254eb1c42ae https://conda.anaconda.org/conda-forge/linux-64/zfp-1.0.0-h27087fc_3.tar.bz2#0428af0510c3fafedf1c66b43102a34b https://conda.anaconda.org/conda-forge/linux-64/zlib-ng-2.0.7-h0b41bf4_0.conda#49e8329110001f04923fe7e864990b0c https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9b5aca60558d02ddaa09d599e55920 -https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-11.3.0-hab1b70f_19.tar.bz2#89ac16d36e66ccb9ca5d34c9217e5799 -https://conda.anaconda.org/conda-forge/linux-64/libavif-0.11.1-h8182462_2.conda#41c399ed4c439e37b844c24ab5621b5a -https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-17_linux64_openblas.conda#57fb44770b1bc832fb2dbefa1bd502de -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 -https://conda.anaconda.org/conda-forge/linux-64/libcap-2.67-he9d0100_0.conda#d05556c80caffff164d17bdea0105a1a +https://conda.anaconda.org/conda-forge/linux-64/gcc_impl_linux-64-12.3.0-he2b93b0_1.conda#fbb52c3c2fb330851e8bfc5a5ad69dc4 +https://conda.anaconda.org/conda-forge/linux-64/libavif16-1.0.1-h014f275_1.conda#b18d2420e0815d39c035e3bd038d2bfd +https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.1.0-hd590300_0.conda#43017394a280a42b48d11d2a6e169901 +https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.1.0-hd590300_0.conda#8e3e1cb77c4b355a3776bdfb74095bed +https://conda.anaconda.org/conda-forge/linux-64/libcap-2.69-h0f662aa_0.conda#25cb5999faa414e5ccb2c1388f62d3d5 https://conda.anaconda.org/conda-forge/linux-64/libedit-3.1.20191231-he28a2e2_2.tar.bz2#4d331e44109e3f0e19b4cb8f9b82f3e1 https://conda.anaconda.org/conda-forge/linux-64/libevent-2.1.12-hf998b51_1.conda#a1cfcc585f0c42bf8d5546bb1dfb668d -https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.2-h27087fc_0.tar.bz2#7daf72d8e2a8e848e11d63ed6d1026e0 -https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.46-h620e276_0.conda#27e745f6f2e4b757e95dd7225fbe6bdb -https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.52.0-h61bc06f_0.conda#613955a50485812985c059e7b269f42e +https://conda.anaconda.org/conda-forge/linux-64/libflac-1.4.3-h59595ed_0.conda#ee48bf17cc83a00f59ca1494d5646869 
+https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-13.2.0-h69a702a_1.conda#394218a92951499aed2ab1bafb30b570 +https://conda.anaconda.org/conda-forge/linux-64/libgpg-error-1.47-h71f35ed_0.conda#c2097d0b46367996f09b4e8e4920384a https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 -https://conda.anaconda.org/conda-forge/linux-64/libssh2-1.11.0-h0841786_0.conda#1f5a58e686b13bcfde88b93f547d23fe +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.43.0-h2797004_0.conda#903fa782a9067d5934210df6d79220f6 https://conda.anaconda.org/conda-forge/linux-64/libvorbis-1.3.7-h9c3ff4c_0.tar.bz2#309dec04b70a3cc0f1e84a4013683bc0 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.15-h0b41bf4_0.conda#33277193f5b92bad9fdd230eb700929c -https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.4-h0d562d8_0.conda#e46fad17d5fb57316b956f88dca765e4 -https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.32-hf1915f5_2.conda#cf4a8f520fdad3a63bb2bce74576cd2d -https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.23-pthreads_h855a84d_0.conda#ba8810202f8879562f01b4f9957c1ada +https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.11.5-h232c23b_1.conda#f3858448893839820d4bcfb14ad3ecdf +https://conda.anaconda.org/conda-forge/linux-64/mysql-common-8.0.33-hf1915f5_4.conda#f6f0ac5665849afc0716213a6cff224d https://conda.anaconda.org/conda-forge/linux-64/pcre2-10.40-hc3806b6_0.tar.bz2#69e2c796349cd9b273890bee0febfe1b https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 https://conda.anaconda.org/conda-forge/linux-64/xorg-libsm-1.2.4-h7391055_0.conda#93ee23f12bc2e684548181256edd2cf6 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.4-h0f2a231_0.conda#876286b5941933a0f558777e57d883cc -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 -https://conda.anaconda.org/conda-forge/linux-64/c-blosc2-2.9.2-hb4ffafa_0.conda#e029f773ae3355c8a05ad7c3db2f8a4b -https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 -https://conda.anaconda.org/conda-forge/linux-64/gcc-11.3.0-h02d0930_13.conda#ead4470a123fb664e358d02a333676ba -https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-11.3.0-he6f903b_13.conda#90a9fa7151e709ba224232ea9bfa4fea -https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-11.3.0-he34c6f7_19.tar.bz2#3de873ee757f1a2e583416a3583f84c4 -https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-11.3.0-hab1b70f_19.tar.bz2#b73564a352e64bb5f2c9bfd3cd6dd127 -https://conda.anaconda.org/conda-forge/linux-64/krb5-1.20.1-h81ceb04_0.conda#89a41adce7106749573d883b2f657d78 -https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-17_linux64_openblas.conda#7ef0969b00fe3d6eef56a8151d3afb29 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 
+https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 +https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.5-h0f2a231_0.conda#009521b7ed97cca25f8f997f9e745976 +https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hd590300_0.conda#aeafb07a327e3f14a796bf081ea07472 +https://conda.anaconda.org/conda-forge/linux-64/c-blosc2-2.10.2-hb4ffafa_0.conda#1a88c95afde6f13403492cac91352568 +https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-h267a509_2.conda#9ae35c3d96db2c94ce0cef86efdfa2cb +https://conda.anaconda.org/conda-forge/linux-64/gcc-12.3.0-h8d2909c_2.conda#e2f2f81f367e14ca1f77a870bda2fe59 +https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-12.3.0-h76fc315_2.conda#11517e7b5c910c5b5d6985c0c7eb7f50 +https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-12.3.0-hfcedea8_1.conda#d7f523db90829829dd1b219fae1e5be0 +https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-12.3.0-he2b93b0_1.conda#9e3eafc891d1fce109f8859159b29994 +https://conda.anaconda.org/conda-forge/linux-64/krb5-1.21.2-h659d440_0.conda#cd95826dbd331ed1be26bdf401432844 https://conda.anaconda.org/conda-forge/linux-64/libgcrypt-1.10.1-h166bdaf_0.tar.bz2#f967fc95089cd247ceed56eda31de3a9 -https://conda.anaconda.org/conda-forge/linux-64/libglib-2.76.3-hebfc3b9_0.conda#a64f11b244b2c112cd3fa1cbe9493999 -https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-17_linux64_openblas.conda#a2103882c46492e26500fcb56c03de8b -https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_2.conda#5c0a511fa7d223d8661fefcf77b2a877 -https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.0-hb75c966_0.conda#c648d19cd9c8625898d5d370414de7c7 -https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-ha587672_6.conda#4e5ee4b062c21519efbee7e2ae608748 -https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea -https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.32-hca2cd23_2.conda#20b4708cd04bdc8138d03314ddd97885 -https://conda.anaconda.org/conda-forge/linux-64/nss-3.89-he45b914_0.conda#2745719a58eeaab6657256a3f142f099 -https://conda.anaconda.org/conda-forge/linux-64/python-3.9.16-h2782a2a_0_cpython.conda#95c9b7c96a7fd7342e0c9d0a917b8f78 +https://conda.anaconda.org/conda-forge/linux-64/libglib-2.78.0-hebfc3b9_0.conda#e618003da3547216310088478e475945 +https://conda.anaconda.org/conda-forge/linux-64/libllvm15-15.0.7-h5cf9203_3.conda#9efe82d44b76a7529a1d702e5a37752e +https://conda.anaconda.org/conda-forge/linux-64/libopenblas-0.3.24-pthreads_h413a1c8_0.conda#6e4ef6ca28655124dcde9bd500e44c32 +https://conda.anaconda.org/conda-forge/linux-64/libsndfile-1.2.2-hbc2eb40_0.conda#38f84d395629e48b7c7b48a8ca740341 +https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.6.0-h29866fb_1.conda#4e9afd30f4ccb2f98645e51005f82236 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.6-h4dfa4b3_0.conda#b096c85c415519259e731d8fb719a3ef +https://conda.anaconda.org/conda-forge/linux-64/mysql-libs-8.0.33-hca2cd23_4.conda#db7f2c877209ac620fcd1c3ce7407cf0 +https://conda.anaconda.org/conda-forge/linux-64/nss-3.92-h1d7d5a4_0.conda#22c89a3d87828fe925b310b9cdf0f574 +https://conda.anaconda.org/conda-forge/linux-64/python-3.9.18-h0755675_0_cpython.conda#3ede353bc605068d9677e700b1847382 
https://conda.anaconda.org/conda-forge/linux-64/xcb-util-0.4.0-hd590300_1.conda#9bfac7ccd94d54fd21a0501296d60424 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-keysyms-0.4.0-h8ee46fc_1.conda#632413adcd8bc16b515cab87a2932913 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-renderutil-0.3.9-hd590300_1.conda#e995b155d938b6779da6ace6c6b13816 https://conda.anaconda.org/conda-forge/linux-64/xcb-util-wm-0.4.1-h8ee46fc_1.conda#90108a432fb5c6150ccfee3f03388656 -https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.4-h8ee46fc_1.conda#52d09ea80a42c0466214609ef0a2d62d +https://conda.anaconda.org/conda-forge/linux-64/xorg-libx11-1.8.6-h8ee46fc_0.conda#7590b76c3d11d21caa44f3fc38ac584a https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.13-pyhd8ed1ab_0.conda#06006184e203b61d3525f90de394471e -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 -https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.5.2-h0b41bf4_0.conda#69afb4e35be6366c2c1f9ed7f49bc3e6 -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 -https://conda.anaconda.org/conda-forge/noarch/click-8.1.3-unix_pyhd8ed1ab_2.tar.bz2#20e4087407c7cb04a40817114b333dbf -https://conda.anaconda.org/conda-forge/noarch/cloudpickle-2.2.1-pyhd8ed1ab_0.conda#b325bfc4cff7d7f8a868f1f7ecc4ed16 +https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hd590300_0.conda#3db48055eab680e43a122e2c7494e7ae +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py39h3d6467e_0.conda#8a1b6b1f5e230aaf6408d6b0aef3492f +https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.6.0-hd590300_0.conda#ea6c792f792bdd7ae6e7e2dee32f0a48 +https://conda.anaconda.org/conda-forge/noarch/certifi-2023.7.22-pyhd8ed1ab_0.conda#7f3dbc9179b4dde7da98dfb151d0ad22 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.2.0-pyhd8ed1ab_0.conda#313516e9a4b08b12dfb1e1cd390a96e3 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.35-py39h3d6467e_0.conda#019c9509764e66c9d9d38b5ca365a9f4 +https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.36-py39h3d6467e_0.conda#879336ef992db7dd6a9dc4eb1e34ce58 https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-h5008d03_3.tar.bz2#ecfff944ba3960ecb334b9a2663d708d https://conda.anaconda.org/conda-forge/linux-64/docutils-0.19-py39hf3d152e_1.tar.bz2#adb733ec2ee669f6d010758d054da60f -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.3-pyhd8ed1ab_0.conda#e6518222753f519e911e83136d2158d9 +https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d -https://conda.anaconda.org/conda-forge/noarch/fsspec-2023.5.0-pyh1a96a4e_0.conda#20edd290b319aa0eff3e9055375756dc 
-https://conda.anaconda.org/conda-forge/linux-64/gfortran-11.3.0-ha859ce3_13.conda#dd92c047f03f5288b111117b47fdff3c -https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-11.3.0-h3c55166_13.conda#cc56575e38eb6bf082654de641476b15 -https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.76.3-hfc55251_0.conda#8951eedf3cdf94dd733c1b5eee1f4880 -https://conda.anaconda.org/conda-forge/linux-64/gxx-11.3.0-h02d0930_13.conda#b8882bac01c133f6f8ac86193c6c00a7 -https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-11.3.0-hc203a17_13.conda#c22e035729c5d224dd875274c92a0522 +https://conda.anaconda.org/conda-forge/linux-64/gfortran-12.3.0-h499e0f7_2.conda#0558a8c44eb7a18e6682bd3a8ae6dcab +https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-12.3.0-h7fe76b4_2.conda#3a749210487c0358b6f135a648cbbf60 +https://conda.anaconda.org/conda-forge/linux-64/glib-tools-2.78.0-hfc55251_0.conda#e10134de3558dd95abda6987b5548f4f +https://conda.anaconda.org/conda-forge/linux-64/gxx-12.3.0-h8d2909c_2.conda#673bac341be6b90ef9e8abae7e52ca46 +https://conda.anaconda.org/conda-forge/linux-64/gxx_linux-64-12.3.0-h8a814eb_2.conda#f517b1525e9783849bd56a5dc45a9960 https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py39hf939315_1.tar.bz2#41679a052a8ce841c74df1ebc802e411 -https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-haa2dc70_1.conda#980d8aca0bc23ca73fa8caa3e7c84c28 -https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_2.conda#907344cee64101d44d806bbe0fccb01d -https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h36d4200_3.conda#c9f4416a34bc91e0eb029f912c68f81f -https://conda.anaconda.org/conda-forge/linux-64/libcurl-8.1.2-h409715c_0.conda#50c873c9660ed116707ae15b663928d8 -https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-17_linux64_openblas.conda#949709aa6ee6a2dcdb3de6dd99147d17 -https://conda.anaconda.org/conda-forge/linux-64/libpq-15.3-hbcd7760_1.conda#8afb2a97d256ffde95b91a6283bc598c -https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-253-h8c4010b_1.conda#9176b1e2cb8beca37a7510b0e801e38f -https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py39h7633fee_0.conda#3822b0ae733e022c10469c0e46bdddc4 +https://conda.anaconda.org/conda-forge/noarch/lazy_loader-0.3-pyhd8ed1ab_0.conda#69ea1d0fa7ab33b48c88394ad1dead65 +https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.15-h7f713cb_2.conda#9ab79924a3760f85a799f21bc99bd655 +https://conda.anaconda.org/conda-forge/linux-64/libblas-3.9.0-18_linux64_openblas.conda#bcddbb497582ece559465b9cd11042e7 +https://conda.anaconda.org/conda-forge/linux-64/libclang13-15.0.7-default_h9986a30_3.conda#1720df000b48e31842500323cb7be18c +https://conda.anaconda.org/conda-forge/linux-64/libcups-2.3.3-h4637d8d_4.conda#d4529f4dff3057982a7617c7ac58fde3 +https://conda.anaconda.org/conda-forge/linux-64/libpq-15.4-hfc447b1_0.conda#b9ce311e7aba8b5fc3122254f0a6e97e +https://conda.anaconda.org/conda-forge/linux-64/libsystemd0-254-h3516f8a_0.conda#df4b1cd0c91b4234fb02b5701a4cdddc 
https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.3-py39hd1e30aa_0.conda#9c858d105816f454c6b64f3e19184b60 https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 https://conda.anaconda.org/conda-forge/noarch/networkx-3.1-pyhd8ed1ab_0.conda#254f787d5068bc89f578bf63893ce8b4 -https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.3-py39h6183b62_0.conda#8626d6d5169950ce4b99b082667773f7 -https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea +https://conda.anaconda.org/conda-forge/linux-64/openblas-0.3.24-pthreads_h7a3da1a_0.conda#ebe8e905b06dfc5b4b40642d34b1d2f3 +https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-h488ebb8_3.conda#128c25b7fe6a25286a48f3a6a9b5b6f3 https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d https://conda.anaconda.org/conda-forge/noarch/ply-3.11-py_1.tar.bz2#7205635cd71531943440fbfe3b6b5727 https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.5-py39h72bdee0_0.conda#1d54d3a75c3192ab7655d9c3d16809f1 https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pygments-2.15.1-pyhd8ed1ab_0.conda#d316679235612869eba305aa7d41d9bf -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc +https://conda.anaconda.org/conda-forge/noarch/pygments-2.16.1-pyhd8ed1ab_0.conda#40e5cb18165466773619e5c963f00a7b +https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 https://conda.anaconda.org/conda-forge/noarch/python-tzdata-2023.3-pyhd8ed1ab_0.conda#2590495f608a63625e165915fb4e2e34 -https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0-py39hb9d737c_5.tar.bz2#ef9db3c38ae7275f6b14491cfe61a248 -https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3.post1-pyhd8ed1ab_0.conda#c93346b446cd08c169d843ae5fc0da97 +https://conda.anaconda.org/conda-forge/noarch/setuptools-68.2.2-pyhd8ed1ab_0.conda#fc2166155db840c634a1291a5c35a709 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.4-pyhd8ed1ab_0.conda#5a31a7d564f551d0e6dff52fd8cb5b16 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.2-py_0.tar.bz2#68e01cac9d38d0e717cd5c87bc3d2cc9 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.1-pyhd8ed1ab_0.conda#6c8c4d6eb2325e59290ac6dbbeacd5f0 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-py_0.tar.bz2#67cd9d9c0382d37479b4d306c369a2d4 
-https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.3-py_0.tar.bz2#d01180388e6d1838c3e1ad029590aa7a -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.5-pyhd8ed1ab_2.tar.bz2#9ff55a0901cf952f05c654394de76bf7 -https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.2-pyhd8ed1ab_0.conda#7b39e842b52966a99e229739cd4dc36e -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 +https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.3-pyhd8ed1ab_0.conda#1482e77f87c6a702a7e05ef22c9b197b +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 https://conda.anaconda.org/conda-forge/noarch/toml-0.10.2-pyhd8ed1ab_0.tar.bz2#f832c45a477c78bebd107098db465095 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py39hd1e30aa_0.conda#da334eecb1ea2248e28294c49e6f6d89 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py39hd1e30aa_0.conda#ee7f18d58a96b04fdbd2e55f7694ae0d +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.8.0-pyha770c72_0.conda#5b1be40a26d10a06f6d4f1f9e19fa0c7 https://conda.anaconda.org/conda-forge/linux-64/unicodedata2-15.0.0-py39hb9d737c_0.tar.bz2#230d65004135bf312504a1bbcb0c7a08 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.40.0-pyhd8ed1ab_0.conda#49bb0d9e60ce1db25e151780331bb5f3 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.2-pyhd8ed1ab_0.conda#1ccd092478b3e0ee10d7a891adbf8a4f https://conda.anaconda.org/conda-forge/linux-64/xcb-util-image-0.4.0-h8ee46fc_1.conda#9d7bcddf49cbf727730af10e71022c73 +https://conda.anaconda.org/conda-forge/linux-64/xkeyboard-config-2.39-hd590300_0.conda#d88c7fc8a11858fb14761832e4da1954 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxext-1.3.4-h0b41bf4_2.conda#82b6df12252e6f32402b96dacc656fec -https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.10-h7f98852_1003.tar.bz2#f59c1242cc1dd93e72c2ee2b360979eb -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf +https://conda.anaconda.org/conda-forge/linux-64/xorg-libxrender-0.9.11-hd590300_0.conda#ed67c36f215b310412b2af935bf3e530 +https://conda.anaconda.org/conda-forge/noarch/zipp-3.16.2-pyhd8ed1ab_0.conda#2da0451b54c4563c32490cb1b7cf68a1 https://conda.anaconda.org/conda-forge/noarch/babel-2.12.1-pyhd8ed1ab_1.conda#ac432e732804a81ddcf29c92ead57cde -https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-17_linux64_openblas.conda#fde382e41d77b65315fab79ab93a20ab https://conda.anaconda.org/conda-forge/linux-64/brunsli-0.1-h9c3ff4c_0.tar.bz2#c1ac6229d0bfd14f8354ff9ad2a26cad -https://conda.anaconda.org/conda-forge/linux-64/cairo-1.16.0-hbbf8b49_1016.conda#c1dd96500b9b1a75e9e511931f415cbc -https://conda.anaconda.org/conda-forge/linux-64/cfitsio-4.2.0-hd9d235c_0.conda#8c57a9adbafd87f5eff842abde599cb4 -https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.0.7-py39h4b4f3f3_0.conda#c5387f3fb1f5b8b71e1c865fc55f4951 
-https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.5.2-hf52228f_0.conda#6b3b19e359824b97df7145c8c878c8be -https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.0-py39hb9d737c_1.tar.bz2#eb31327ace8dac15c2df243d9505a132 -https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.39.4-py39hd1e30aa_0.conda#80605b792f58cf5c78a5b7e20cef1e35 -https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.5.2-hdb1a99f_0.conda#265323e1bd53709aeb739c9b1794b398 -https://conda.anaconda.org/conda-forge/linux-64/glib-2.76.3-hfc55251_0.conda#950e02f5665f5f4ff0437a6acba58798 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-5.12.0-pyhd8ed1ab_0.conda#e5fd2260a231ee63b6969f4801082f2b +https://conda.anaconda.org/conda-forge/linux-64/cairo-1.16.0-h0c91306_1017.conda#3db543896d34fc6804ddfb9239dcb125 +https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.6.0-h00ab1b0_0.conda#364c6ae36c4e36fcbd4d273cf4db78af +https://conda.anaconda.org/conda-forge/linux-64/fonttools-4.42.1-py39hd1e30aa_0.conda#de06dc7edaddbd3b60c050f3a95d6fe6 +https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.6.0-heb67821_0.conda#b65c49dda97ae497abcbdf3a8ba0018f +https://conda.anaconda.org/conda-forge/linux-64/glib-2.78.0-hfc55251_0.conda#2f55a36b549f51a7e0c2b1e3c3f0ccd4 +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.8.0-pyha770c72_0.conda#4e9f59a060c3be52bc4ddc46ee9b6946 +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.0.1-pyhd8ed1ab_0.conda#d978c61aa5fc2c69380d53ad56b5ae86 https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.2-pyhd8ed1ab_1.tar.bz2#c8490ed5c70966d232fdd389d0dbed37 -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_2.conda#1a4fe5162abe4a19b5a9dedf158a0ff9 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc +https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.9.0-18_linux64_openblas.conda#93dd9ab275ad888ed8113953769af78c +https://conda.anaconda.org/conda-forge/linux-64/libclang-15.0.7-default_h7634d5b_3.conda#0922208521c0463e690bbaebba7eb551 +https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.9.0-18_linux64_openblas.conda#a1244707531e5b143c420c70573c8ec5 +https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-1.5.0-h5d7e998_3.conda#c91ea308d7bf70b62ddda568478aa03b https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b -https://conda.anaconda.org/conda-forge/noarch/partd-1.4.0-pyhd8ed1ab_0.conda#721dab5803ea92ce02ddc4ee50aa0c48 -https://conda.anaconda.org/conda-forge/linux-64/pillow-9.5.0-py39haaeba84_1.conda#d7aa9b99ed6ade75fbab1e4cedcb3ce2 -https://conda.anaconda.org/conda-forge/noarch/pip-23.1.2-pyhd8ed1ab_0.conda#7288da0d36821349cf1126e8670292df -https://conda.anaconda.org/conda-forge/noarch/plotly-5.14.1-pyhd8ed1ab_0.conda#f64bedfdb8e3f93ac69b84f530397d0e -https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_4.conda#8f349ca16d30950aa00870484d9d30c4 +https://conda.anaconda.org/conda-forge/linux-64/pillow-10.0.1-py39h444a776_0.conda#9801e45aec22a265bb22e6a57275be7f +https://conda.anaconda.org/conda-forge/noarch/pip-23.2.1-pyhd8ed1ab_0.conda#e2783aa3f9235225eec92f9081c5b801 
+https://conda.anaconda.org/conda-forge/noarch/plotly-5.17.0-pyhd8ed1ab_0.conda#76a0b213abcd3ffc1e8fa78804b69dc0 +https://conda.anaconda.org/conda-forge/linux-64/pulseaudio-client-16.1-hb77b528_5.conda#ac902ff3c1c6d750dd0dfc93a974ab74 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.2-pyhd8ed1ab_0.conda#6dd662ff5ac9a783e5c940ce9f3fe649 https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.4.1-py39h389d5f1_0.conda#9eeb2b2549f836ca196c6cbd22344122 -https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.9-py39h3d6467e_0.conda#6d990f672cc70e5c480ddb74b789a17c -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d -https://conda.anaconda.org/conda-forge/linux-64/blas-2.117-openblas.conda#54b4b02b897156056f3056f992261d0c -https://conda.anaconda.org/conda-forge/linux-64/compilers-1.5.2-ha770c72_0.conda#f95226244ee1c487cf53272f971323f4 -https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.3-h977cf35_1.conda#410ed3b168e5a139d12ebaf4143072cd -https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-7.3.0-hdb3a94d_0.conda#765bc76c0dfaf24ff9d8a2935b2510df -https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2023.1.23-py39h9e8eca3_2.conda#32dec18d45c74c2405021eb8dc42a521 -https://conda.anaconda.org/conda-forge/noarch/imageio-2.28.1-pyh24c5eb1_0.conda#ef3541a8cd9a55879932486a097b7fed -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-5.12.0-pyhd8ed1ab_0.conda#3544c818f0720c89eb16ae6940ab440b -https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-6.6.0-hd8ed1ab_0.conda#3cbc9615f10a3d471532b83e4250b971 -https://conda.anaconda.org/conda-forge/linux-64/pandas-2.0.2-py39h40cae4c_0.conda#de99b3f807c0b295a7df94623df0fb4c -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.11.0-py39h227be39_3.conda#9e381db00691e26bcf670c3586397be1 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 +https://conda.anaconda.org/conda-forge/linux-64/sip-6.7.11-py39h3d6467e_0.conda#4eaef850715aff114e2126a2f1a7b1f0 +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.8.0-hd8ed1ab_0.conda#384462e63262a527bda564fa2d9126c0 +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.4-pyhd8ed1ab_0.conda#18badd8fa3648d1beb1fcc7f2e0f756e +https://conda.anaconda.org/conda-forge/linux-64/compilers-1.6.0-ha770c72_0.conda#e2259de4640a51a28c21931ae98e4975 +https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.22.5-h98fc4e7_1.conda#483fe58e14ba244110cd1be2b771b70f +https://conda.anaconda.org/conda-forge/linux-64/harfbuzz-8.2.1-h3d44ed6_0.conda#98db5f8813f45e2b29766aff0e4a499c +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.0.1-pyhd8ed1ab_0.conda#54661981fd331e20847d8a49543dd9af +https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.9.0-18_linux64_openblas.conda#4bfcdd0ff8353918e573768e5c72dffa +https://conda.anaconda.org/conda-forge/linux-64/numpy-1.26.0-py39h6183b62_0.conda#a50279322335a176d74ed167f9ce468b +https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.10.0-pyhd8ed1ab_0.conda#0809187ef9b89a3d94a5c24d13936236 
+https://conda.anaconda.org/conda-forge/linux-64/pyqt5-sip-12.12.2-py39h3d6467e_4.conda#b83a218fa97e9963c858d0db651a7506 +https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/noarch/dask-core-2023.5.1-pyhd8ed1ab_0.conda#b90a2dec6d308d71649dbe58dc32c337 -https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.3-h938bd60_1.conda#1f317eb7f00db75f4112a07476345376 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.7.1-py39he190548_0.conda#f2a931db797bb58bd335f4a857b4c898 +https://conda.anaconda.org/conda-forge/linux-64/blas-devel-3.9.0-18_linux64_openblas.conda#1f053c648ccac13b438c754f553e3051 +https://conda.anaconda.org/conda-forge/linux-64/contourpy-1.1.1-py39h7633fee_0.conda#b673f03c191683996e66c881f90aff2b +https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.22.5-h8e1006c_1.conda#98206c865fccdea9723f0c6f9241a24f +https://conda.anaconda.org/conda-forge/linux-64/imagecodecs-2023.9.4-py39h43e5ab6_2.conda#0fa1dc12723102f1f30ed684718c3f42 +https://conda.anaconda.org/conda-forge/noarch/imageio-2.31.1-pyh24c5eb1_0.conda#1051cc0376612ba101d4f59e954a1ff4 +https://conda.anaconda.org/conda-forge/linux-64/pandas-2.1.0-py39hddac248_0.conda#0a3624f600f51df010a274176e356ac5 https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/noarch/sphinx-6.0.0-pyhd8ed1ab_2.conda#ac1d3b55da1669ee3a56973054fd7efb -https://conda.anaconda.org/conda-forge/noarch/tifffile-2023.4.12-pyhd8ed1ab_0.conda#b2ade33a630dada190c1220f3515fc5c -https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.5.0-pyhd8ed1ab_0.tar.bz2#3c275d7168a6a135329f4acb364c229a https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-h01ceb2d_13.conda#99ca83a166224f46a62c9545b8d66401 -https://conda.anaconda.org/conda-forge/linux-64/scipy-1.10.1-py39h6183b62_3.conda#84c4007675da392fdb99faeefda69552 -https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.13.0-pyhd8ed1ab_0.conda#26c51b97ce59bbcce6a35ff45bc5c900 -https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.4.0-pyhd8ed1ab_0.tar.bz2#88ee91e8679603f2a5bd036d52919cc2 -https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.8.2-pyhd8ed1ab_0.conda#7f330c6004309c83cc63aed39b70d711 +https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.4.1-py39h389d5f1_0.conda#9eeb2b2549f836ca196c6cbd22344122 +https://conda.anaconda.org/conda-forge/linux-64/scipy-1.11.2-py39h474f0d3_1.conda#f62409d868e23c1f97ae2b0db5658385 +https://conda.anaconda.org/conda-forge/linux-64/blas-2.118-openblas.conda#5cdc7179ced67da3f47d62097304ae1f +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.8.0-py39he9076e7_0.conda#a529a20267af9f085c7f991cae79fef2 https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.3-pyhd8ed1ab_0.tar.bz2#50ef6b29b1fb0768ca82c5aeb4fb2d96 -https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.0-py39h9ff65d1_0.conda#b68d27031efaec0ebab9d20d52135abd -https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.7-py39h5c7b992_3.conda#19e30314fe824605750da905febb8ee6 
-https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.19.3-py39h4661b88_2.tar.bz2#a8d53b12aedcd84107ba8c85c81be56f +https://conda.anaconda.org/conda-forge/linux-64/pyamg-5.0.1-py39hf86192f_0.conda#fe4ba21222a44b71db1fbbf8d033a385 +https://conda.anaconda.org/conda-forge/linux-64/qt-main-5.15.8-hc47bfe8_16.conda#a8dd2dfcd570e3965c73be6c5e03e74f +https://conda.anaconda.org/conda-forge/noarch/tifffile-2023.8.30-pyhd8ed1ab_0.conda#529b803c040449392bc480614f41d522 +https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.15.9-py39h52134e7_4.conda#e12391692d70732bf1df08b7ecf40095 +https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.21.0-py39h3d6467e_0.conda#8259084f3ed10a5f3a7a30f8eb8a4500 https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.2-pyhd8ed1ab_0.conda#cf88f3a1c11536bc3c10c14ad00ccc42 -https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.7.1-py39hf3d152e_0.conda#682772fa385911fb5efffbce21b269c5 https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.14.0-py39h0f8d45d_1.conda#b4f7f4de7614a8406935f56b1eef6a75 +https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.8.0-py39hf3d152e_0.conda#e348333b50ff1f978f3d6af24512de0b https://conda.anaconda.org/conda-forge/noarch/seaborn-0.12.2-hd8ed1ab_0.conda#50847a47c07812f88581081c620f5160 +https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.5.0-pyhd8ed1ab_0.tar.bz2#3c275d7168a6a135329f4acb364c229a +https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995 +https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.14.0-pyhd8ed1ab_0.conda#b3788794f88c9512393032e448428261 +https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.4.0-pyhd8ed1ab_0.tar.bz2#88ee91e8679603f2a5bd036d52919cc2 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.7-pyhd8ed1ab_0.conda#aebfabcb60c33a89c1f9290cab49bc93 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.5-pyhd8ed1ab_0.conda#ebf08f5184d8eaa486697bc060031953 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.4-pyhd8ed1ab_0.conda#a9a89000dfd19656ad004b937eeb6828 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.6-pyhd8ed1ab_0.conda#cf5c9649272c677a964a7313279e3a9b +https://conda.anaconda.org/conda-forge/noarch/sphinx-6.0.0-pyhd8ed1ab_2.conda#ac1d3b55da1669ee3a56973054fd7efb +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.9-pyhd8ed1ab_0.conda#0612e497d7860728f2cda421ea2aec09 +https://conda.anaconda.org/conda-forge/noarch/sphinxext-opengraph-0.8.2-pyhd8ed1ab_0.conda#7f330c6004309c83cc63aed39b70d711 # pip attrs @ https://files.pythonhosted.org/packages/f0/eb/fcb708c7bf5056045e9e98f62b93bd7467eb718b0202e7698eb11d66416c/attrs-23.1.0-py3-none-any.whl#sha256=1f28b4522cdc2fb4256ac1a020c78acf9cba2c6b461ccd2c126f3aa8e8335d04 +# pip cloudpickle @ https://files.pythonhosted.org/packages/15/80/44286939ca215e88fa827b2aeb6fa3fd2b4a7af322485c7170d6f9fd96e0/cloudpickle-2.2.1-py3-none-any.whl#sha256=61f594d1f4c295fa5cd9014ceb3a1fc4a70b0de1164b94fbc2d854ccba056f9f # pip defusedxml @ https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl#sha256=a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61 -# pip fastjsonschema @ 
https://files.pythonhosted.org/packages/d4/a1/b3816c7945742ee95e2fb68dd59aaa205c9ce53ffd90704f70c2207a7b4d/fastjsonschema-2.17.1-py3-none-any.whl#sha256=4b90b252628ca695280924d863fe37234eebadc29c5360d322571233dc9746e0 +# pip fastjsonschema @ https://files.pythonhosted.org/packages/9d/93/a3ca3cdeb84065d7d8f8df4cb09ab44405f109183c1d2b915ec17574e6b1/fastjsonschema-2.18.0-py3-none-any.whl#sha256=128039912a11a807068a7c87d0da36660afbfd7202780db26c4aa7153cfdc799 # pip fqdn @ https://files.pythonhosted.org/packages/cf/58/8acf1b3e91c58313ce5cb67df61001fc9dcd21be4fadb76c1a2d540e09ed/fqdn-1.5.1-py3-none-any.whl#sha256=3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014 # pip json5 @ https://files.pythonhosted.org/packages/70/ba/fa37123a86ae8287d6678535a944f9c3377d8165e536310ed6f6cb0f0c0e/json5-0.9.14-py2.py3-none-any.whl#sha256=740c7f1b9e584a468dbb2939d8d458db3427f2c93ae2139d05f47e453eae964f -# pip jsonpointer @ https://files.pythonhosted.org/packages/a3/be/8dc9d31b50e38172c8020c40f497ce8debdb721545ddb9fcb7cca89ea9e6/jsonpointer-2.3-py2.py3-none-any.whl#sha256=51801e558539b4e9cd268638c078c6c5746c9ac96bc38152d443400e4f3793e9 +# pip jsonpointer @ https://files.pythonhosted.org/packages/12/f6/0232cc0c617e195f06f810534d00b74d2f348fe71b2118009ad8ad31f878/jsonpointer-2.4-py2.py3-none-any.whl#sha256=15d51bba20eea3165644553647711d150376234112651b4f1811022aecad7d7a # pip jupyterlab-pygments @ https://files.pythonhosted.org/packages/c0/7e/c3d1df3ae9b41686e664051daedbd70eea2e1d2bd9d9c33e7e1455bc9f96/jupyterlab_pygments-0.2.2-py2.py3-none-any.whl#sha256=2405800db07c9f770863bcf8049a529c3dd4d3e28536638bd7c1c01d2748309f -# pip mistune @ https://files.pythonhosted.org/packages/9f/e5/780d22d19543f339aad583304f58002975b586757aa590cbe7bea5cc6f13/mistune-2.0.5-py2.py3-none-any.whl#sha256=bad7f5d431886fcbaf5f758118ecff70d31f75231b34024a1341120340a65ce8 -# pip overrides @ https://files.pythonhosted.org/packages/7f/36/3fef66c2bf1f66f35538a6703aca0447114b1873913c403f0ea589721aae/overrides-7.3.1-py3-none-any.whl#sha256=6187d8710a935d09b0bcef8238301d6ee2569d2ac1ae0ec39a8c7924e27f58ca +# pip mistune @ https://files.pythonhosted.org/packages/cc/c0/ac9587149e37cde62ae338e9db8241ae2fdc79a84bde8c8ba7caea2c22d8/mistune-3.0.1-py3-none-any.whl#sha256=b9b3e438efbb57c62b5beb5e134dab664800bdf1284a7ee09e8b12b13eb1aac6 +# pip overrides @ https://files.pythonhosted.org/packages/da/28/3fa6ef8297302fc7b3844980b6c5dbc71cdbd4b61e9b2591234214d5ab39/overrides-7.4.0-py3-none-any.whl#sha256=3ad24583f86d6d7a49049695efe9933e67ba62f0c7625d53c59fa832ce4b8b7d # pip pandocfilters @ https://files.pythonhosted.org/packages/5e/a8/878258cffd53202a6cc1903c226cf09e58ae3df6b09f8ddfa98033286637/pandocfilters-1.5.0-py2.py3-none-any.whl#sha256=33aae3f25fd1a026079f5d27bdd52496f0e0803b3469282162bafdcbdf6ef14f # pip pkginfo @ https://files.pythonhosted.org/packages/b3/f2/6e95c86a23a30fa205ea6303a524b20cbae27fbee69216377e3d95266406/pkginfo-1.9.6-py3-none-any.whl#sha256=4b7a555a6d5a22169fcc9cf7bfd78d296b0361adad412a346c1226849af5e546 -# pip prometheus-client @ https://files.pythonhosted.org/packages/5b/62/75fc6f255e214ff0a8bd3267a0bd337521dd24f76cd593c10795e488f41b/prometheus_client-0.17.0-py3-none-any.whl#sha256=a77b708cf083f4d1a3fb3ce5c95b4afa32b9c521ae363354a4a910204ea095ce +# pip prometheus-client @ https://files.pythonhosted.org/packages/ad/b3/6e18c89bf6bd120590ea538a62cae16dc763ff2745b18377c4be5495c4aa/prometheus_client-0.17.1-py3-none-any.whl#sha256=e537f37160f6807b8202a6fc4764cdd19bac5480ddd3e0d463c3002b34462101 # pip ptyprocess @ 
https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl#sha256=4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35 # pip pycparser @ https://files.pythonhosted.org/packages/62/d5/5f610ebe421e85889f2e55e33b7f9a6795bd982198517d912eb1c76e1a53/pycparser-2.21-py2.py3-none-any.whl#sha256=8ee45429555515e1f6b185e78100aea234072576aa43ab53aefcae078162fca9 -# pip pyrsistent @ https://files.pythonhosted.org/packages/64/de/375aa14daaee107f987da76ca32f7a907fea00fa8b8afb67dc09bec0de91/pyrsistent-0.19.3-py3-none-any.whl#sha256=ccf0d6bd208f8111179f0c26fdf84ed7c3891982f2edaeae7422575f47e66b64 # pip python-json-logger @ https://files.pythonhosted.org/packages/35/a6/145655273568ee78a581e734cf35beb9e33a370b29c5d3c8fee3744de29f/python_json_logger-2.0.7-py3-none-any.whl#sha256=f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd +# pip pyyaml @ https://files.pythonhosted.org/packages/7d/39/472f2554a0f1e825bd7c5afc11c817cd7a2f3657460f7159f691fbb37c51/PyYAML-6.0.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=bc1bf2925a1ecd43da378f4db9e4f799775d6367bdb94671027b73b393a7c42c # pip rfc3986-validator @ https://files.pythonhosted.org/packages/9e/51/17023c0f8f1869d8806b979a2bffa3f861f26a3f1a66b094288323fba52f/rfc3986_validator-0.1.1-py2.py3-none-any.whl#sha256=2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9 +# pip rpds-py @ https://files.pythonhosted.org/packages/82/29/8fc73714f40f83cc995d590f04ae46b152663a9e7f3f94ddee3765125870/rpds_py-0.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=e8c71ea77536149e36c4c784f6d420ffd20bea041e3ba21ed021cb40ce58e2c9 # pip send2trash @ https://files.pythonhosted.org/packages/a9/78/e4df1e080ed790acf3a704edf521006dd96b9841bd2e2a462c0d255e0565/Send2Trash-1.8.2-py3-none-any.whl#sha256=a384719d99c07ce1eefd6905d2decb6f8b7ed054025bb0e618919f945de4f679 # pip sniffio @ https://files.pythonhosted.org/packages/c3/a0/5dba8ed157b0136607c7f2151db695885606968d1fae123dc3391e0cfdbf/sniffio-1.3.0-py3-none-any.whl#sha256=eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384 -# pip soupsieve @ https://files.pythonhosted.org/packages/49/37/673d6490efc51ec46d198c75903d99de59baffdd47aea3d071b80a9e4e89/soupsieve-2.4.1-py3-none-any.whl#sha256=1c1bfee6819544a3447586c889157365a27e10d88cde3ad3da0cf0ddf646feb8 -# pip traitlets @ https://files.pythonhosted.org/packages/77/75/c28e9ef7abec2b7e9ff35aea3e0be6c1aceaf7873c26c95ae1f0d594de71/traitlets-5.9.0-py3-none-any.whl#sha256=9e6ec080259b9a5940c797d58b613b5e31441c2257b87c2e795c5228ae80d2d8 -# pip uri-template @ https://files.pythonhosted.org/packages/c0/db/d4f9c75b43541f7235daf4d13eb43f4491f9d5f5df45ce41daeed3a903f6/uri_template-1.2.0-py3-none-any.whl#sha256=f1699c77b73b925cf4937eae31ab282a86dc885c333f2e942513f08f691fc7db +# pip soupsieve @ https://files.pythonhosted.org/packages/4c/f3/038b302fdfbe3be7da016777069f26ceefe11a681055ea1f7817546508e3/soupsieve-2.5-py3-none-any.whl#sha256=eaa337ff55a1579b6549dc679565eac1e3d000563bcb1c8ab0d0fefbc0c2cdc7 +# pip traitlets @ https://files.pythonhosted.org/packages/fb/00/78472b256929614443c3fa3be31ee60777e5a9e3c6770d8d934154aa2cab/traitlets-5.10.0-py3-none-any.whl#sha256=417745a96681fbb358e723d5346a547521f36e9bd0d50ba7ab368fff5d67aa54 +# pip uri-template @ 
https://files.pythonhosted.org/packages/e7/00/3fca040d7cf8a32776d3d81a00c8ee7457e00f80c649f1e4a863c8321ae9/uri_template-1.3.0-py3-none-any.whl#sha256=a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363 # pip webcolors @ https://files.pythonhosted.org/packages/d5/e1/3e9013159b4cbb71df9bd7611cbf90dc2c621c8aeeb677fc41dad72f2261/webcolors-1.13-py3-none-any.whl#sha256=29bc7e8752c0a1bd4a1f03c14d6e6a72e93d82193738fa860cbff59d0fcc11bf # pip webencodings @ https://files.pythonhosted.org/packages/f4/24/2a3e3df732393fed8b3ebf2ec078f05546de641fe1b667ee316ec1dcf3b7/webencodings-0.5.1-py2.py3-none-any.whl#sha256=a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78 -# pip websocket-client @ https://files.pythonhosted.org/packages/86/5c/2ebfbb7d4dbb7f35a1f70c40d003f7844d78945ac7c69757067ebaea9c78/websocket_client-1.5.2-py3-none-any.whl#sha256=f8c64e28cd700e7ba1f04350d66422b6833b82a796b525a51e740b8cc8dab4b1 -# pip anyio @ https://files.pythonhosted.org/packages/68/fe/7ce1926952c8a403b35029e194555558514b365ad77d75125f521a2bec62/anyio-3.7.0-py3-none-any.whl#sha256=eddca883c4175f14df8aedce21054bfca3adb70ffe76a9f607aef9d7fa2ea7f0 +# pip websocket-client @ https://files.pythonhosted.org/packages/0b/50/49e0d7342e5d441d43b525d6c84656ea40aea3e58d530004d07b22bc9b04/websocket_client-1.6.3-py3-none-any.whl#sha256=6cfc30d051ebabb73a5fa246efdcc14c8fbebbd0330f8984ac3bb6d9edd2ad03 +# pip anyio @ https://files.pythonhosted.org/packages/36/55/ad4de788d84a630656ece71059665e01ca793c04294c463fd84132f40fe6/anyio-4.0.0-py3-none-any.whl#sha256=cfdb2b588b9fc25ede96d8db56ed50848b0b649dca3dd1df0b11f683bb9e0b5f # pip arrow @ https://files.pythonhosted.org/packages/67/67/4bca5a595e2f89bff271724ddb1098e6c9e16f7f3d018d120255e3c30313/arrow-1.2.3-py3-none-any.whl#sha256=5a49ab92e3b7b71d96cd6bfcc4df14efefc9dfa96ea19045815914a6ab6b1fe2 # pip beautifulsoup4 @ https://files.pythonhosted.org/packages/57/f4/a69c20ee4f660081a7dedb1ac57f29be9378e04edfcb90c526b923d4bebc/beautifulsoup4-4.12.2-py3-none-any.whl#sha256=bd2520ca0d9d7d12694a53d44ac482d181b4ec1888909b035a3dbf40d0f57d4a # pip bleach @ https://files.pythonhosted.org/packages/ac/e2/dfcab68c9b2e7800c8f06b85c76e5f978d05b195a958daa9b1dda54a1db6/bleach-6.0.0-py3-none-any.whl#sha256=33c16e3353dbd13028ab4799a0f89a83f113405c766e9c122df8a06f5b85b3f4 # pip cffi @ https://files.pythonhosted.org/packages/2d/86/3ca57cddfa0419f6a95d1c8478f8f622ba597e3581fd501bbb915b20eb75/cffi-1.15.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=5d598b938678ebf3c67377cdd45e09d431369c3b1a5b331058c338e201f12b27 # pip doit @ https://files.pythonhosted.org/packages/44/83/a2960d2c975836daa629a73995134fd86520c101412578c57da3d2aa71ee/doit-0.36.0-py3-none-any.whl#sha256=ebc285f6666871b5300091c26eafdff3de968a6bd60ea35dd1e3fc6f2e32479a -# pip jupyter-core @ https://files.pythonhosted.org/packages/41/1e/92a67f333b9335f04ce409799c030dcfb291712658b9d9d13997f7c91e5a/jupyter_core-5.3.0-py3-none-any.whl#sha256=d4201af84559bc8c70cead287e1ab94aeef3c512848dde077b7684b54d67730d +# pip jupyter-core @ https://files.pythonhosted.org/packages/8c/e0/3f9061c5e99a03612510f892647b15a91f910c5275b7b77c6c72edae1494/jupyter_core-5.3.1-py3-none-any.whl#sha256=ae9036db959a71ec1cac33081eeb040a79e681f08ab68b0883e9a676c7a90dce +# pip referencing @ https://files.pythonhosted.org/packages/be/8e/56d6f1e2d591f4d6cbcba446cac4a1b0dc4f584537e2071d9bcee8eeab6b/referencing-0.30.2-py3-none-any.whl#sha256=449b6669b6121a9e96a7f9e410b245d471e8d48964c67113ce9afe50c8dd7bdf # pip rfc3339-validator @ 
https://files.pythonhosted.org/packages/7b/44/4e421b96b67b2daff264473f7465db72fbdf36a07e05494f50300cc7b0c6/rfc3339_validator-0.1.4-py2.py3-none-any.whl#sha256=24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa # pip terminado @ https://files.pythonhosted.org/packages/84/a7/c7628d79651b8c8c775d27b374315a825141b5783512e82026fb210dd639/terminado-0.17.1-py3-none-any.whl#sha256=8650d44334eba354dd591129ca3124a6ba42c3d5b70df5051b6921d506fdaeae # pip tinycss2 @ https://files.pythonhosted.org/packages/da/99/fd23634d6962c2791fb8cb6ccae1f05dcbfc39bce36bba8b1c9a8d92eae8/tinycss2-1.2.1-py3-none-any.whl#sha256=2b80a96d41e7c3914b8cda8bc7f705a4d9c49275616e886103dd839dfc847847 # pip argon2-cffi-bindings @ https://files.pythonhosted.org/packages/ec/f7/378254e6dd7ae6f31fe40c8649eea7d4832a42243acaf0f1fff9083b2bed/argon2_cffi_bindings-21.2.0-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl#sha256=b746dba803a79238e925d9046a63aa26bf86ab2a2fe74ce6b009a1c3f5c8f2ae # pip isoduration @ https://files.pythonhosted.org/packages/7b/55/e5326141505c5d5e34c5e0935d2908a74e4561eca44108fbfb9c13d2911a/isoduration-20.11.0-py3-none-any.whl#sha256=b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042 +# pip jsonschema-specifications @ https://files.pythonhosted.org/packages/1c/24/83349ac2189cc2435e84da3f69ba3c97314d3c0622628e55171c6798ed80/jsonschema_specifications-2023.7.1-py3-none-any.whl#sha256=05adf340b659828a004220a9613be00fa3f223f2b82002e273dee62fd50524b1 # pip jupyter-server-terminals @ https://files.pythonhosted.org/packages/ea/7f/36db12bdb90f5237766dcbf59892198daab7260acbcf03fc75e2a2a82672/jupyter_server_terminals-0.4.4-py3-none-any.whl#sha256=75779164661cec02a8758a5311e18bb8eb70c4e86c6b699403100f1585a12a36 -# pip jupyterlite-core @ https://files.pythonhosted.org/packages/5e/25/dd347708151764152b75f6606c02e2571e1228ba52c28502495b48ac17d8/jupyterlite_core-0.1.0-py3-none-any.whl#sha256=2f17afa282447594cb38f7a1d2619ceface28d8f4747e038790cac22c394e804 -# pip pyzmq @ https://files.pythonhosted.org/packages/94/4b/1093172b73984b568d9f1a72bcd61793822fab40aa571f5d6ed9db6234cb/pyzmq-25.1.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=4c2fc7aad520a97d64ffc98190fce6b64152bde57a10c704b337082679e74f67 -# pip argon2-cffi @ https://files.pythonhosted.org/packages/a8/07/946d5a9431bae05a776a59746ec385fbb79b526738d25e4202d3e0bbf7f4/argon2_cffi-21.3.0-py3-none-any.whl#sha256=8c976986f2c5c0e5000919e6de187906cfd81fb1c72bf9d88c01177e77da7f80 -# pip jsonschema @ https://files.pythonhosted.org/packages/c1/97/c698bd9350f307daad79dd740806e1a59becd693bd11443a0f531e3229b3/jsonschema-4.17.3-py3-none-any.whl#sha256=a870ad254da1a8ca84b6a2905cac29d265f805acc57af304784962a2aa6508f6 -# pip jupyter-client @ https://files.pythonhosted.org/packages/07/37/4019d2c41ca333c08dfdfeb84c0fc0368c8defbbd3c8f0c9a530851e5813/jupyter_client-8.2.0-py3-none-any.whl#sha256=b18219aa695d39e2ad570533e0d71fb7881d35a873051054a84ee2a17c4b7389 -# pip jupyterlite-pyodide-kernel @ https://files.pythonhosted.org/packages/0a/26/fe1ffe15bcec2a78fe50480d463166484869605947636056425381d2542b/jupyterlite_pyodide_kernel-0.0.8-py3-none-any.whl#sha256=32c83f99c4b8aebd5d351c6a8a4abc153c3a11f2d1ed62efc872d56224437ef6 -# pip jupyter-events @ https://files.pythonhosted.org/packages/ee/14/e11a93c1b47a69432ee7898f1b55f1da27f2f93b009a34dbdafb9b903f81/jupyter_events-0.6.3-py3-none-any.whl#sha256=57a2749f87ba387cd1bfd9b22a0875b889237dbf2edc2121ebb22bde47036c17 -# pip nbformat @ 
https://files.pythonhosted.org/packages/e1/ce/7f0f454b4e7f1cb31345f9f977bdce7486033a1c08b5945b17ea95c4afbc/nbformat-5.9.0-py3-none-any.whl#sha256=8c8fa16d6d05062c26177754bfbfac22de644888e2ef69d27ad2a334cf2576e5 +# pip jupyterlite-core @ https://files.pythonhosted.org/packages/4f/77/874765e62cb857fd5efc58c30a4c0de5a4585e4feb03efc2e5ed6ae27d7c/jupyterlite_core-0.1.2-py3-none-any.whl#sha256=87e257813aba80ea45199c68fb8dbd6f9f252163c738f9be7954ca54236fd9fa +# pip pyzmq @ https://files.pythonhosted.org/packages/a2/e0/08605421a2ede5d87adbde9685599fa7e6af1df700c657759a1892ced942/pyzmq-25.1.1-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl#sha256=d457aed310f2670f59cc5b57dcfced452aeeed77f9da2b9763616bd57e4dbaae +# pip argon2-cffi @ https://files.pythonhosted.org/packages/a4/6a/e8a041599e78b6b3752da48000b14c8d1e8a04ded09c88c714ba047f34f5/argon2_cffi-23.1.0-py3-none-any.whl#sha256=c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea +# pip jsonschema @ https://files.pythonhosted.org/packages/2b/ff/af59fd34bc4d7ac3e6e0cd1f3c10317d329b6c1aee179e8b24ad9a79fbac/jsonschema-4.19.0-py3-none-any.whl#sha256=043dc26a3845ff09d20e4420d6012a9c91c9aa8999fa184e7efcfeccb41e32cb +# pip jupyter-client @ https://files.pythonhosted.org/packages/73/d4/3c13d6a300be9e894561aea0b81e7aed46e8f98029b7d9369d90b1fc7ac5/jupyter_client-8.3.1-py3-none-any.whl#sha256=5eb9f55eb0650e81de6b7e34308d8b92d04fe4ec41cd8193a913979e33d8e1a5 +# pip jupyterlite-pyodide-kernel @ https://files.pythonhosted.org/packages/ca/31/acb9356a89f71b2a35f4aada0f23861036da60fbea41281929fcf3ac90b0/jupyterlite_pyodide_kernel-0.1.2-py3-none-any.whl#sha256=acb37f1e29761a92af98f3a57fb360f2eb57912972712402f897d4cf301c5757 +# pip jupyter-events @ https://files.pythonhosted.org/packages/15/0d/3c67f6c432d8085a3cee250e1e235f27b764be90cc2d16fdcc0b1cee9572/jupyter_events-0.7.0-py3-none-any.whl#sha256=4753da434c13a37c3f3c89b500afa0c0a6241633441421f6adafe2fb2e2b924e +# pip nbformat @ https://files.pythonhosted.org/packages/f4/e7/ef30a90b70eba39e675689b9eaaa92530a71d7435ab8f9cae520814e0caf/nbformat-5.9.2-py3-none-any.whl#sha256=1c5172d786a41b82bcfd0c23f9e6b6f072e8fb49c39250219e4acfff1efe89e9 # pip nbclient @ https://files.pythonhosted.org/packages/ac/5a/d670ca51e6c3d98574b9647599821590efcd811d71f58e9c89fc59a17685/nbclient-0.8.0-py3-none-any.whl#sha256=25e861299e5303a0477568557c4045eccc7a34c17fc08e7959558707b9ebe548 -# pip nbconvert @ https://files.pythonhosted.org/packages/2f/90/79bf16b584f5150550b0c175ca7a6e88334226e9275cf16db13785105d73/nbconvert-7.4.0-py3-none-any.whl#sha256=af5064a9db524f9f12f4e8be7f0799524bd5b14c1adea37e34e83c95127cc818 -# pip jupyter-server @ https://files.pythonhosted.org/packages/6f/04/b2e87b4ee96a2219df7666706b28c9ebffd9895fc98fe4b5c56b8b6931ce/jupyter_server-2.6.0-py3-none-any.whl#sha256=19525a1515b5999618a91b3e99ec9f6869aa8c5ba73e0b6279fcda918b54ba36 -# pip jupyterlab-server @ https://files.pythonhosted.org/packages/ad/31/cfb84feb3803c1e0e69dbe6928ab9251b9a1548b9092a5013413c0dd49f8/jupyterlab_server-2.22.1-py3-none-any.whl#sha256=1c8eb55c7cd70a50a51fef42a7b4e26ef2f7fc48728f0290604bd89b1dd156e6 -# pip jupyterlite-sphinx @ https://files.pythonhosted.org/packages/34/a9/a050b891d5d06a3fe73f1e16992a846a6f6ba21660ac053f5064cbf98bae/jupyterlite_sphinx-0.8.0-py3-none-any.whl#sha256=4a20fcb585ef036d3ed1c62cd6270351f810bc9586d3638f55e6a98665b3373d +# pip nbconvert @ 
https://files.pythonhosted.org/packages/a4/a6/072b6fd04616f1409f37ef5967ea5bbbeea1447d4494c2d98658a95fc985/nbconvert-7.8.0-py3-none-any.whl#sha256=aec605e051fa682ccc7934ccc338ba1e8b626cfadbab0db592106b630f63f0f2 +# pip jupyter-server @ https://files.pythonhosted.org/packages/96/a2/b432812537beaf22a9dbc0d50cb62471e57ef90df42738675760fb3dce98/jupyter_server-2.7.3-py3-none-any.whl#sha256=8e4b90380b59d7a1e31086c4692231f2a2ea4cb269f5516e60aba72ce8317fc9 +# pip jupyterlab-server @ https://files.pythonhosted.org/packages/96/cd/cdabe44549d60e0967904f0bdd9e3756b521112317612a3997eb2fda9181/jupyterlab_server-2.25.0-py3-none-any.whl#sha256=c9f67a98b295c5dee87f41551b0558374e45d449f3edca153dd722140630dcb2 +# pip jupyterlite-sphinx @ https://files.pythonhosted.org/packages/38/c9/5f1142c005cf8d75830b10029e53f074324bc85cfca1f1d0f22a207b771c/jupyterlite_sphinx-0.9.3-py3-none-any.whl#sha256=be6332d16490ea2fa90b78187a2c5e1c357195966a25741d60b1790346571041 diff --git a/build_tools/circle/doc_min_dependencies_environment.yml b/build_tools/circle/doc_min_dependencies_environment.yml index fb9c1f34ef618..3f3ba57eae8c6 100644 --- a/build_tools/circle/doc_min_dependencies_environment.yml +++ b/build_tools/circle/doc_min_dependencies_environment.yml @@ -22,8 +22,9 @@ dependencies: - seaborn - memory_profiler - compilers - - sphinx=4.0.1 # min - - sphinx-gallery=0.7.0 # min + - sphinx=6.0.0 # min + - sphinx-gallery=0.10.1 # min + - sphinx-copybutton=0.5.2 # min - numpydoc=1.2.0 # min - sphinx-prompt=1.3.0 # min - plotly=5.14.0 # min diff --git a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock index a2d5cbb8554ff..78b2081a07b0f 100644 --- a/build_tools/circle/doc_min_dependencies_linux-64_conda.lock +++ b/build_tools/circle/doc_min_dependencies_linux-64_conda.lock @@ -1,10 +1,10 @@ # Generated by conda-lock. 
# platform: linux-64 -# input_hash: b6da36fc22a70d5ecc78b7b7beca6ea69727004c0a3021ad5474f9bcbe59b2ac +# input_hash: 65e3ed797c4adee3f15a519d65e7dd7ed27340eb62a52cd1d1b597c377bc77f5 @EXPLICIT https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81 -https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.5.7-hbcca054_0.conda#f5c65075fc34438d5b456c7f3f5ab695 -https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_15.tar.bz2#5dd5127afd710f91f6a75821bac0a4f0 +https://conda.anaconda.org/conda-forge/linux-64/ca-certificates-2023.7.22-hbcca054_0.conda#a73ecd2988327ad4c8f2c331482917f2 +https://conda.anaconda.org/conda-forge/noarch/kernel-headers_linux-64-2.6.32-he073ed8_16.conda#7ca122655873935e02c91279c5b03c8c https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.36.1-hea4e1c9_2.tar.bz2#bd4f2e711b39af170e7ff15163fe87ee https://conda.anaconda.org/conda-forge/linux-64/libgcc-devel_linux-64-7.5.0-hda03d7c_20.tar.bz2#2146b25eb2a762a44fab709338a7b6d9 https://conda.anaconda.org/conda-forge/linux-64/libgfortran4-7.5.0-h14aa051_20.tar.bz2#a072eab836c3a9578ce72b5640ce592d @@ -13,7 +13,7 @@ https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-13.1.0-hfd8a6a1_0.c https://conda.anaconda.org/conda-forge/linux-64/python_abi-3.8-3_cp38.conda#2f3f7af062b42d664117662612022204 https://conda.anaconda.org/conda-forge/linux-64/libgfortran-ng-7.5.0-h14aa051_20.tar.bz2#c3b2ad091c043c08689e64b10741484b https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.1.0-he5830b7_0.conda#56ca14d57ac29a75d23a39eb3ee0ddeb -https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_15.tar.bz2#66c192522eacf5bb763568b4e415d133 +https://conda.anaconda.org/conda-forge/noarch/sysroot_linux-64-2.12-he073ed8_16.conda#071ea8dceff4d30ac511f4a2f8437cd1 https://conda.anaconda.org/conda-forge/linux-64/binutils_impl_linux-64-2.36.1-h193b22a_2.tar.bz2#32aae4265554a47ea77f7c09f86aeb3b https://conda.anaconda.org/conda-forge/linux-64/binutils-2.36.1-hdd6e379_2.tar.bz2#3111f86041b5b6863545ca49130cca95 https://conda.anaconda.org/conda-forge/linux-64/binutils_linux-64-2.36-hf3e587d_33.tar.bz2#72b245322c589284f1b92a5c971e5cb6 @@ -24,18 +24,17 @@ https://conda.anaconda.org/conda-forge/linux-64/gettext-0.21.1-h27087fc_0.tar.bz https://conda.anaconda.org/conda-forge/linux-64/icu-64.2-he1b5a44_1.tar.bz2#8e881214a23508f1541eb7a3135d6fcb https://conda.anaconda.org/conda-forge/linux-64/jpeg-9e-h0b41bf4_3.conda#c7a069243e1fbe9a556ed2ec030e6407 https://conda.anaconda.org/conda-forge/linux-64/lerc-4.0.0-h27087fc_0.tar.bz2#76bbff344f0134279f225174e9064c8f -https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.0.9-h166bdaf_8.tar.bz2#9194c9bf9428035a05352d031462eae4 https://conda.anaconda.org/conda-forge/linux-64/libdeflate-1.17-h0b41bf4_0.conda#5cc781fd91968b11a8a7fdbee0982676 https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.5.0-hcb278e6_1.conda#6305a3dd2752c76335295da4e581f2fd https://conda.anaconda.org/conda-forge/linux-64/libffi-3.2.1-he1b5a44_1007.tar.bz2#11389072d7d6036fd811c3d9460475cd https://conda.anaconda.org/conda-forge/linux-64/libiconv-1.17-h166bdaf_0.tar.bz2#b62b52da46c39ee2bc3c162ac7f1804d https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.38.1-h0b41bf4_0.conda#40b61aab5c7ba9ff276c41cfffe6b80b -https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.0-h0b41bf4_0.conda#0d4a7508d8c6c65314f2b9c1f56ad408 
+https://conda.anaconda.org/conda-forge/linux-64/libwebp-base-1.3.1-hd590300_0.conda#82bf6f63eb15ef719b556b63feec3a77 https://conda.anaconda.org/conda-forge/linux-64/libxkbcommon-0.10.0-he1b5a44_0.tar.bz2#78ccac2098edcd3673af2ceb3e95f932 -https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-h166bdaf_4.tar.bz2#f3f9de449d32ca9b9c66a22863c96f41 +https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.2.13-hd590300_5.conda#f36c115f1ee199da648e0597ec2047ad https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.4-hcb278e6_0.conda#681105bccc2a3f7f1a837d47d39c9179 https://conda.anaconda.org/conda-forge/linux-64/nspr-4.35-h27087fc_0.conda#da0ec11a6454ae19bff5b02ed881a2b1 -https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1u-hd590300_0.conda#cc1c2db83ae28a28871d52b035739488 +https://conda.anaconda.org/conda-forge/linux-64/openssl-1.1.1v-hd590300_0.conda#b1701dc29287ef4638ccc7f12cf73405 https://conda.anaconda.org/conda-forge/linux-64/pcre-8.45-h9c3ff4c_0.tar.bz2#c05d1820a6d34ff07aaaab7a9b7eddaa https://conda.anaconda.org/conda-forge/linux-64/pthread-stubs-0.4-h36c2ea0_1001.tar.bz2#22dad4df6e8630e8dff2428f6f6a7036 https://conda.anaconda.org/conda-forge/linux-64/xorg-libxau-1.0.11-hd590300_0.conda#2c80dc38fface310c9bd81b17037fee5 @@ -46,17 +45,14 @@ https://conda.anaconda.org/conda-forge/linux-64/expat-2.5.0-hcb278e6_1.conda#8b9 https://conda.anaconda.org/conda-forge/linux-64/gcc_linux-64-7.5.0-h47867f9_33.tar.bz2#3a31c3f430a31184a5d07e67d3b24e2c https://conda.anaconda.org/conda-forge/linux-64/gfortran_impl_linux-64-7.5.0-h56cb351_20.tar.bz2#8f897b30195bd3a2251b4c51c3cc91cf https://conda.anaconda.org/conda-forge/linux-64/gxx_impl_linux-64-7.5.0-hd0bb8aa_20.tar.bz2#dbe78fc5fb9c339f8e55426559e12f7b -https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.0.9-h166bdaf_8.tar.bz2#4ae4d7795d33e02bd20f6b23d91caf82 -https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.0.9-h166bdaf_8.tar.bz2#04bac51ba35ea023dc48af73c1c88c25 https://conda.anaconda.org/conda-forge/linux-64/libllvm9-9.0.1-default_hc23dcda_7.tar.bz2#9f4686a2c319355fe8636ca13783c3b4 https://conda.anaconda.org/conda-forge/linux-64/libpng-1.6.39-h753d276_0.conda#e1c890aebdebbfbf87e2c917187b4416 -https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.42.0-h2797004_0.conda#fdaae20a1cf7cd62130a0973190a31b7 +https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.43.0-h2797004_0.conda#903fa782a9067d5934210df6d79220f6 https://conda.anaconda.org/conda-forge/linux-64/libxcb-1.13-h7f98852_1004.tar.bz2#b3653fdc58d03face9724f602218a904 https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8228510_1.conda#47d31b792659ce70f470b5c82fdfb7a4 https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.12-h27826a3_0.tar.bz2#5b8c42eb62e9fc961af70bdd6a26e168 -https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-h166bdaf_4.tar.bz2#4b11e365c0275b808be78b30f904e295 -https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.2-h3eb15da_6.conda#6b63daed8feeca47be78f323e793d555 -https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.0.9-h166bdaf_8.tar.bz2#e5613f2bc717e9945840ff474419b8e4 +https://conda.anaconda.org/conda-forge/linux-64/zlib-1.2.13-hd590300_5.conda#68c34ec6149623be41a1933ab996a209 +https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.5-hfc55251_0.conda#04b88013080254850d6c01ed54810589 https://conda.anaconda.org/conda-forge/linux-64/c-compiler-1.1.1-h516909a_0.tar.bz2#d98aa4948ec35f52907e2d6152e2b255 
https://conda.anaconda.org/conda-forge/linux-64/freetype-2.12.1-hca18f0e_1.conda#e1232042de76d24539a436d37597eb06 https://conda.anaconda.org/conda-forge/linux-64/gfortran_linux-64-7.5.0-h78c8a43_33.tar.bz2#b2879010fb369f4012040f7a27657cd8 @@ -65,10 +61,9 @@ https://conda.anaconda.org/conda-forge/linux-64/libclang-9.0.1-default_hb4e5071_ https://conda.anaconda.org/conda-forge/linux-64/libglib-2.66.3-hbe7bbb4_0.tar.bz2#d5a09a9e981849b751cb75656b7302a0 https://conda.anaconda.org/conda-forge/linux-64/libtiff-4.5.0-h6adf6a1_2.conda#2e648a34072eb39d7c4fc2a9981c5f0c https://conda.anaconda.org/conda-forge/linux-64/libxml2-2.9.10-hee79883_0.tar.bz2#0217b0926808b1adf93247bba489d733 -https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.5-h4dfa4b3_0.conda#9441a97b74c692d969ff465ac6c0ccea -https://conda.anaconda.org/conda-forge/linux-64/nss-3.89-he45b914_0.conda#2745719a58eeaab6657256a3f142f099 -https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.42.0-h2c6b66d_0.conda#1192f6ec654a5bc4ee1d64bdc4a3e5cc -https://conda.anaconda.org/conda-forge/linux-64/brotli-1.0.9-h166bdaf_8.tar.bz2#2ff08978892a3e8b954397c461f18418 +https://conda.anaconda.org/conda-forge/linux-64/llvm-openmp-16.0.6-h4dfa4b3_0.conda#b096c85c415519259e731d8fb719a3ef +https://conda.anaconda.org/conda-forge/linux-64/nss-3.92-h1d7d5a4_0.conda#22c89a3d87828fe925b310b9cdf0f574 +https://conda.anaconda.org/conda-forge/linux-64/sqlite-3.43.0-h2c6b66d_0.conda#713f9eac95d051abe14c3774376854fe https://conda.anaconda.org/conda-forge/linux-64/cxx-compiler-1.1.1-hc9558a2_0.tar.bz2#1eb7c67eb11eab0c98a87f84174fdde1 https://conda.anaconda.org/conda-forge/linux-64/fontconfig-2.14.2-h14ed4e7_0.conda#0f69b688f52ff6da70bccb7ff7001d1d https://conda.anaconda.org/conda-forge/linux-64/fortran-compiler-1.1.1-he991be0_0.tar.bz2#e38ac82cc517b9e245c1ae99f9f140da @@ -77,96 +72,98 @@ https://conda.anaconda.org/conda-forge/linux-64/mkl-2020.4-h726a3e6_304.tar.bz2# https://conda.anaconda.org/conda-forge/linux-64/openjpeg-2.5.0-hfec8fc6_2.conda#5ce6a42505c6e9e6151c54c3ec8d68ea https://conda.anaconda.org/conda-forge/linux-64/python-3.8.6-h852b56e_0_cpython.tar.bz2#dd65401dfb61ac030edc0dc4d15c2c51 https://conda.anaconda.org/conda-forge/noarch/alabaster-0.7.13-pyhd8ed1ab_0.conda#06006184e203b61d3525f90de394471e -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 -https://conda.anaconda.org/conda-forge/noarch/click-8.1.3-unix_pyhd8ed1ab_2.tar.bz2#20e4087407c7cb04a40817114b333dbf +https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py38h17151c0_0.conda#5b332445993432e76df706fe1ebe776d +https://conda.anaconda.org/conda-forge/noarch/certifi-2023.7.22-pyhd8ed1ab_0.conda#7f3dbc9179b4dde7da98dfb151d0ad22 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.2.0-pyhd8ed1ab_0.conda#313516e9a4b08b12dfb1e1cd390a96e3 +https://conda.anaconda.org/conda-forge/noarch/click-8.1.7-unix_pyh707e725_0.conda#f3ad426304898027fc619827ff428eca https://conda.anaconda.org/conda-forge/noarch/cloudpickle-2.2.1-pyhd8ed1ab_0.conda#b325bfc4cff7d7f8a868f1f7ecc4ed16 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 https://conda.anaconda.org/conda-forge/linux-64/compilers-1.1.1-0.tar.bz2#1ba267e19dbaf3db9dd0404e6fb9cdb9 
https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb https://conda.anaconda.org/conda-forge/linux-64/cython-0.29.33-py38h8dc9893_0.conda#5d50cd654981f0ccc7c878ac297afaa7 -https://conda.anaconda.org/conda-forge/linux-64/docutils-0.17.1-py38h578d9bd_3.tar.bz2#34e1f12e3ed15aff218644e9d865b722 -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 -https://conda.anaconda.org/conda-forge/noarch/fsspec-2023.5.0-pyh1a96a4e_0.conda#20edd290b319aa0eff3e9055375756dc +https://conda.anaconda.org/conda-forge/linux-64/docutils-0.19-py38h578d9bd_1.tar.bz2#3746b24949251f1a00ae0d616d4cdc1b +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.3-pyhd8ed1ab_0.conda#e6518222753f519e911e83136d2158d9 +https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 +https://conda.anaconda.org/conda-forge/noarch/fsspec-2023.6.0-pyh1a96a4e_0.conda#50ea2067ec92dfcc38b4f07992d7e235 https://conda.anaconda.org/conda-forge/linux-64/glib-2.66.3-h58526e2_0.tar.bz2#62c2e5c84f6cdc7ded2307ef9c30dc8c https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2#7de5386c8fea29e76b303f37dde4c352 https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.4-py38h43d8883_1.tar.bz2#41ca56d5cac7bfc7eb4fcdbee878eb84 +https://conda.anaconda.org/conda-forge/linux-64/kiwisolver-1.4.5-py38h7f3f72f_0.conda#eec56ac40315e360dd57c2de6604a325 https://conda.anaconda.org/conda-forge/linux-64/libblas-3.8.0-20_mkl.tar.bz2#8fbce60932c01d0e193a1a814f2002be https://conda.anaconda.org/conda-forge/noarch/locket-1.0.0-pyhd8ed1ab_0.tar.bz2#91e27ef3d05cc772ce627e51cff111c4 -https://conda.anaconda.org/conda-forge/linux-64/markupsafe-1.1.1-py38h0a891b7_4.tar.bz2#d182e0c60439427453ed4a7abd28ef0d +https://conda.anaconda.org/conda-forge/linux-64/markupsafe-2.1.3-py38h01eb140_0.conda#17d2a5314adf0f25220eeebb312d00a4 https://conda.anaconda.org/conda-forge/noarch/networkx-3.1-pyhd8ed1ab_0.conda#254f787d5068bc89f578bf63893ce8b4 https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 https://conda.anaconda.org/conda-forge/linux-64/pillow-9.4.0-py38hde6dc18_1.conda#3de5619d3f556f966189e5251a266125 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d https://conda.anaconda.org/conda-forge/linux-64/psutil-5.9.5-py38h1de0b5d_0.conda#92e899e7b0ed27c793014d1fa54f9b7b https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 -https://conda.anaconda.org/conda-forge/noarch/pygments-2.15.1-pyhd8ed1ab_0.conda#d316679235612869eba305aa7d41d9bf -https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc +https://conda.anaconda.org/conda-forge/noarch/pygments-2.16.1-pyhd8ed1ab_0.conda#40e5cb18165466773619e5c963f00a7b 
+https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.1.1-pyhd8ed1ab_0.conda#176f7d56f0cfe9008bdf1bccd7de02fb https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 https://conda.anaconda.org/conda-forge/noarch/pytz-2023.3-pyhd8ed1ab_0.conda#d3076b483092a435832603243567bc31 -https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0-py38h0a891b7_5.tar.bz2#0856c59f9ddb710c640dc0428d66b1b7 +https://conda.anaconda.org/conda-forge/linux-64/pyyaml-6.0.1-py38h01eb140_0.conda#ece207648b63c36c16a2caa201509e51 https://conda.anaconda.org/conda-forge/linux-64/setuptools-59.8.0-py38h578d9bd_1.tar.bz2#da023e4a9c777abc28434d7a6473dcc2 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 https://conda.anaconda.org/conda-forge/noarch/snowballstemmer-2.2.0-pyhd8ed1ab_0.tar.bz2#4d22a9315e78c6827f806065957d566e https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-applehelp-1.0.4-pyhd8ed1ab_0.conda#5a31a7d564f551d0e6dff52fd8cb5b16 https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-devhelp-1.0.2-py_0.tar.bz2#68e01cac9d38d0e717cd5c87bc3d2cc9 https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-htmlhelp-2.0.1-pyhd8ed1ab_0.conda#6c8c4d6eb2325e59290ac6dbbeacd5f0 -https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-py_0.tar.bz2#67cd9d9c0382d37479b4d306c369a2d4 +https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-jsmath-1.0.1-pyhd8ed1ab_0.conda#da1d979339e2714c30a8e806a33ec087 https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-qthelp-1.0.3-py_0.tar.bz2#d01180388e6d1838c3e1ad029590aa7a https://conda.anaconda.org/conda-forge/noarch/sphinxcontrib-serializinghtml-1.1.5-pyhd8ed1ab_2.tar.bz2#9ff55a0901cf952f05c654394de76bf7 -https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.2-pyhd8ed1ab_0.conda#7b39e842b52966a99e229739cd4dc36e -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/noarch/tenacity-8.2.3-pyhd8ed1ab_0.conda#1482e77f87c6a702a7e05ef22c9b197b +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 https://conda.anaconda.org/conda-forge/noarch/toolz-0.12.0-pyhd8ed1ab_0.tar.bz2#92facfec94bc02d6ccf42e7173831a36 -https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.2-py38h01eb140_0.conda#3db869202b0e523d606d13e81ca79ab6 -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 -https://conda.anaconda.org/conda-forge/noarch/wheel-0.40.0-pyhd8ed1ab_0.conda#49bb0d9e60ce1db25e151780331bb5f3 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf +https://conda.anaconda.org/conda-forge/linux-64/tornado-6.3.3-py38h01eb140_0.conda#465bbfc0eb2022837d957d045b6b627a +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.7.1-pyha770c72_0.conda#c39d6a09fe819de4951c2642629d9115 +https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.2-pyhd8ed1ab_0.conda#1ccd092478b3e0ee10d7a891adbf8a4f +https://conda.anaconda.org/conda-forge/noarch/zipp-3.16.2-pyhd8ed1ab_0.conda#2da0451b54c4563c32490cb1b7cf68a1 https://conda.anaconda.org/conda-forge/noarch/babel-2.12.1-pyhd8ed1ab_1.conda#ac432e732804a81ddcf29c92ead57cde 
-https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.0-py38h0a891b7_1.tar.bz2#183f6160ab3498b882e903b06be7d430 +https://conda.anaconda.org/conda-forge/linux-64/cytoolz-0.12.2-py38h01eb140_0.conda#e9d465b78d0b41beeb6bcceb6714520d https://conda.anaconda.org/conda-forge/linux-64/dbus-1.13.6-hfdff14a_1.tar.bz2#4caaca6356992ee545080c7d7193b5a3 https://conda.anaconda.org/conda-forge/linux-64/gstreamer-1.14.5-h36ae1b5_2.tar.bz2#00084ab2657be5bf0ba0757ccde797ef -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/jinja2-2.11.3-pyhd8ed1ab_2.tar.bz2#bdedf6199eec03402a0c5db1f25e891e -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb +https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.8.0-pyha770c72_0.conda#4e9f59a060c3be52bc4ddc46ee9b6946 +https://conda.anaconda.org/conda-forge/noarch/jinja2-3.1.2-pyhd8ed1ab_1.tar.bz2#c8490ed5c70966d232fdd389d0dbed37 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc https://conda.anaconda.org/conda-forge/linux-64/libcblas-3.8.0-20_mkl.tar.bz2#14b25490fdcc44e879ac6c10fe764f68 https://conda.anaconda.org/conda-forge/linux-64/liblapack-3.8.0-20_mkl.tar.bz2#52c0ae3606eeae7e1d493f37f336f4f5 https://conda.anaconda.org/conda-forge/noarch/memory_profiler-0.61.0-pyhd8ed1ab_0.tar.bz2#8b45f9f2b2f7a98b0ec179c8991a4a9b https://conda.anaconda.org/conda-forge/noarch/partd-1.4.0-pyhd8ed1ab_0.conda#721dab5803ea92ce02ddc4ee50aa0c48 -https://conda.anaconda.org/conda-forge/noarch/pip-23.1.2-pyhd8ed1ab_0.conda#7288da0d36821349cf1126e8670292df +https://conda.anaconda.org/conda-forge/noarch/pip-23.2.1-pyhd8ed1ab_0.conda#e2783aa3f9235225eec92f9081c5b801 https://conda.anaconda.org/conda-forge/noarch/plotly-5.14.0-pyhd8ed1ab_0.conda#6a7bcc42ef58dd6cf3da9333ea102433 +https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.0-pyhd8ed1ab_0.conda#3cfe9b9e958e7238a386933c75d190db https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.7.1-hd8ed1ab_0.conda#f96688577f1faa58096d06a45136afa2 +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.4-pyhd8ed1ab_0.conda#18badd8fa3648d1beb1fcc7f2e0f756e https://conda.anaconda.org/conda-forge/linux-64/gst-plugins-base-1.14.5-h0935bb2_2.tar.bz2#eb125ee86480e00a4a1ed45a577c3311 -https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-6.6.0-hd8ed1ab_0.conda#3cbc9615f10a3d471532b83e4250b971 +https://conda.anaconda.org/conda-forge/noarch/importlib_metadata-6.8.0-hd8ed1ab_0.conda#b279b07ce18058034e5b3606ba103a8b https://conda.anaconda.org/conda-forge/linux-64/liblapacke-3.8.0-20_mkl.tar.bz2#8274dc30518af9df1de47f5d9e73165c https://conda.anaconda.org/conda-forge/linux-64/numpy-1.17.3-py38h95a1406_0.tar.bz2#bc0cbf611fe2f86eab29b98e51404f5e -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 
+https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.10.0-pyhd8ed1ab_0.conda#0809187ef9b89a3d94a5c24d13936236 +https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b https://conda.anaconda.org/conda-forge/linux-64/blas-2.20-mkl.tar.bz2#e7d09a07f5413e53dca5282b8fa50bed https://conda.anaconda.org/conda-forge/noarch/dask-core-2023.5.0-pyhd8ed1ab_0.conda#03ed2d040648a5ba1063bf1cb0d87b78 -https://conda.anaconda.org/conda-forge/noarch/imageio-2.28.1-pyh24c5eb1_0.conda#ef3541a8cd9a55879932486a097b7fed +https://conda.anaconda.org/conda-forge/noarch/imageio-2.31.1-pyh24c5eb1_0.conda#1051cc0376612ba101d4f59e954a1ff4 https://conda.anaconda.org/conda-forge/linux-64/matplotlib-base-3.1.3-py38h250f245_0.tar.bz2#eb182969d8ed019d4de6939f393270d2 https://conda.anaconda.org/conda-forge/linux-64/pandas-1.0.5-py38hcb8c335_0.tar.bz2#1e1b4382170fd26cf722ef008ffb651e https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 +https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e https://conda.anaconda.org/conda-forge/linux-64/pywavelets-1.1.1-py38h5c078b8_3.tar.bz2#dafeef887e68bd18ec84681747ca0fd5 https://conda.anaconda.org/conda-forge/linux-64/qt-5.12.5-hd8c4c69_1.tar.bz2#0e105d4afe0c3c81c4fbd9937ec4f359 https://conda.anaconda.org/conda-forge/linux-64/scipy-1.5.0-py38h18bccfc_0.tar.bz2#b6fda3b4ee494afef756621daa115d4d -https://conda.anaconda.org/conda-forge/noarch/sphinx-4.0.1-pyh6c4a22f_2.tar.bz2#c203dcc46f262853ecbb9552c50d664e +https://conda.anaconda.org/conda-forge/noarch/sphinx-6.0.0-pyhd8ed1ab_2.conda#ac1d3b55da1669ee3a56973054fd7efb https://conda.anaconda.org/conda-forge/noarch/numpydoc-1.2-pyhd8ed1ab_0.tar.bz2#025ad7ca2c7f65007ab6b6f5d93a56eb https://conda.anaconda.org/conda-forge/noarch/patsy-0.5.3-pyhd8ed1ab_0.tar.bz2#50ef6b29b1fb0768ca82c5aeb4fb2d96 https://conda.anaconda.org/conda-forge/linux-64/pyamg-4.0.0-py38hf6732f7_1003.tar.bz2#44e00bf7a4b6a564e9313181aaea2615 https://conda.anaconda.org/conda-forge/linux-64/pyqt-5.12.3-py38ha8c2ead_3.tar.bz2#242c206b0c30fdc4c18aea16f04c4262 -https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e https://conda.anaconda.org/conda-forge/linux-64/scikit-image-0.16.2-py38hb3f55d8_0.tar.bz2#468b398fefac8884cd6e6513af66549b https://conda.anaconda.org/conda-forge/noarch/seaborn-base-0.12.2-pyhd8ed1ab_0.conda#cf88f3a1c11536bc3c10c14ad00ccc42 -https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.7.0-py_0.tar.bz2#80bad3f857ecc86a4ab73f3e57addd13 +https://conda.anaconda.org/conda-forge/noarch/sphinx-copybutton-0.5.2-pyhd8ed1ab_0.conda#ac832cc43adc79118cf6e23f1f9b8995 +https://conda.anaconda.org/conda-forge/noarch/sphinx-gallery-0.10.1-pyhd8ed1ab_0.tar.bz2#4918585fe5e5341740f7e63c61743efb https://conda.anaconda.org/conda-forge/noarch/sphinx-prompt-1.3.0-py_0.tar.bz2#9363002e2a134a287af4e32ff0f26cdc https://conda.anaconda.org/conda-forge/linux-64/matplotlib-3.1.3-py38_0.tar.bz2#1992ab91bbff86ded8d99d1f488d8e8b https://conda.anaconda.org/conda-forge/linux-64/statsmodels-0.12.2-py38h5c078b8_0.tar.bz2#33787719ad03d33cffc4e2e3ea82bc9e diff --git a/build_tools/circle/list_versions.py 
b/build_tools/circle/list_versions.py index dfcc600957469..345e08b4bece4 100755 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -4,9 +4,9 @@ import json import re import sys +from urllib.request import urlopen from sklearn.utils.fixes import parse_version -from urllib.request import urlopen def json_urlread(url): diff --git a/build_tools/circle/push_doc.sh b/build_tools/circle/push_doc.sh index c32a2d31fa811..f959b8b65c85c 100755 --- a/build_tools/circle/push_doc.sh +++ b/build_tools/circle/push_doc.sh @@ -1,8 +1,8 @@ #!/bin/bash # This script is meant to be called in the "deploy" step defined in -# circle.yml. See https://circleci.com/docs/ for more details. +# .circleci/config.yml. See https://circleci.com/docs/ for more details. # The behavior of the script is controlled by environment variable defined -# in the circle.yml in the top level folder of the project. +# in the .circleci/config.yml file. set -ex diff --git a/build_tools/cirrus/arm_tests.yml b/build_tools/cirrus/arm_tests.yml index a6e5919ecc32f..f64adbcdd4748 100644 --- a/build_tools/cirrus/arm_tests.yml +++ b/build_tools/cirrus/arm_tests.yml @@ -17,4 +17,14 @@ linux_aarch64_test_task: folder: /root/.conda/pkgs fingerprint_script: cat build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock - test_script: bash build_tools/cirrus/build_test_arm.sh + install_python_script: | + # Install python so that update_tracking_issue has access to a Python + apt install -y python3 python-is-python3 + + test_script: | + bash build_tools/cirrus/build_test_arm.sh + # On success, this script is run updating the issue. + bash build_tools/cirrus/update_tracking_issue.sh true + + on_failure: + update_tracker_script: bash build_tools/cirrus/update_tracking_issue.sh false diff --git a/build_tools/cirrus/arm_wheel.yml b/build_tools/cirrus/arm_wheel.yml index ece984c320249..f210eea817601 100644 --- a/build_tools/cirrus/arm_wheel.yml +++ b/build_tools/cirrus/arm_wheel.yml @@ -16,22 +16,22 @@ macos_arm64_wheel_task: # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires. BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f] matrix: + # Only the latest Python version is built and tested on Cirrus CI, the other + # macOS arm64 builds are on GitHub Actions. The reason is that macOS time is + # 5x more expensive than Linux times on Cirrus CI and the credits are limited + # (for free accounts). + # Note that the macOS arm64 builds are cross compiled on GitHub Actions (without + # running the tests) and while the macOS arm64 build for the latest Python version + # is actually tested on Cirrus CI. - env: - CIBW_BUILD: cp38-macosx_arm64 - - env: - CIBW_BUILD: cp39-macosx_arm64 - - env: - CIBW_BUILD: cp310-macosx_arm64 - - env: - CIBW_BUILD: cp311-macosx_arm64 + CIBW_BUILD: cp312-macosx_arm64 conda_script: - - curl -L -o ~/mambaforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh + - curl -L --retry 10 -o ~/mambaforge.sh https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-MacOSX-arm64.sh - bash ~/mambaforge.sh -b -p ~/mambaforge cibuildwheel_script: - bash build_tools/wheels/build_wheels.sh - - bash build_tools/cirrus/update_tracking_issue.sh true on_failure: update_tracker_script: @@ -61,19 +61,25 @@ linux_arm64_wheel_task: # See `maint_tools/update_tracking_issue.py` for details on the permissions the token requires. 
BOT_GITHUB_TOKEN: ENCRYPTED[9b50205e2693f9e4ce9a3f0fcb897a259289062fda2f5a3b8aaa6c56d839e0854a15872f894a70fca337dd4787274e0f] matrix: + # Only the latest Python version is tested - env: CIBW_BUILD: cp38-manylinux_aarch64 + CIBW_TEST_SKIP: "*_aarch64" - env: CIBW_BUILD: cp39-manylinux_aarch64 + CIBW_TEST_SKIP: "*_aarch64" - env: CIBW_BUILD: cp310-manylinux_aarch64 + CIBW_TEST_SKIP: "*_aarch64" - env: CIBW_BUILD: cp311-manylinux_aarch64 + CIBW_TEST_SKIP: "*_aarch64" + - env: + CIBW_BUILD: cp312-manylinux_aarch64 cibuildwheel_script: - apt install -y python3 python-is-python3 - bash build_tools/wheels/build_wheels.sh - - bash build_tools/cirrus/update_tracking_issue.sh true on_failure: update_tracker_script: @@ -82,6 +88,17 @@ linux_arm64_wheel_task: wheels_artifacts: path: "wheelhouse/*" +# Update tracker when all jobs are successful +update_tracker_success: + depends_on: + - macos_arm64_wheel + - linux_arm64_wheel + container: + image: python:3.11 + # Only update tracker for nightly builds + only_if: $CIRRUS_CRON == "nightly" + update_script: + - bash build_tools/cirrus/update_tracking_issue.sh true wheels_upload_task: depends_on: @@ -94,7 +111,7 @@ wheels_upload_task: env: # Upload tokens have been encrypted via the CirrusCI interface: # https://cirrus-ci.org/guide/writing-tasks/#encrypted-variables - SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ENCRYPTED[8f20120b18a07d8a11192b98bff1f562883558e1f4c53f8ead1577113785a4105ee6f14ad9b5dacf1803c19c4913fe1c] + SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN: ENCRYPTED[9cf0529227577d503f2e19ef31cb690a2272cb243a217fb9a1ceda5cc608e8ccc292050fde9dca94cab766e1dd418519] SCIKIT_LEARN_STAGING_UPLOAD_TOKEN: ENCRYPTED[8fade46af37fa645e57bd1ee21683337aa369ba56f6307ce13889f1e74df94e5bdd21d323baac21e332fd87b8949659a] ARTIFACTS_PATH: wheelhouse upload_script: | diff --git a/build_tools/cirrus/build_test_arm.sh b/build_tools/cirrus/build_test_arm.sh index 4eeef6ec2dc0c..551dc3689e010 100755 --- a/build_tools/cirrus/build_test_arm.sh +++ b/build_tools/cirrus/build_test_arm.sh @@ -25,7 +25,7 @@ setup_ccache() { MAMBAFORGE_URL="https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-aarch64.sh" # Install Mambaforge -wget $MAMBAFORGE_URL -O mambaforge.sh +curl -L --retry 10 $MAMBAFORGE_URL -o mambaforge.sh MAMBAFORGE_PATH=$HOME/mambaforge bash ./mambaforge.sh -b -p $MAMBAFORGE_PATH export PATH=$MAMBAFORGE_PATH/bin:$PATH diff --git a/build_tools/cirrus/py39_conda_forge_environment.yml b/build_tools/cirrus/py39_conda_forge_environment.yml index 70aedd73bf883..78a9df9cf2451 100644 --- a/build_tools/cirrus/py39_conda_forge_environment.yml +++ b/build_tools/cirrus/py39_conda_forge_environment.yml @@ -8,7 +8,7 @@ dependencies: - numpy - blas - scipy - - cython + - cython<3.0.0 - joblib - threadpoolctl - matplotlib diff --git a/build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock b/build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock index 8234eb15a0820..5f9fdd498a3e0 100644 --- a/build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock +++ b/build_tools/cirrus/py39_conda_forge_linux-aarch64_conda.lock @@ -1,8 +1,8 @@ # Generated by conda-lock. 
# platform: linux-aarch64 -# input_hash: de5bfe2a68b349f08233af7b94fc3b2045503b21289e8d3bdb30a1613fd0ddb8 +# input_hash: cdaa5d55d8877a790d9ff09cc07901d3dc654506873fdf6b254372ee2f516f41 @EXPLICIT -https://conda.anaconda.org/conda-forge/linux-aarch64/ca-certificates-2023.5.7-hcefe29a_0.conda#331e624442b88d96bc05a7f2d38c61a4 +https://conda.anaconda.org/conda-forge/linux-aarch64/ca-certificates-2023.7.22-hcefe29a_0.conda#95d7f998087114466fa91e7c2887fa2f https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.40-h2d8c526_0.conda#16246d69e945d0b1969a6099e7c5d457 https://conda.anaconda.org/conda-forge/linux-aarch64/libgfortran5-13.1.0-h24e4805_0.conda#069e75bfdbed7744ee64a2b840fccc4e https://conda.anaconda.org/conda-forge/linux-aarch64/libstdcxx-ng-13.1.0-h452befe_0.conda#572f5798bb3d4cc79650f0ca3149aeaa @@ -13,7 +13,7 @@ https://conda.anaconda.org/conda-forge/linux-aarch64/_openmp_mutex-4.5-2_kmp_llv https://conda.anaconda.org/conda-forge/linux-aarch64/libgcc-ng-13.1.0-h2b4548d_0.conda#02619409d02932e28d694144b509597d https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-hf897c2e_4.tar.bz2#2d787570a729e273a4e75775ddf3348a https://conda.anaconda.org/conda-forge/linux-aarch64/lerc-4.0.0-h4de3ea5_0.tar.bz2#1a0ffc65e03ce81559dbcb0695ad1476 -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlicommon-1.0.9-h4e544f5_8.tar.bz2#3cedc3935cfaa2a5303daa25fb12cb1d +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlicommon-1.1.0-h31becfc_0.conda#248405ab84e457c61d2f3cae91bf90d9 https://conda.anaconda.org/conda-forge/linux-aarch64/libdeflate-1.18-hb4cce97_0.conda#e0d520842c0ae66b560cc65f9b96f658 https://conda.anaconda.org/conda-forge/linux-aarch64/libffi-3.4.2-h3557bc0_5.tar.bz2#dddd85f4d52121fab0a8b099c5e06501 https://conda.anaconda.org/conda-forge/linux-aarch64/libhiredis-1.0.2-h05efe27_0.tar.bz2#a87f068744fd20334cd41489eb163bee @@ -21,81 +21,81 @@ https://conda.anaconda.org/conda-forge/linux-aarch64/libjpeg-turbo-2.1.5.1-hb4cc https://conda.anaconda.org/conda-forge/linux-aarch64/libnsl-2.0.0-hf897c2e_0.tar.bz2#36fdbc05c9d9145ece86f5a63c3f352e https://conda.anaconda.org/conda-forge/linux-aarch64/libopenblas-0.3.23-pthreads_hd703e6f_0.conda#b8265d6197f98ed95a6cc2aa5efb708b https://conda.anaconda.org/conda-forge/linux-aarch64/libuuid-2.38.1-hb4cce97_0.conda#000e30b09db0b7c775b21695dff30969 -https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.3.0-hb4cce97_0.conda#53670eaee6d77d9fe60a84f7fd226a4c -https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.2.13-h4e544f5_4.tar.bz2#88596b6277fe6d39f046983aae6044db +https://conda.anaconda.org/conda-forge/linux-aarch64/libwebp-base-1.3.1-h31becfc_0.conda#cef7349b72e170880662f4e39864fe44 +https://conda.anaconda.org/conda-forge/linux-aarch64/libzlib-1.2.13-h31becfc_5.conda#b213aa87eea9491ef7b129179322e955 https://conda.anaconda.org/conda-forge/linux-aarch64/ncurses-6.4-h2e1726e_0.conda#40beaf447150c2760affc591c7509595 -https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.1.1-h31becfc_1.conda#a8e811c3390d93e5db0cef68e52f349f +https://conda.anaconda.org/conda-forge/linux-aarch64/openssl-3.1.2-h31becfc_0.conda#fde5a105b2bc9d6eced8a23005492c7e https://conda.anaconda.org/conda-forge/linux-aarch64/pthread-stubs-0.4-hb9de7d4_1001.tar.bz2#d0183ec6ce0b5aaa3486df25fa5f0ded https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxau-1.0.11-h31becfc_0.conda#13de34f69cb73165dbe08c1e9148bedb 
https://conda.anaconda.org/conda-forge/linux-aarch64/xorg-libxdmcp-1.1.3-h3557bc0_0.tar.bz2#a6c9016ae1ca5c47a3603ed4cd65fedd https://conda.anaconda.org/conda-forge/linux-aarch64/xz-5.2.6-h9cdd2b7_0.tar.bz2#83baad393a31d59c20b63ba4da6592df https://conda.anaconda.org/conda-forge/linux-aarch64/libblas-3.9.0-17_linuxaarch64_openblas.conda#28fabad08c2cc13f3fd507cfaeb12b7c -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlidec-1.0.9-h4e544f5_8.tar.bz2#319956380b383ec9f6a46d585599c028 -https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlienc-1.0.9-h4e544f5_8.tar.bz2#56a0a025208af24e2b43b2bbeee79802 +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlidec-1.1.0-h31becfc_0.conda#0d9244dd0d6d2d3755e0940df962c7e6 +https://conda.anaconda.org/conda-forge/linux-aarch64/libbrotlienc-1.1.0-h31becfc_0.conda#4c9b06b89a25951ef6abb8d60e5c0e97 https://conda.anaconda.org/conda-forge/linux-aarch64/libpng-1.6.39-hf9034f9_0.conda#5ec9052384a6ac85e9111e9ac7c5ec4c -https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.42.0-h194ca79_0.conda#5fc895d5063af554f24a7eb69faff054 +https://conda.anaconda.org/conda-forge/linux-aarch64/libsqlite-3.43.0-h194ca79_0.conda#9df5c3004549dc17b49843e0ce88c0fe https://conda.anaconda.org/conda-forge/linux-aarch64/libxcb-1.15-h2a766a3_0.conda#eb3d8c8170e3d03f2564ed2024aa00c8 https://conda.anaconda.org/conda-forge/linux-aarch64/openblas-0.3.23-pthreads_hef96516_0.conda#be3708e4cd351496c0ca051b552f4e04 https://conda.anaconda.org/conda-forge/linux-aarch64/readline-8.2-h8fc344f_1.conda#105eb1e16bf83bfb2eb380a48032b655 https://conda.anaconda.org/conda-forge/linux-aarch64/tk-8.6.12-hd8af866_0.tar.bz2#7894e82ff743bd96c76585ddebe28e2a -https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.2-h44f6412_6.conda#6d0d1cd6d184129eabb96bb220afb5b2 -https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-bin-1.0.9-h4e544f5_8.tar.bz2#0980429a0148a53edd0f1f207ec28a39 +https://conda.anaconda.org/conda-forge/linux-aarch64/zstd-1.5.5-h4c53e97_0.conda#b74eb9dbb5c3c15cb3cee7cbdf198c75 +https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-bin-1.1.0-h31becfc_0.conda#0743026255c480174e65563ad0dc1ef2 https://conda.anaconda.org/conda-forge/linux-aarch64/ccache-4.8.1-h6552966_0.conda#5b436a19e818f05fe0c9ab4f5ac61233 https://conda.anaconda.org/conda-forge/linux-aarch64/freetype-2.12.1-hbbbf32d_1.conda#e0891290982420d67651589c8584eec3 https://conda.anaconda.org/conda-forge/linux-aarch64/libcblas-3.9.0-17_linuxaarch64_openblas.conda#41ed49a8f3a083999c2e733ddc2d4471 https://conda.anaconda.org/conda-forge/linux-aarch64/liblapack-3.9.0-17_linuxaarch64_openblas.conda#362f230b41a01afb0445abd526a8d3e1 -https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.5.0-h536c0eb_6.conda#75a0916176030b99c03ca2ecfe961128 -https://conda.anaconda.org/conda-forge/linux-aarch64/llvm-openmp-16.0.5-h8b0cb96_0.conda#758ab64e00194a2171aea78bb8666d53 -https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.9.16-hb363c5e_0_cpython.conda#0a7ef29549eaef817898062eeeefebd3 -https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-1.0.9-h4e544f5_8.tar.bz2#259d82bd990ba225508389509634b157 -https://conda.anaconda.org/conda-forge/noarch/certifi-2023.5.7-pyhd8ed1ab_0.conda#5d1b71c942b8421285934dad1d891ebc -https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.1.0-pyhd8ed1ab_0.conda#7fcff9f6f123696e940bda77bd4d6551 +https://conda.anaconda.org/conda-forge/linux-aarch64/libtiff-4.5.1-h360e80f_1.conda#e24b0471cfa5536eb55f92178a0ee132 
+https://conda.anaconda.org/conda-forge/linux-aarch64/llvm-openmp-16.0.6-h8b0cb96_0.conda#0770c01103884828182937b593ab60d7 +https://conda.anaconda.org/conda-forge/linux-aarch64/python-3.9.18-h4ac3b42_0_cpython.conda#4d36e157278470ac06508579c6d36555 +https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-1.1.0-h31becfc_0.conda#010f4b1a13cf0e9d0c72e5ece2e3d43c +https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-python-1.1.0-py39h387a81e_0.conda#0792e6060facc1fe11000a0f42168be2 +https://conda.anaconda.org/conda-forge/noarch/certifi-2023.7.22-pyhd8ed1ab_0.conda#7f3dbc9179b4dde7da98dfb151d0ad22 +https://conda.anaconda.org/conda-forge/noarch/charset-normalizer-3.2.0-pyhd8ed1ab_0.conda#313516e9a4b08b12dfb1e1cd390a96e3 https://conda.anaconda.org/conda-forge/noarch/colorama-0.4.6-pyhd8ed1ab_0.tar.bz2#3faab06a954c2a04039983f2c4a50d99 https://conda.anaconda.org/conda-forge/noarch/cycler-0.11.0-pyhd8ed1ab_0.tar.bz2#a50559fad0affdbb33729a68669ca1cb -https://conda.anaconda.org/conda-forge/linux-aarch64/cython-0.29.35-py39h387a81e_0.conda#e8ba01e9056aca19ffd7df2479f3c6ce -https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.1-pyhd8ed1ab_0.conda#7312299d7a0ea4993159229b7d2dceb2 -https://conda.anaconda.org/conda-forge/noarch/execnet-1.9.0-pyhd8ed1ab_0.tar.bz2#0e521f7a5e60d508b121d38b04874fb2 +https://conda.anaconda.org/conda-forge/linux-aarch64/cython-0.29.36-py39h387a81e_0.conda#18428f05f95972433b4f29725b7e5f65 +https://conda.anaconda.org/conda-forge/noarch/exceptiongroup-1.1.3-pyhd8ed1ab_0.conda#e6518222753f519e911e83136d2158d9 +https://conda.anaconda.org/conda-forge/noarch/execnet-2.0.2-pyhd8ed1ab_0.conda#67de0d8241e1060a479e3c37793e26f9 https://conda.anaconda.org/conda-forge/noarch/idna-3.4-pyhd8ed1ab_0.tar.bz2#34272b248891bddccc64479f9a7fffed https://conda.anaconda.org/conda-forge/noarch/iniconfig-2.0.0-pyhd8ed1ab_0.conda#f800d2da156d08e289b14e87e43c1ae5 -https://conda.anaconda.org/conda-forge/linux-aarch64/kiwisolver-1.4.4-py39h110580c_1.tar.bz2#9c045502f6ab8c89bfda6be3c389e503 +https://conda.anaconda.org/conda-forge/linux-aarch64/kiwisolver-1.4.5-py39had2cf8c_0.conda#8bc1f00886105474896e2f724448e59c https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.15-h3e0bdec_1.conda#5d6c6a9042e2316cec7410dd085814d1 https://conda.anaconda.org/conda-forge/linux-aarch64/liblapacke-3.9.0-17_linuxaarch64_openblas.conda#1522e3323e898ae9fadd11424a3c0b75 https://conda.anaconda.org/conda-forge/noarch/munkres-1.1.4-pyh9f0ad1d_0.tar.bz2#2ba8498c1018c1e9c61eb99b973dfe19 -https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-1.24.3-py39hf88902c_0.conda#dc4187f9993e49b36eb9c61ce63ed3c5 +https://conda.anaconda.org/conda-forge/linux-aarch64/numpy-1.25.2-py39hf88902c_0.conda#0d8d2e1a1d2dd0bc581b4682ee9fd95a https://conda.anaconda.org/conda-forge/linux-aarch64/openjpeg-2.5.0-h9508984_2.conda#3d56d402a845c243f8c2dd3c8e836029 https://conda.anaconda.org/conda-forge/noarch/packaging-23.1-pyhd8ed1ab_0.conda#91cda59e66e1e4afe9476f8ef98f5c30 -https://conda.anaconda.org/conda-forge/noarch/pluggy-1.0.0-pyhd8ed1ab_5.tar.bz2#7d301a0d25f424d96175f810935f0da9 +https://conda.anaconda.org/conda-forge/noarch/pluggy-1.3.0-pyhd8ed1ab_0.conda#2390bd10bed1f3fdc7a537fb5a447d8d https://conda.anaconda.org/conda-forge/noarch/py-1.11.0-pyh6c4a22f_0.tar.bz2#b4613d7e7a493916d867842a6a148054 https://conda.anaconda.org/conda-forge/noarch/pyparsing-3.0.9-pyhd8ed1ab_0.tar.bz2#e8fbc1b54b25f4b08281467bc13b70cc https://conda.anaconda.org/conda-forge/noarch/pysocks-1.7.1-pyha2e5f31_6.tar.bz2#2a7de29fb590ca14b5243c4c812c8025 
-https://conda.anaconda.org/conda-forge/noarch/setuptools-67.7.2-pyhd8ed1ab_0.conda#3b68bc43ec6baa48f7354a446267eefe +https://conda.anaconda.org/conda-forge/noarch/setuptools-68.1.2-pyhd8ed1ab_0.conda#4fe12573bf499ff85a0a364e00cc5c53 https://conda.anaconda.org/conda-forge/noarch/six-1.16.0-pyh6c4a22f_0.tar.bz2#e5f25f8dbc060e9a8d912e432202afc2 -https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.1.0-pyh8a188c0_0.tar.bz2#a2995ee828f65687ac5b1e71a2ab1e0c +https://conda.anaconda.org/conda-forge/noarch/threadpoolctl-3.2.0-pyha21a80b_0.conda#978d03388b62173b8e6f79162cf52b86 https://conda.anaconda.org/conda-forge/noarch/tomli-2.0.1-pyhd8ed1ab_0.tar.bz2#5844808ffab9ebdb694585b50ba02a96 -https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.3.2-py39h7cc1d5f_0.conda#2c853c8bb419699667c452a01f69749f -https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.6.3-pyha770c72_0.conda#4a3014a4d107d15475d106b751c4e352 +https://conda.anaconda.org/conda-forge/linux-aarch64/tornado-6.3.3-py39h7cc1d5f_0.conda#8638585a9a10a548b881394853f76bc5 +https://conda.anaconda.org/conda-forge/noarch/typing_extensions-4.7.1-pyha770c72_0.conda#c39d6a09fe819de4951c2642629d9115 https://conda.anaconda.org/conda-forge/linux-aarch64/unicodedata2-15.0.0-py39h0fd3b05_0.tar.bz2#835f1a9631e600e0176593e95e85f73f -https://conda.anaconda.org/conda-forge/noarch/wheel-0.40.0-pyhd8ed1ab_0.conda#49bb0d9e60ce1db25e151780331bb5f3 -https://conda.anaconda.org/conda-forge/noarch/zipp-3.15.0-pyhd8ed1ab_0.conda#13018819ca8f5b7cc675a8faf1f5fedf +https://conda.anaconda.org/conda-forge/noarch/wheel-0.41.2-pyhd8ed1ab_0.conda#1ccd092478b3e0ee10d7a891adbf8a4f +https://conda.anaconda.org/conda-forge/noarch/zipp-3.16.2-pyhd8ed1ab_0.conda#2da0451b54c4563c32490cb1b7cf68a1 https://conda.anaconda.org/conda-forge/linux-aarch64/blas-devel-3.9.0-17_linuxaarch64_openblas.conda#d8a3c0b2b389b2a64b3a1b5e59ae2e09 -https://conda.anaconda.org/conda-forge/linux-aarch64/contourpy-1.0.7-py39hd9a2fea_0.conda#efa783bf5c2b30aba3cf22599fe0274e -https://conda.anaconda.org/conda-forge/linux-aarch64/fonttools-4.39.4-py39h898b7ef_0.conda#c10973b2dc04e82014938c14b919e6e0 -https://conda.anaconda.org/conda-forge/noarch/importlib-metadata-6.6.0-pyha770c72_0.conda#f91a5d5175fb7ff2a91952ec7da59cb9 -https://conda.anaconda.org/conda-forge/noarch/importlib_resources-5.12.0-pyhd8ed1ab_0.conda#e5fd2260a231ee63b6969f4801082f2b -https://conda.anaconda.org/conda-forge/noarch/joblib-1.2.0-pyhd8ed1ab_0.tar.bz2#7583652522d71ad78ba536bba06940eb -https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-9.5.0-py39hc5b5638_1.conda#0560194d0eab633c666299c993869cca -https://conda.anaconda.org/conda-forge/noarch/pip-23.1.2-pyhd8ed1ab_0.conda#7288da0d36821349cf1126e8670292df +https://conda.anaconda.org/conda-forge/linux-aarch64/contourpy-1.1.0-py39hd16970a_0.conda#814adec2c3feda6643d00a55d6b9debf +https://conda.anaconda.org/conda-forge/linux-aarch64/fonttools-4.42.1-py39h898b7ef_0.conda#c4dd55a5e5a98da30649c0809814ca6d +https://conda.anaconda.org/conda-forge/noarch/importlib_resources-6.0.1-pyhd8ed1ab_0.conda#d978c61aa5fc2c69380d53ad56b5ae86 +https://conda.anaconda.org/conda-forge/noarch/joblib-1.3.2-pyhd8ed1ab_0.conda#4da50d410f553db77e62ab62ffaa1abc +https://conda.anaconda.org/conda-forge/linux-aarch64/pillow-10.0.0-py39hc5b5638_0.conda#3a11d8338a85e3f93c5bbda05ec8d3c4 +https://conda.anaconda.org/conda-forge/noarch/pip-23.2.1-pyhd8ed1ab_0.conda#e2783aa3f9235225eec92f9081c5b801 
+https://conda.anaconda.org/conda-forge/noarch/pytest-7.4.0-pyhd8ed1ab_0.conda#3cfe9b9e958e7238a386933c75d190db https://conda.anaconda.org/conda-forge/noarch/python-dateutil-2.8.2-pyhd8ed1ab_0.tar.bz2#dd999d1cc9f79e67dbb855c8924c7984 -https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.6.3-hd8ed1ab_0.conda#3876f650ed7d0f95d70fa4b647621909 -https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.2-pyhd8ed1ab_0.conda#81a763f3c64fe6d5f32e033b0325265d +https://conda.anaconda.org/conda-forge/noarch/typing-extensions-4.7.1-hd8ed1ab_0.conda#f96688577f1faa58096d06a45136afa2 +https://conda.anaconda.org/conda-forge/noarch/urllib3-2.0.4-pyhd8ed1ab_0.conda#18badd8fa3648d1beb1fcc7f2e0f756e https://conda.anaconda.org/conda-forge/linux-aarch64/blas-2.117-openblas.conda#5f88c5a193286ed9a87afd4b815e8c70 -https://conda.anaconda.org/conda-forge/noarch/importlib-resources-5.12.0-pyhd8ed1ab_0.conda#3544c818f0720c89eb16ae6940ab440b -https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.5.1-pyhd8ed1ab_0.conda#e2be672aece1f060adf7154f76531a35 -https://conda.anaconda.org/conda-forge/noarch/pytest-7.3.1-pyhd8ed1ab_0.conda#547c7de697ec99b494a28ddde185b5a4 +https://conda.anaconda.org/conda-forge/noarch/importlib-resources-6.0.1-pyhd8ed1ab_0.conda#54661981fd331e20847d8a49543dd9af +https://conda.anaconda.org/conda-forge/noarch/platformdirs-3.10.0-pyhd8ed1ab_0.conda#0809187ef9b89a3d94a5c24d13936236 +https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 https://conda.anaconda.org/conda-forge/noarch/requests-2.31.0-pyhd8ed1ab_0.conda#a30144e4156cdbb236f99ebb49828f8b -https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-base-3.7.1-py39h2983639_0.conda#6ca14f00270585ac4ff20b04106817ee +https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-base-3.7.2-py39h4ad26d3_0.conda#5f86c21c0c21d294f1c3aa9399e389eb https://conda.anaconda.org/conda-forge/noarch/pooch-1.7.0-pyha770c72_3.conda#5936894aade8240c867d292aa0d980c6 -https://conda.anaconda.org/conda-forge/noarch/pytest-forked-1.6.0-pyhd8ed1ab_0.conda#a46947638b6e005b63d2d6271da529b0 -https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-3.7.1-py39ha65689a_0.conda#ba11d081599ada176b3ca99821e1b753 https://conda.anaconda.org/conda-forge/noarch/pytest-xdist-2.5.0-pyhd8ed1ab_0.tar.bz2#1fdd1f3baccf0deb647385c677a1a48e -https://conda.anaconda.org/conda-forge/linux-aarch64/scipy-1.10.1-py39hf88902c_3.conda#032bb28beb0c37c48b6e33dadc18f0ec +https://conda.anaconda.org/conda-forge/linux-aarch64/matplotlib-3.7.2-py39ha65689a_0.conda#ea8ac50ecf9cd562870764d909a35abc +https://conda.anaconda.org/conda-forge/linux-aarch64/scipy-1.11.2-py39hf88902c_0.conda#91d995cdb639f080707d23bbf97d55b0 diff --git a/build_tools/generate_authors_table.py b/build_tools/generate_authors_table.py index d4da0db5be3c1..f438927772619 100644 --- a/build_tools/generate_authors_table.py +++ b/build_tools/generate_authors_table.py @@ -6,12 +6,13 @@ The table should be updated for each new inclusion in the teams. Generating the table requires admin rights. """ -import sys -import requests import getpass +import sys import time -from pathlib import Path from os import path +from pathlib import Path + +import requests print("user:", file=sys.stderr) user = input() @@ -42,17 +43,24 @@ def get_contributors(): """Get the list of contributor profiles. 
Require admin rights.""" # get core devs and contributor experience team core_devs = [] + documentation_team = [] contributor_experience_team = [] comm_team = [] core_devs_slug = "core-devs" contributor_experience_team_slug = "contributor-experience-team" comm_team_slug = "communication-team" + documentation_team_slug = "documentation-team" entry_point = "https://api.github.com/orgs/scikit-learn/" for team_slug, lst in zip( - (core_devs_slug, contributor_experience_team_slug, comm_team_slug), - (core_devs, contributor_experience_team, comm_team), + ( + core_devs_slug, + contributor_experience_team_slug, + comm_team_slug, + documentation_team_slug, + ), + (core_devs, contributor_experience_team, comm_team, documentation_team), ): for page in [1, 2]: # 30 per page reply = get(f"{entry_point}teams/{team_slug}/members?page={page}") @@ -66,6 +74,7 @@ def get_contributors(): # keep only the logins core_devs = set(c["login"] for c in core_devs) + documentation_team = set(c["login"] for c in documentation_team) contributor_experience_team = set(c["login"] for c in contributor_experience_team) comm_team = set(c["login"] for c in comm_team) members = set(c["login"] for c in members) @@ -80,11 +89,23 @@ def get_contributors(): core_devs # remove ogrisel from contributor_experience_team ) - emeritus = members - core_devs - contributor_experience_team - comm_team + emeritus = ( + members + - core_devs + - contributor_experience_team + - comm_team + - documentation_team + ) # hard coded + emeritus_contributor_experience_team = { + "cmarmo", + } emeritus_comm_team = {"reshamas"} + # Up-to-now, we can subtract the team emeritus from the original emeritus + emeritus -= emeritus_contributor_experience_team | emeritus_comm_team + comm_team -= {"reshamas"} # in the comm team but not on the web page # get profiles from GitHub @@ -93,13 +114,21 @@ def get_contributors(): contributor_experience_team = [ get_profile(login) for login in contributor_experience_team ] + emeritus_contributor_experience_team = [ + get_profile(login) for login in emeritus_contributor_experience_team + ] comm_team = [get_profile(login) for login in comm_team] emeritus_comm_team = [get_profile(login) for login in emeritus_comm_team] + documentation_team = [get_profile(login) for login in documentation_team] # sort by last name core_devs = sorted(core_devs, key=key) emeritus = sorted(emeritus, key=key) contributor_experience_team = sorted(contributor_experience_team, key=key) + emeritus_contributor_experience_team = sorted( + emeritus_contributor_experience_team, key=key + ) + documentation_team = sorted(documentation_team, key=key) comm_team = sorted(comm_team, key=key) emeritus_comm_team = sorted(emeritus_comm_team, key=key) @@ -107,8 +136,10 @@ def get_contributors(): core_devs, emeritus, contributor_experience_team, + emeritus_contributor_experience_team, comm_team, emeritus_comm_team, + documentation_team, ) @@ -176,8 +207,10 @@ def generate_list(contributors): core_devs, emeritus, contributor_experience_team, + emeritus_contributor_experience_team, comm_team, emeritus_comm_team, + documentation_team, ) = get_contributors() with open(REPO_FOLDER / "doc" / "authors.rst", "w+", encoding="utf-8") as rst_file: @@ -193,6 +226,13 @@ def generate_list(contributors): ) as rst_file: rst_file.write(generate_table(contributor_experience_team)) + with open( + REPO_FOLDER / "doc" / "contributor_experience_team_emeritus.rst", + "w+", + encoding="utf-8", + ) as rst_file: + rst_file.write(generate_list(emeritus_contributor_experience_team)) + with 
open( REPO_FOLDER / "doc" / "communication_team.rst", "w+", encoding="utf-8" ) as rst_file: @@ -202,3 +242,8 @@ def generate_list(contributors): REPO_FOLDER / "doc" / "communication_team_emeritus.rst", "w+", encoding="utf-8" ) as rst_file: rst_file.write(generate_list(emeritus_comm_team)) + + with open( + REPO_FOLDER / "doc" / "documentation_team.rst", "w+", encoding="utf-8" + ) as rst_file: + rst_file.write(generate_table(documentation_team)) diff --git a/build_tools/get_comment.py b/build_tools/get_comment.py new file mode 100644 index 0000000000000..64c5784e0cd06 --- /dev/null +++ b/build_tools/get_comment.py @@ -0,0 +1,357 @@ +# This script is used to generate a comment for a PR when linting issues are +# detected. It is used by the `Comment on failed linting` GitHub Action. +# This script fails if there are no comments to be posted. + +import os + +import requests + + +def get_versions(versions_file): + """Get the versions of the packages used in the linter job. + + Parameters + ---------- + versions_file : str + The path to the file that contains the versions of the packages. + + Returns + ------- + versions : dict + A dictionary with the versions of the packages. + """ + with open(versions_file, "r") as f: + return dict(line.strip().split("=") for line in f) + + +def get_step_message(log, start, end, title, message, details): + """Get the message for a specific test. + + Parameters + ---------- + log : str + The log of the linting job. + + start : str + The string that marks the start of the test. + + end : str + The string that marks the end of the test. + + title : str + The title for this section. + + message : str + The message to be added at the beginning of the section. + + details : bool + Whether to add the details of each step. + + Returns + ------- + message : str + The message to be added to the comment. + """ + if end not in log: + return "" + res = ( + "-----------------------------------------------\n" + + f"### {title}\n\n" + + message + + "\n\n" + ) + if details: + res += ( + "
<details>\n\n```\n" + + log[log.find(start) + len(start) + 1 : log.find(end) - 1] + + "\n```\n\n</details>
\n\n" + ) + return res + + +def get_message(log_file, repo, pr_number, sha, run_id, details, versions): + with open(log_file, "r") as f: + log = f.read() + + sub_text = ( + "\n\n _Generated for commit:" + f" [{sha[:7]}](https://github.com/{repo}/pull/{pr_number}/commits/{sha}). " + "Link to the linter CI: [here]" + f"(https://github.com/{repo}/actions/runs/{run_id})_ " + ) + + if "### Linting completed ###" not in log: + return ( + "## ❌ Linting issues\n\n" + "There was an issue running the linter job. Please update with " + "`upstream/main` ([link](" + "https://scikit-learn.org/dev/developers/contributing.html" + "#how-to-contribute)) and push the changes. If you already have done " + "that, please send an empty commit with `git commit --allow-empty` " + "and push the changes to trigger the CI.\n\n" + + sub_text + ) + + message = "" + + # black + message += get_step_message( + log, + start="### Running black ###", + end="Problems detected by black", + title="`black`", + message=( + "`black` detected issues. Please run `black .` locally and push " + "the changes. Here you can see the detected issues. Note that " + "running black might also fix some of the issues which might be " + "detected by `ruff`. Note that the installed `black` version is " + f"`black={versions['black']}`." + ), + details=details, + ) + + # ruff + message += get_step_message( + log, + start="### Running ruff ###", + end="Problems detected by ruff", + title="`ruff`", + message=( + "`ruff` detected issues. Please run `ruff --fix --show-source .` " + "locally, fix the remaining issues, and push the changes. " + "Here you can see the detected issues. Note that the installed " + f"`ruff` version is `ruff={versions['ruff']}`." + ), + details=details, + ) + + # mypy + message += get_step_message( + log, + start="### Running mypy ###", + end="Problems detected by mypy", + title="`mypy`", + message=( + "`mypy` detected issues. Please fix them locally and push the changes. " + "Here you can see the detected issues. Note that the installed `mypy` " + f"version is `mypy={versions['mypy']}`." + ), + details=details, + ) + + # cython-lint + message += get_step_message( + log, + start="### Running cython-lint ###", + end="Problems detected by cython-lint", + title="`cython-lint`", + message=( + "`cython-lint` detected issues. Please fix them locally and push " + "the changes. Here you can see the detected issues. Note that the " + "installed `cython-lint` version is " + f"`cython-lint={versions['cython-lint']}`." + ), + details=details, + ) + + # deprecation order + message += get_step_message( + log, + start="### Checking for bad deprecation order ###", + end="Problems detected by deprecation order check", + title="Deprecation Order", + message=( + "Deprecation order check detected issues. Please fix them locally and " + "push the changes. Here you can see the detected issues." + ), + details=details, + ) + + # doctest directives + message += get_step_message( + log, + start="### Checking for default doctest directives ###", + end="Problems detected by doctest directive check", + title="Doctest Directives", + message=( + "doctest directive check detected issues. Please fix them locally and " + "push the changes. Here you can see the detected issues." + ), + details=details, + ) + + # joblib imports + message += get_step_message( + log, + start="### Checking for joblib imports ###", + end="Problems detected by joblib import check", + title="Joblib Imports", + message=( + "`joblib` import check detected issues. 
Please fix them locally and " + "push the changes. Here you can see the detected issues." + ), + details=details, + ) + + if not message: + # no issues detected, so this script "fails" + return ( + "## ✔️ Linting Passed\n" + "All linting checks passed. Your pull request is in excellent shape! ☀️" + + sub_text + ) + + if not details: + # This happens if posting the log fails, which happens if the log is too + # long. Typically, this happens if the PR branch hasn't been updated + # since we've introduced import sorting. + branch_not_updated = ( + "_Merging with `upstream/main` might fix / improve the issues if you " + "haven't done that since 21.06.2023._\n\n" + ) + else: + branch_not_updated = "" + + message = ( + "## ❌ Linting issues\n\n" + + branch_not_updated + + "This PR is introducing linting issues. Here's a summary of the issues. " + + "Note that you can avoid having linting issues by enabling `pre-commit` " + + "hooks. Instructions to enable them can be found [here](" + + "https://scikit-learn.org/dev/developers/contributing.html#how-to-contribute)" + + ".\n\n" + + "You can see the details of the linting issues under the `lint` job [here]" + + f"(https://github.com/{repo}/actions/runs/{run_id})\n\n" + + message + + sub_text + ) + + return message + + +def get_headers(token): + """Get the headers for the GitHub API.""" + return { + "Accept": "application/vnd.github+json", + "Authorization": f"Bearer {token}", + "X-GitHub-Api-Version": "2022-11-28", + } + + +def find_lint_bot_comments(repo, token, pr_number): + """Get the comment from the linting bot.""" + # repo is in the form of "org/repo" + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#list-issue-comments # noqa + response = requests.get( + f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments", + headers=get_headers(token), + ) + response.raise_for_status() + all_comments = response.json() + + failed_comment = "❌ Linting issues" + success_comment = "✔️ Linting Passed" + + # Find all comments that match the linting bot, and return the first one. + # There should always be only one such comment, or none, if the PR is + # just created. + comments = [ + comment + for comment in all_comments + if comment["user"]["login"] == "github-actions[bot]" + and (failed_comment in comment["body"] or success_comment in comment["body"]) + ] + + if len(all_comments) > 25 and not comments: + # By default the API returns the first 30 comments. If we can't find the + # comment created by the bot in those, then we raise and we skip creating + # a comment in the first place. 
+ raise RuntimeError("Comment not found in the first 30 comments.") + + return comments[0] if comments else None + + +def create_or_update_comment(comment, message, repo, pr_number, token): + """Create a new comment or update existing one.""" + # repo is in the form of "org/repo" + if comment is not None: + print("updating existing comment") + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#update-an-issue-comment # noqa + response = requests.patch( + f"https://api.github.com/repos/{repo}/issues/comments/{comment['id']}", + headers=get_headers(token), + json={"body": message}, + ) + else: + print("creating new comment") + # API doc: https://docs.github.com/en/rest/issues/comments?apiVersion=2022-11-28#create-an-issue-comment # noqa + response = requests.post( + f"https://api.github.com/repos/{repo}/issues/{pr_number}/comments", + headers=get_headers(token), + json={"body": message}, + ) + + response.raise_for_status() + + +if __name__ == "__main__": + repo = os.environ["GITHUB_REPOSITORY"] + token = os.environ["GITHUB_TOKEN"] + pr_number = os.environ["PR_NUMBER"] + sha = os.environ["BRANCH_SHA"] + log_file = os.environ["LOG_FILE"] + run_id = os.environ["RUN_ID"] + versions_file = os.environ["VERSIONS_FILE"] + + versions = get_versions(versions_file) + + if not repo or not token or not pr_number or not log_file or not run_id: + raise ValueError( + "One of the following environment variables is not set: " + "GITHUB_REPOSITORY, GITHUB_TOKEN, PR_NUMBER, LOG_FILE, RUN_ID" + ) + + try: + comment = find_lint_bot_comments(repo, token, pr_number) + except RuntimeError: + print("Comment not found in the first 30 comments. Skipping!") + exit(0) + + try: + message = get_message( + log_file, + repo=repo, + pr_number=pr_number, + sha=sha, + run_id=run_id, + details=True, + versions=versions, + ) + create_or_update_comment( + comment=comment, + message=message, + repo=repo, + pr_number=pr_number, + token=token, + ) + print(message) + except requests.HTTPError: + # The above fails if the message is too long. In that case, we + # try again without the details. 
+ message = get_message( + log_file, + repo=repo, + pr_number=pr_number, + sha=sha, + run_id=run_id, + details=False, + versions=versions, + ) + create_or_update_comment( + comment=comment, + message=message, + repo=repo, + pr_number=pr_number, + token=token, + ) + print(message) diff --git a/build_tools/github/build_minimal_windows_image.sh b/build_tools/github/build_minimal_windows_image.sh index 4399bfa80704e..aa7bfc3e31f9f 100755 --- a/build_tools/github/build_minimal_windows_image.sh +++ b/build_tools/github/build_minimal_windows_image.sh @@ -14,6 +14,9 @@ cp $WHEEL_PATH $WHEEL_NAME # Dot the Python version for identyfing the base Docker image PYTHON_VERSION=$(echo ${PYTHON_VERSION:0:1}.${PYTHON_VERSION:1:2}) +if [[ "$CIBW_PRERELEASE_PYTHONS" == "True" ]]; then + PYTHON_VERSION="$PYTHON_VERSION-rc" +fi # Build a minimal Windows Docker image for testing the wheels docker build --build-arg PYTHON_VERSION=$PYTHON_VERSION \ --build-arg WHEEL_NAME=$WHEEL_NAME \ diff --git a/build_tools/github/check_wheels.py b/build_tools/github/check_wheels.py index 99d319cba4dc5..3860d3e81adb7 100644 --- a/build_tools/github/check_wheels.py +++ b/build_tools/github/check_wheels.py @@ -1,8 +1,9 @@ """Checks that dist/* contains the number of wheels built from the .github/workflows/wheels.yml config.""" -import yaml -from pathlib import Path import sys +from pathlib import Path + +import yaml gh_wheel_path = Path.cwd() / ".github" / "workflows" / "wheels.yml" with gh_wheel_path.open("r") as f: diff --git a/build_tools/github/upload_anaconda.sh b/build_tools/github/upload_anaconda.sh index 60cab7f8dcf4a..eb9576e222085 100755 --- a/build_tools/github/upload_anaconda.sh +++ b/build_tools/github/upload_anaconda.sh @@ -4,7 +4,7 @@ set -e set -x if [ "$GITHUB_EVENT_NAME" == "schedule" ]; then - ANACONDA_ORG="scipy-wheels-nightly" + ANACONDA_ORG="scientific-python-nightly-wheels" ANACONDA_TOKEN="$SCIKIT_LEARN_NIGHTLY_UPLOAD_TOKEN" else ANACONDA_ORG="scikit-learn-wheels-staging" diff --git a/build_tools/github/vendor.py b/build_tools/github/vendor.py index 2997688423b84..3bc1aceb3437c 100644 --- a/build_tools/github/vendor.py +++ b/build_tools/github/vendor.py @@ -7,7 +7,6 @@ import sys import textwrap - TARGET_FOLDER = op.join("sklearn", ".libs") DISTRIBUTOR_INIT = op.join("sklearn", "_distributor_init.py") VCOMP140_SRC_PATH = "C:\\Windows\\System32\\vcomp140.dll" diff --git a/build_tools/linting.sh b/build_tools/linting.sh index dd200b9d9cd95..28d16a8bbed32 100755 --- a/build_tools/linting.sh +++ b/build_tools/linting.sh @@ -1,27 +1,65 @@ #!/bin/bash -set -e +# Note that any change in this file, adding or removing steps or changing the +# printed messages, should be also reflected in the `get_comment.py` file. + +# This script shouldn't exit if a command / pipeline fails +set +e # pipefail is necessary to propagate exit codes set -o pipefail +global_status=0 + +echo -e "### Running black ###\n" black --check --diff . -echo -e "No problem detected by black\n" +status=$? + +if [[ $status -eq 0 ]] +then + echo -e "No problem detected by black\n" +else + echo -e "Problems detected by black, please run black and commit the result\n" + global_status=1 +fi -flake8 --show-source . -echo -e "No problem detected by flake8\n" +echo -e "### Running ruff ###\n" +ruff check --show-source . +status=$? 
+if [[ $status -eq 0 ]] +then + echo -e "No problem detected by ruff\n" +else + echo -e "Problems detected by ruff, please fix them\n" + global_status=1 +fi +echo -e "### Running mypy ###\n" mypy sklearn/ -echo -e "No problem detected by mypy\n" +status=$? +if [[ $status -eq 0 ]] +then + echo -e "No problem detected by mypy\n" +else + echo -e "Problems detected by mypy, please fix them\n" + global_status=1 +fi +echo -e "### Running cython-lint ###\n" cython-lint sklearn/ -echo -e "No problem detected by cython-lint\n" +status=$? +if [[ $status -eq 0 ]] +then + echo -e "No problem detected by cython-lint\n" +else + echo -e "Problems detected by cython-lint, please fix them\n" + global_status=1 +fi # For docstrings and warnings of deprecated attributes to be rendered # properly, the property decorator must come before the deprecated decorator # (else they are treated as functions) -# do not error when grep -B1 "@property" finds nothing -set +e +echo -e "### Checking for bad deprecation order ###\n" bad_deprecation_property_order=`git grep -A 10 "@property" -- "*.py" | awk '/@property/,/def /' | grep -B1 "@deprecated"` if [ ! -z "$bad_deprecation_property_order" ] @@ -29,29 +67,59 @@ then echo "property decorator should come before deprecated decorator" echo "found the following occurrences:" echo $bad_deprecation_property_order - exit 1 + echo -e "\nProblems detected by deprecation order check\n" + global_status=1 +else + echo -e "No problems detected related to deprecation order\n" fi # Check for default doctest directives ELLIPSIS and NORMALIZE_WHITESPACE +echo -e "### Checking for default doctest directives ###\n" doctest_directive="$(git grep -nw -E "# doctest\: \+(ELLIPSIS|NORMALIZE_WHITESPACE)")" if [ ! -z "$doctest_directive" ] then echo "ELLIPSIS and NORMALIZE_WHITESPACE doctest directives are enabled by default, but were found in:" echo "$doctest_directive" - exit 1 + echo -e "\nProblems detected by doctest directive check\n" + global_status=1 +else + echo -e "No problems detected related to doctest directives\n" fi +# Check for joblib.delayed and joblib.Parallel imports + +echo -e "### Checking for joblib imports ###\n" +joblib_status=0 joblib_delayed_import="$(git grep -l -A 10 -E "joblib import.+delayed" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")" if [ ! -z "$joblib_delayed_import" ]; then echo "Use from sklearn.utils.parallel import delayed instead of joblib delayed. The following files contains imports to joblib.delayed:" echo "$joblib_delayed_import" - exit 1 + joblib_status=1 fi joblib_Parallel_import="$(git grep -l -A 10 -E "joblib import.+Parallel" -- "*.py" ":!sklearn/utils/_joblib.py" ":!sklearn/utils/parallel.py")" if [ ! -z "$joblib_Parallel_import" ]; then echo "Use from sklearn.utils.parallel import Parallel instead of joblib Parallel. 
The following files contains imports to joblib.Parallel:" echo "$joblib_Parallel_import" + joblib_status=1 +fi + +if [[ $joblib_status -eq 0 ]] +then + echo -e "No problems detected related to joblib imports\n" +else + echo -e "\nProblems detected by joblib import check\n" + global_status=1 +fi + +echo -e "### Linting completed ###\n" + +if [[ $global_status -eq 1 ]] +then + echo -e "Linting failed\n" exit 1 +else + echo -e "Linting passed\n" + exit 0 fi diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py index 28910a07d899a..d5ad0185bd685 100644 --- a/build_tools/update_environments_and_lock_files.py +++ b/build_tools/update_environments_and_lock_files.py @@ -27,20 +27,24 @@ sklearn/_min_dependencies.py - pip-tools +To only update the environment and lock files for specific builds, you can use +the command line argument `--select-build` which will take a regex. For example, +to only update the documentation builds you can use: +`python build_tools/update_environments_and_lock_files.py --select-build doc` """ +import json +import logging import re +import shlex import subprocess import sys -from pathlib import Path -import shlex -import json -import logging from importlib.metadata import version +from pathlib import Path import click - from jinja2 import Environment +from packaging.version import Version logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -122,6 +126,10 @@ def remove_from(alist, to_remove): "conda_dependencies": common_dependencies + ["ccache"], "package_constraints": { "blas": "[build=mkl]", + # TODO: temporary pin for numpy to avoid what seems to be a loky issue, + # for more details see + # https://github.com/scikit-learn/scikit-learn/pull/26845#issuecomment-1639917135 + "numpy": "<1.25", }, }, { @@ -147,6 +155,9 @@ def remove_from(alist, to_remove): "scipy": "min", "matplotlib": "min", "threadpoolctl": "2.2.0", + # Regressions have been observed with Cython>=3.0.0. + # See: https://github.com/scikit-learn/scikit-learn/issues/27086 + "cython": "<3.0.0", }, }, { @@ -155,7 +166,13 @@ def remove_from(alist, to_remove): "platform": "linux-64", "channel": "conda-forge", "conda_dependencies": common_dependencies_without_coverage + ["ccache"], - "package_constraints": {"python": "3.8", "blas": "[build=openblas]"}, + "package_constraints": { + "python": "3.8", + "blas": "[build=openblas]", + # Regressions have been observed with Cython>=3.0.0. + # See: https://github.com/scikit-learn/scikit-learn/issues/27086 + "cython": "<3.0.0", + }, }, { "build_name": "pylatest_pip_openblas_pandas", @@ -219,6 +236,9 @@ def remove_from(alist, to_remove): "package_constraints": { "blas": "[build=openblas]", "python": "3.9", + # Regressions have been observed with Cython>=3.0.0. + # See: https://github.com/scikit-learn/scikit-learn/issues/27086 + "cython": "<3.0.0", }, }, { @@ -233,6 +253,9 @@ def remove_from(alist, to_remove): "package_constraints": { "python": "3.8", "blas": "[build=mkl]", + # Regressions have been observed with Cython>=3.0.0.
+ # See: https://github.com/scikit-learn/scikit-learn/issues/27086 + "cython": "<3.0.0", }, }, { @@ -247,6 +270,7 @@ def remove_from(alist, to_remove): "compilers", "sphinx", "sphinx-gallery", + "sphinx-copybutton", "numpydoc", "sphinx-prompt", "plotly", @@ -263,6 +287,7 @@ def remove_from(alist, to_remove): "sphinx": "min", "pandas": "min", "sphinx-gallery": "min", + "sphinx-copybutton": "min", "numpydoc": "min", "sphinx-prompt": "min", "sphinxext-opengraph": "min", @@ -281,6 +306,7 @@ def remove_from(alist, to_remove): "compilers", "sphinx", "sphinx-gallery", + "sphinx-copybutton", "numpydoc", "sphinx-prompt", "plotly", @@ -292,6 +318,9 @@ def remove_from(alist, to_remove): "python": "3.9", # XXX: sphinx > 6.0 does not correctly generate searchindex.js "sphinx": "6.0.0", + # Regressions have been observed with Cython>=3.0.0. + # See: https://github.com/scikit-learn/scikit-learn/issues/27086 + "cython": "<3.0.0", }, }, { @@ -304,6 +333,9 @@ def remove_from(alist, to_remove): ) + ["pip", "ccache"], "package_constraints": { "python": "3.9", + # Regressions have been observed with Cython>=3.0.0. + # See: https://github.com/scikit-learn/scikit-learn/issues/27086 + "cython": "<3.0.0", }, }, ] @@ -326,6 +358,9 @@ def remove_from(alist, to_remove): "pytest": "min", "pytest-cov": "min", # no pytest-xdist because it causes issue on 32bit + # Regressions have been observed with Cython>=3.0.0. + # See: https://github.com/scikit-learn/scikit-learn/issues/27086 + "cython": "<3.0.0", }, # same Python version as in debian-32 build "python_version": "3.9.2", @@ -340,7 +375,13 @@ def remove_from(alist, to_remove): "pytest", "pytest-xdist", ], - "package_constraints": {"joblib": "min", "threadpoolctl": "min"}, + "package_constraints": { + "joblib": "min", + "threadpoolctl": "min", + # Regressions have been observed with Cython>=3.0.0. + # See: https://github.com/scikit-learn/scikit-learn/issues/27086 + "cython": "<3.0.0", + }, "python_version": "3.10.4", }, ] @@ -539,6 +580,22 @@ def check_conda_lock_version(): ) +def check_conda_version(): + # Avoid issues with glibc (https://github.com/conda/conda-lock/issues/292) + # or osx (https://github.com/conda/conda-lock/issues/408) virtual packages. + # The glibc one has been fixed in conda 23.1.0 and the osx one has been fixed + # in conda 23.7.0. + conda_info_output = execute_command(["conda", "info", "--json"]) + + conda_info = json.loads(conda_info_output) + conda_version = Version(conda_info["conda_version"]) + + if Version("22.9.0") < conda_version < Version("23.7"): + raise RuntimeError( + f"conda version should be <= 22.9.0 or >= 23.7, got: {conda_version}" + ) + + @click.command() @click.option( "--select-build", @@ -547,6 +604,7 @@ def check_conda_lock_version(): ) def main(select_build): check_conda_lock_version() + check_conda_version() filtered_conda_build_metadata_list = [ each for each in conda_build_metadata_list diff --git a/doc/Makefile b/doc/Makefile index 2ee611ccb5cf0..44f02585f6205 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -2,7 +2,7 @@ # # You can set these variables from the command line. -SPHINXOPTS = +SPHINXOPTS = -T SPHINXBUILD ?= sphinx-build PAPER = BUILDDIR = _build @@ -24,7 +24,7 @@ endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter -ALLSPHINXOPTS = -T -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)\ +ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS)\ $(EXAMPLES_PATTERN_OPTS) .
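For reference, the `check_conda_version` guard added above boils down to a `packaging.version.Version` range check on the output of `conda info --json`. A minimal standalone sketch of the same idea (illustrative only, not part of the patch; it calls `subprocess` directly where the script uses its own `execute_command` helper):

    import json
    import subprocess

    from packaging.version import Version

    def check_conda_version():
        # `conda info --json` prints a JSON blob that includes a "conda_version" key.
        conda_info_output = subprocess.check_output(["conda", "info", "--json"], text=True)
        conda_version = Version(json.loads(conda_info_output)["conda_version"])

        # conda releases strictly between 22.9.0 and 23.7 are affected by the
        # conda-lock virtual package issues mentioned above, so reject them.
        if Version("22.9.0") < conda_version < Version("23.7"):
            raise RuntimeError(
                f"conda version should be <= 22.9.0 or >= 23.7, got: {conda_version}"
            )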
diff --git a/doc/about.rst b/doc/about.rst index eabd8d5e251d9..e462963135b58 100644 --- a/doc/about.rst +++ b/doc/about.rst @@ -39,6 +39,13 @@ in the FAQ. :ref:`How you can contribute to the project ` +Documentation Team +------------------ + +The following people help with documenting the project: + +.. include:: documentation_team.rst + Contributor Experience Team --------------------------- @@ -154,20 +161,16 @@ The project would like to thank the following funders. The `Members `_ of the `Scikit-Learn Consortium at Inria Foundation -`_ fund Olivier -Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger. +`_ fund Arturo Amor, +François Goupil, Guillaume Lemaitre, Jérémie du Boisberranger, and Olivier Grisel. .. raw:: html -.. |msn| image:: images/microsoft.png - :width: 100pt - :target: https://www.microsoft.com/ - -.. |bcg| image:: images/bcg.png - :width: 100pt - :target: https://www.bcg.com/beyond-consulting/bcg-gamma/default.aspx +.. |chanel| image:: images/chanel.png + :width: 70pt + :target: https://www.chanel.com .. |axa| image:: images/axa.png :width: 50pt @@ -177,17 +180,17 @@ Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger. :width: 150pt :target: https://www.bnpparibascardif.com/ -.. |fujitsu| image:: images/fujitsu.png - :width: 100pt - :target: https://www.fujitsu.com/global/ - .. |dataiku| image:: images/dataiku.png :width: 70pt :target: https://www.dataiku.com/ -.. |aphp| image:: images/logo_APHP_text.png - :width: 150pt - :target: https://aphp.fr/ +.. |hf| image:: images/huggingface_logo-noborder.png + :width: 70pt + :target: https://huggingface.co + +.. |nvidia| image:: images/nvidia.png + :width: 70pt + :target: https://www.nvidia.com .. |inria| image:: images/inria-logo.jpg :width: 100pt @@ -201,25 +204,25 @@ Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger. .. table:: :class: sk-sponsor-table align-default - +---------+----------+ - | |bcg| | - +---------+----------+ - | | - +---------+----------+ - | |axa| | |bnp| | - +---------+----------+ - ||fujitsu|| |msn| | - +---------+----------+ - | | - +---------+----------+ - | |dataiku| | - +---------+----------+ - | |aphp| | - +---------+----------+ - | | - +---------+----------+ - | |inria| | - +---------+----------+ + +----------+-----------+ + | |chanel| | + +----------+-----------+ + | | + +----------+-----------+ + | |axa| | |bnp| | + +----------+-----------+ + | | + +----------+-----------+ + | |nvidia| | |hf| | + +----------+-----------+ + | | + +----------+-----------+ + | |dataiku| | + +----------+-----------+ + | | + +----------+-----------+ + | |inria| | + +----------+-----------+ .. raw:: html @@ -233,7 +236,35 @@ Grisel, Guillaume Lemaitre, and Jérémie du Boisberranger.
-`Hugging Face `_ funds Adrin Jalali since 2022. +`NVidia `_ funds Tim Head since 2022 +and is part of the scikit-learn consortium at Inria. + +.. raw:: html + +
+ +
+ +.. image:: images/nvidia.png + :width: 55pt + :align: center + :target: https://nvidia.com + +.. raw:: html + +
+
+ + +.......... + +.. raw:: html + +
+
+ +`Hugging Face `_ funded Adrin Jalali in 2022 and +2023, and is part of the scikit-learn consortium at Inria. .. raw:: html @@ -283,7 +314,8 @@
-`Quansight Labs `_ funds Thomas J. Fan since 2021. +`Quansight Labs `_ funds Lucy Liu and +Meekail Zain since 2022 and funded Thomas J. Fan from 2021 to 2023. .. raw:: html @@ -566,6 +598,31 @@ The `NeuroDebian `_ project providing `Debian `Dr. James V. Haxby `_ (`Dartmouth College `_). +................... + +The following organizations funded the scikit-learn consortium at Inria in +the past: + +.. |msn| image:: images/microsoft.png + :width: 100pt + :target: https://www.microsoft.com/ + +.. |bcg| image:: images/bcg.png + :width: 100pt + :target: https://www.bcg.com/beyond-consulting/bcg-gamma/default.aspx + +.. |fujitsu| image:: images/fujitsu.png + :width: 100pt + :target: https://www.fujitsu.com/global/ + +.. |aphp| image:: images/logo_APHP_text.png + :width: 150pt + :target: https://aphp.fr/ + + +|bcg| |msn| |fujitsu| |aphp| + + Sprints ------- diff --git a/doc/authors.rst b/doc/authors.rst index e2d027fa40506..ddad9803ee8ab 100644 --- a/doc/authors.rst +++ b/doc/authors.rst @@ -78,6 +78,10 @@

Hanmin Qin

+
+

Omar Salman

+
+

Bertrand Thirion

diff --git a/doc/authors_emeritus.rst b/doc/authors_emeritus.rst index a56e2bc408ff4..b979b77bba974 100644 --- a/doc/authors_emeritus.rst +++ b/doc/authors_emeritus.rst @@ -20,7 +20,6 @@ - Wei Li - Paolo Losi - Gilles Louppe -- Chiara Marmo - Vincent Michel - Jarrod Millman - Alexandre Passos diff --git a/doc/common_pitfalls.rst b/doc/common_pitfalls.rst index 77341047857b5..eef995754d2bb 100644 --- a/doc/common_pitfalls.rst +++ b/doc/common_pitfalls.rst @@ -416,7 +416,7 @@ it will allow the estimator RNG to vary for each fold. **Cloning** Another subtle side effect of passing `RandomState` instances is how -:func:`~sklearn.clone` will work:: +:func:`~sklearn.base.clone` will work:: >>> from sklearn import clone >>> from sklearn.ensemble import RandomForestClassifier @@ -439,10 +439,10 @@ If an integer were passed, `a` and `b` would be exact clones and they would not influence each other. .. warning:: - Even though :func:`~sklearn.clone` is rarely used in user code, it is + Even though :func:`~sklearn.base.clone` is rarely used in user code, it is called pervasively throughout scikit-learn codebase: in particular, most meta-estimators that accept non-fitted estimators call - :func:`~sklearn.clone` internally + :func:`~sklearn.base.clone` internally (:class:`~sklearn.model_selection.GridSearchCV`, :class:`~sklearn.ensemble.StackingClassifier`, :class:`~sklearn.calibration.CalibratedClassifierCV`, etc.). @@ -553,7 +553,7 @@ When we evaluate a randomized estimator performance by cross-validation, we want to make sure that the estimator can yield accurate predictions for new data, but we also want to make sure that the estimator is robust w.r.t. its random initialization. For example, we would like the random weights -initialization of a :class:`~sklearn.linear_model.SGDCLassifier` to be +initialization of a :class:`~sklearn.linear_model.SGDClassifier` to be consistently good across all folds: otherwise, when we train that estimator on new data, we might get unlucky and the random initialization may lead to bad performance. Similarly, we want a random forest to be robust w.r.t the diff --git a/doc/conf.py b/doc/conf.py index 176a0d8b3a7d1..bfeb7a1b05940 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -10,14 +10,15 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys import os -import warnings import re +import sys +import warnings from datetime import datetime -from sklearn.externals._packaging.version import parse -from pathlib import Path from io import StringIO +from pathlib import Path + +from sklearn.externals._packaging.version import parse # If extensions (or modules to document with autodoc) are in another # directory, add these directories to sys.path here. If the directory @@ -25,10 +26,10 @@ # absolute, like shown here. 
sys.path.insert(0, os.path.abspath("sphinxext")) -from github_link import make_linkcode_resolve import sphinx_gallery +from github_link import make_linkcode_resolve +from sphinx_gallery.notebook import add_code_cell, add_markdown_cell from sphinx_gallery.sorting import ExampleTitleSortKey -from sphinx_gallery.notebook import add_markdown_cell, add_code_cell try: # Configure plotly to integrate its output into the HTML pages generated by @@ -57,16 +58,23 @@ "sphinx_issues", "add_toctree_functions", "sphinx-prompt", + "sphinx_copybutton", "sphinxext.opengraph", "doi_role", "allow_nan_estimators", "matplotlib.sphinxext.plot_directive", ] +# Specify how to identify the prompt when copying code snippets +copybutton_prompt_text = r">>> |\.\.\. " +copybutton_prompt_is_regexp = True +copybutton_exclude = "style" + try: import jupyterlite_sphinx # noqa: F401 extensions.append("jupyterlite_sphinx") + with_jupyterlite = True except ImportError: # In some cases we don't want to require jupyterlite_sphinx to be installed, # e.g. the doc-min-dependencies build @@ -74,6 +82,7 @@ "jupyterlite_sphinx is not installed, you need to install it " "if you want JupyterLite links to appear in each example" ) + with_jupyterlite = False # Produce `plot::` directives for examples that contain `import matplotlib` or # `from matplotlib import`. @@ -299,6 +308,26 @@ # Not showing the search summary makes the search page load faster. html_show_search_summary = False + +rst_prolog = """ +.. |details-start| raw:: html + +
+ + +.. |details-split| raw:: html + + Click for more details + +
+ +.. |details-end| raw:: html + +
+
+ +""" + # -- Options for LaTeX output ------------------------------------------------ latex_elements = { # The paper size ('letterpaper' or 'a4paper'). @@ -500,13 +529,16 @@ def reset_sklearn_config(gallery_conf, fname): "dependencies": "./binder/requirements.txt", "use_jupyter_lab": True, }, - "jupyterlite": {"notebook_modification_function": notebook_modification_function}, # avoid generating too many cross links "inspect_global_variables": False, "remove_config_comments": True, "plot_gallery": "True", "reset_modules": ("matplotlib", "seaborn", reset_sklearn_config), } +if with_jupyterlite: + sphinx_gallery_conf["jupyterlite"] = { + "notebook_modification_function": notebook_modification_function + } # The following dictionary contains the information used to create the diff --git a/doc/conftest.py b/doc/conftest.py index 73848ccf392fb..563105bc6757e 100644 --- a/doc/conftest.py +++ b/doc/conftest.py @@ -1,16 +1,17 @@ import os -from os.path import exists -from os.path import join -from os import environ import warnings +from os import environ +from os.path import exists, join + +import pytest +from _pytest.doctest import DoctestItem -from sklearn.utils import IS_PYPY -from sklearn.utils._testing import SkipTest -from sklearn.utils._testing import check_skip_network -from sklearn.utils.fixes import parse_version from sklearn.datasets import get_data_home from sklearn.datasets._base import _pkl_filepath from sklearn.datasets._twenty_newsgroups import CACHE_NAME +from sklearn.utils import IS_PYPY +from sklearn.utils._testing import SkipTest, check_skip_network +from sklearn.utils.fixes import np_base_version, parse_version def setup_labeled_faces(): @@ -174,3 +175,34 @@ def pytest_configure(config): matplotlib.use("agg") except ImportError: pass + + +def pytest_collection_modifyitems(config, items): + """Called after collect is completed. + + Parameters + ---------- + config : pytest config + items : list of collected items + """ + skip_doctests = False + if np_base_version >= parse_version("2"): + # Skip doctests when using numpy 2 for now. See the following discussion + # to decide what to do in the longer term: + # https://github.com/scikit-learn/scikit-learn/issues/27339 + reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" + skip_doctests = True + + # Normally doctest has the entire module's scope. Here we set globs to an empty dict + # to remove the module's scope: + # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context + for item in items: + if isinstance(item, DoctestItem): + item.dtest.globs = {} + + if skip_doctests: + skip_marker = pytest.mark.skip(reason=reason) + + for item in items: + if isinstance(item, DoctestItem): + item.add_marker(skip_marker) diff --git a/doc/contributor_experience_team.rst b/doc/contributor_experience_team.rst index 00b658632302e..7d942a07e6a7d 100644 --- a/doc/contributor_experience_team.rst +++ b/doc/contributor_experience_team.rst @@ -6,10 +6,6 @@ img.avatar {border-radius: 10px;}
-    [avatar block: Arturo Amor]
     [avatar block: Juan Carlos Alfaro Jiménez]
@@ -41,4 +37,8 @@
     [avatar block: Albert Thomas]
+    [avatar block: Maren Westermann]
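
As an aside on the ``sphinx-copybutton`` options added to ``doc/conf.py`` above:
``copybutton_prompt_text`` is the regular expression of prompt prefixes that the
copy button strips before copying a snippet. The extension does this client-side
in JavaScript; the Python sketch below only illustrates the effect of the
configured pattern::

    # Rough illustration of the configured prompt pattern; sphinx-copybutton
    # itself strips prompts in the browser, not in Python.
    import re

    prompt = re.compile(r">>> |\.\.\. ")  # same pattern as copybutton_prompt_text
    snippet = ">>> total = sum(\n...     [1, 2, 3]\n... )\n"
    print(prompt.sub("", snippet))
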
diff --git a/doc/datasets/loading_other_datasets.rst b/doc/datasets/loading_other_datasets.rst index a376a69f26dc3..3a6669799f5a3 100644 --- a/doc/datasets/loading_other_datasets.rst +++ b/doc/datasets/loading_other_datasets.rst @@ -290,9 +290,9 @@ format usable by scikit-learn: context such as .mat and .arff * `numpy/routines.io `_ for standard loading of columnar data into numpy arrays -* scikit-learn's :func:`datasets.load_svmlight_file` for the svmlight or libSVM +* scikit-learn's :func:`load_svmlight_file` for the svmlight or libSVM sparse format -* scikit-learn's :func:`datasets.load_files` for directories of text files where +* scikit-learn's :func:`load_files` for directories of text files where the name of each directory is the name of each category and each file inside of each directory corresponds to one sample from that category diff --git a/doc/developers/advanced_installation.rst b/doc/developers/advanced_installation.rst index fe573deb28b83..2eab1bb06d979 100644 --- a/doc/developers/advanced_installation.rst +++ b/doc/developers/advanced_installation.rst @@ -26,12 +26,12 @@ Installing a nightly build is the quickest way to: - check whether a bug you encountered has been fixed since the last release. -You can install the nightly build of scikit-learn using the `scipy-wheels-nightly` +You can install the nightly build of scikit-learn using the `scientific-python-nightly-wheels` index from the PyPI registry of `anaconda.org`: .. prompt:: bash $ - pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple scikit-learn + pip install --pre --extra-index https://pypi.anaconda.org/scientific-python-nightly-wheels/simple scikit-learn Note that first uninstalling scikit-learn might be required to be able to install nightly builds of scikit-learn. @@ -69,6 +69,12 @@ feature, code or documentation improvement). .. prompt:: bash $ conda create -n sklearn-env -c conda-forge python=3.9 numpy scipy cython + + It is not always necessary but it is safer to open a new prompt before + activating the newly created conda environment. + + .. prompt:: bash $ + conda activate sklearn-env #. **Alternative to conda:** If you run Linux or similar, you can instead use @@ -90,7 +96,7 @@ feature, code or documentation improvement). .. prompt:: bash $ - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install -v --no-use-pep517 --no-build-isolation -e . #. Check that the installed scikit-learn has a version number ending with `.dev0`: @@ -109,7 +115,9 @@ feature, code or documentation improvement). (ending in `.pyx` or `.pxd`). This can happen when you edit them or when you use certain git commands such as `git pull`. Use the ``--no-build-isolation`` flag to avoid compiling the whole project each time, only the files you have - modified. + modified. Include the ``--no-use-pep517`` flag because the ``--no-build-isolation`` + option might not work otherwise (this is due to a bug which will be fixed in the + future). Dependencies ------------ @@ -227,10 +235,13 @@ console: For 64-bit Python, configure the build environment by running the following commands in ``cmd`` or an Anaconda Prompt (if you use Anaconda): - :: +.. sphinx-prompt 1.3.0 (used in doc-min-dependencies CI task) does not support `batch` prompt type, +.. so we work around by using a known prompt type and an explicit prompt text. +.. +.. 
prompt:: bash C:\> - $ SET DISTUTILS_USE_SDK=1 - $ "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 + SET DISTUTILS_USE_SDK=1 + "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64 Replace ``x64`` by ``x86`` to build for 32-bit Python. @@ -242,7 +253,7 @@ Finally, build scikit-learn from this command prompt: .. prompt:: bash $ - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install -v --no-use-pep517 --no-build-isolation -e . .. _compiler_macos: @@ -282,9 +293,15 @@ scikit-learn from source: conda create -n sklearn-dev -c conda-forge python numpy scipy cython \ joblib threadpoolctl pytest compilers llvm-openmp + +It is not always necessary but it is safer to open a new prompt before +activating the newly created conda environment. + +.. prompt:: bash $ + conda activate sklearn-dev make clean - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install -v --no-use-pep517 --no-build-isolation -e . .. note:: @@ -302,12 +319,6 @@ forge using the following command: which should include ``compilers`` and ``llvm-openmp``. -.. note:: - - If you installed these packages after creating and activating a new conda - environment, you will need to first deactivate and then reactivate the - environment for these changes to take effect. - The compilers meta-package will automatically set custom environment variables: @@ -364,7 +375,7 @@ Finally, build scikit-learn in verbose mode (to check for the presence of the .. prompt:: bash $ make clean - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install -v --no-use-pep517 --no-build-isolation -e . .. _compiler_linux: @@ -423,8 +434,14 @@ in the user folder using conda: conda create -n sklearn-dev -c conda-forge python numpy scipy cython \ joblib threadpoolctl pytest compilers + +It is not always necessary but it is safer to open a new prompt before +activating the newly created conda environment. + +.. prompt:: bash $ + conda activate sklearn-dev - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install -v --no-use-pep517 --no-build-isolation -e . .. _compiler_freebsd: @@ -453,7 +470,7 @@ Finally, build the package using the standard command: .. prompt:: bash $ - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install -v --no-use-pep517 --no-build-isolation -e . For the upcoming FreeBSD 12.1 and 11.3 versions, OpenMP will be included in the base system and these steps will not be necessary. @@ -514,7 +531,7 @@ and environment variable as follows before calling the ``pip install`` or ``python setup.py build_ext`` commands:: export SKLEARN_BUILD_PARALLEL=3 - pip install --verbose --no-use-pep517 --no-build-isolation --editable . + pip install -v --no-use-pep517 --no-build-isolation -e . On a machine with 2 CPU cores, it can be beneficial to use a parallelism level of 3 to overlap IO bound tasks (reading and writing files on disk) with CPU diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 86575dd75d0f1..6aecc524a9a30 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -247,14 +247,14 @@ how to set up your git repository: git clone git@github.com:YourLogin/scikit-learn.git # add --depth 1 if your connection is slow cd scikit-learn -4. Follow steps 2-7 in :ref:`install_bleeding_edge` to build scikit-learn in +4. 
Follow steps 2-6 in :ref:`install_bleeding_edge` to build scikit-learn in development mode and return to this document. 5. Install the development dependencies: .. prompt:: bash $ - pip install pytest pytest-cov flake8 mypy numpydoc black==23.3.0 + pip install pytest pytest-cov ruff mypy numpydoc black==23.3.0 .. _upstream: @@ -274,9 +274,11 @@ how to set up your git repository: upstream git@github.com:scikit-learn/scikit-learn.git (fetch) upstream git@github.com:scikit-learn/scikit-learn.git (push) -You should now have a working installation of scikit-learn, and your git -repository properly configured. The next steps now describe the process of -modifying code and submitting a PR: +You should now have a working installation of scikit-learn, and your git repository +properly configured. It could be useful to run some test to verify your installation. +Please refer to :ref:`pytest_tips` for examples. + +The next steps now describe the process of modifying code and submitting a PR: 8. Synchronize your ``main`` branch with the ``upstream/main`` branch, more details on `GitHub Docs `_: @@ -425,30 +427,15 @@ complies with the following rules before marking a PR as ``[MRG]``. The non-regression tests should fail for the code base in the ``main`` branch and pass for the PR code. -5. Run `black` to auto-format your code. - - .. prompt:: bash $ - - black . - - See black's - `editor integration documentation `_ - to configure your editor to run `black`. - -6. Run `flake8` to make sure you followed the project coding conventions. - .. prompt:: bash $ - - flake8 . - -7. Follow the :ref:`coding-guidelines`. +5. Follow the :ref:`coding-guidelines`. -8. When applicable, use the validation tools and scripts in the +6. When applicable, use the validation tools and scripts in the ``sklearn.utils`` submodule. A list of utility routines available for developers can be found in the :ref:`developers-utils` page. -9. Often pull requests resolve one or more other issues (or pull requests). +7. Often pull requests resolve one or more other issues (or pull requests). If merging your pull request means that some other issues/PRs should be closed, you should `use keywords to create link to them `_ @@ -458,7 +445,7 @@ complies with the following rules before marking a PR as ``[MRG]``. The related to some other issues/PRs, create a link to them without using the keywords (e.g., ``See also #1234``). -10. PRs should often substantiate the change, through benchmarks of +8. PRs should often substantiate the change, through benchmarks of performance and efficiency (see :ref:`monitoring_performances`) or through examples of usage. Examples also illustrate the features and intricacies of the library to users. Have a look at other examples in the `examples/ @@ -467,14 +454,14 @@ complies with the following rules before marking a PR as ``[MRG]``. The functionality is useful in practice and, if possible, compare it to other methods available in scikit-learn. -11. New features have some maintenance overhead. We expect PR authors +9. New features have some maintenance overhead. We expect PR authors to take part in the maintenance for the code they submit, at least initially. New features need to be illustrated with narrative documentation in the user guide, with small code snippets. If relevant, please also add references in the literature, with PDF links when possible. -12. The user guide should also include expected time and space complexity +10. 
The user guide should also include expected time and space complexity of the algorithm and scalability, e.g. "this algorithm can scale to a large number of samples > 100000, but does not scale in dimensionality: n_features is expected to be lower than 100". @@ -534,8 +521,10 @@ Continuous Integration (CI) * Azure pipelines are used for testing scikit-learn on Linux, Mac and Windows, with different dependencies and settings. -* CircleCI is used to build the docs for viewing, for linting with flake8, and - for testing with ARM64 / aarch64 on Linux +* CircleCI is used to build the docs for viewing. +* Github Actions are used for various tasks, including building wheels and + source distributions. +* Cirrus CI is used to build on ARM. Please note that if one of the following markers appear in the latest commit message, the following actions are taken. @@ -553,6 +542,7 @@ message, the following actions are taken. [pypy] Build & test with PyPy [pyodide] Build & test with Pyodide [azure parallel] Run Azure CI jobs in parallel + [cirrus arm] Run Cirrus CI ARM test [float32] Run float32 tests by setting `SKLEARN_RUN_FLOAT32_TESTS=1`. See :ref:`environment_variable` for more details [doc skip] Docs are not built [doc quick] Docs built, but excludes example gallery plots @@ -689,250 +679,301 @@ We are glad to accept any sort of documentation: of scikit-learn modules, compare different algorithms or discuss their interpretation etc. Examples live in `examples/ `_ -* **other reStructuredText documents** (like this one) - provide various other - useful information (e.g., our guide to contributing) and live in +* **other reStructuredText documents** - provide various other + useful information (e.g., the :ref:`contributing` guide) and live in `doc/ `_. -You can edit the documentation using any text editor, and then generate the -HTML output by following :ref:`building_documentation`. The resulting HTML files -will be placed in ``_build/html/stable`` and are viewable in a web browser, for -instance by opening the local ``_build/html/stable/index.html`` file. +|details-start| +**Guidelines for writing docstrings** +|details-split| -.. _building_documentation: +* When documenting the parameters and attributes, here is a list of some + well-formatted examples:: -Building the documentation --------------------------- + n_clusters : int, default=3 + The number of clusters detected by the algorithm. -First, make sure you have :ref:`properly installed ` -the development version. + some_param : {'hello', 'goodbye'}, bool or int, default=True + The parameter description goes here, which can be either a string + literal (either `hello` or `goodbye`), a bool, or an int. The default + value is True. -.. - packaging is not needed once setuptools starts shipping packaging>=17.0 + array_parameter : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples,) + This parameter accepts data in either of the mentioned forms, with one + of the mentioned shapes. The default value is + `np.ones(shape=(n_samples,))`. -Building the documentation requires installing some additional packages: + list_param : list of int -.. 
prompt:: bash $ + typed_ndarray : ndarray of shape (n_samples,), dtype=np.int32 - pip install sphinx sphinx-gallery numpydoc matplotlib Pillow pandas \ - scikit-image packaging seaborn sphinx-prompt \ - sphinxext-opengraph plotly pooch + sample_weight : array-like of shape (n_samples,), default=None -To build the documentation, you need to be in the ``doc`` folder: + multioutput_array : ndarray of shape (n_samples, n_classes) or list of such arrays -.. prompt:: bash $ + In general have the following in mind: - cd doc + * Use Python basic types. (``bool`` instead of ``boolean``) + * Use parenthesis for defining shapes: ``array-like of shape (n_samples,)`` + or ``array-like of shape (n_samples, n_features)`` + * For strings with multiple options, use brackets: ``input: {'log', + 'squared', 'multinomial'}`` + * 1D or 2D data can be a subset of ``{array-like, ndarray, sparse matrix, + dataframe}``. Note that ``array-like`` can also be a ``list``, while + ``ndarray`` is explicitly only a ``numpy.ndarray``. + * Specify ``dataframe`` when "frame-like" features are being used, such as + the column names. + * When specifying the data type of a list, use ``of`` as a delimiter: ``list + of int``. When the parameter supports arrays giving details about the + shape and/or data type and a list of such arrays, you can use one of + ``array-like of shape (n_samples,) or list of such arrays``. + * When specifying the dtype of an ndarray, use e.g. ``dtype=np.int32`` after + defining the shape: ``ndarray of shape (n_samples,), dtype=np.int32``. You + can specify multiple dtype as a set: ``array-like of shape (n_samples,), + dtype={np.float64, np.float32}``. If one wants to mention arbitrary + precision, use `integral` and `floating` rather than the Python dtype + `int` and `float`. When both `int` and `floating` are supported, there is + no need to specify the dtype. + * When the default is ``None``, ``None`` only needs to be specified at the + end with ``default=None``. Be sure to include in the docstring, what it + means for the parameter or attribute to be ``None``. -In the vast majority of cases, you only need to generate the full web site, -without the example gallery: +* Add "See Also" in docstrings for related classes/functions. -.. prompt:: bash $ +* "See Also" in docstrings should be one line per reference, with a colon and an + explanation, for example:: - make + See Also + -------- + SelectKBest : Select features based on the k highest scores. + SelectFpr : Select features based on a false positive rate test. -The documentation will be generated in the ``_build/html/stable`` directory -and are viewable in a web browser, for instance by opening the local -``_build/html/stable/index.html`` file. -To also generate the example gallery you can use: +* Add one or two snippets of code in "Example" section to show how it can be used. -.. prompt:: bash $ +|details-end| - make html +|details-start| +**Guidelines for writing the user guide and other reStructuredText documents** +|details-split| -This will run all the examples, which takes a while. If you only want to -generate a few examples, you can use: +It is important to keep a good compromise between mathematical and algorithmic +details, and give intuition to the reader on what the algorithm does. -.. prompt:: bash $ +* Begin with a concise, hand-waving explanation of what the algorithm/code does on + the data. - EXAMPLES_PATTERN=your_regex_goes_here make html +* Highlight the usefulness of the feature and its recommended application. 
+ Consider including the algorithm's complexity + (:math:`O\left(g\left(n\right)\right)`) if available, as "rules of thumb" can + be very machine-dependent. Only if those complexities are not available, then + rules of thumb may be provided instead. -This is particularly useful if you are modifying a few examples. +* Incorporate a relevant figure (generated from an example) to provide intuitions. -Set the environment variable `NO_MATHJAX=1` if you intend to view -the documentation in an offline setting. +* Include one or two short code examples to demonstrate the feature's usage. -To build the PDF manual, run: +* Introduce any necessary mathematical equations, followed by references. By + deferring the mathematical aspects, the documentation becomes more accessible + to users primarily interested in understanding the feature's practical + implications rather than its underlying mechanics. -.. prompt:: bash $ +* When editing reStructuredText (``.rst``) files, try to keep line length under + 88 characters when possible (exceptions include links and tables). - make latexpdf +* In scikit-learn reStructuredText files both single and double backticks + surrounding text will render as inline literal (often used for code, e.g., + `list`). This is due to specific configurations we have set. Single + backticks should be used nowadays. -.. warning:: **Sphinx version** +* Too much information makes it difficult for users to access the content they + are interested in. Use dropdowns to factorize it by using the following + syntax:: - While we do our best to have the documentation build under as many - versions of Sphinx as possible, the different versions tend to - behave slightly differently. To get the best results, you should - use the same version as the one we used on CircleCI. Look at this - `github search `_ - to know the exact version. + |details-start| + **Dropdown title** + |details-split| -Guidelines for writing documentation ------------------------------------- + Dropdown content. -It is important to keep a good compromise between mathematical and algorithmic -details, and give intuition to the reader on what the algorithm does. + |details-end| -Basically, to elaborate on the above, it is best to always -start with a small paragraph with a hand-waving explanation of what the -method does to the data. Then, it is very helpful to point out why the feature is -useful and when it should be used - the latter also including "big O" -(:math:`O\left(g\left(n\right)\right)`) complexities of the algorithm, as opposed -to just *rules of thumb*, as the latter can be very machine-dependent. If those -complexities are not available, then rules of thumb may be provided instead. + The snippet above will result in the following dropdown: -Secondly, a generated figure from an example (as mentioned in the previous -paragraph) should then be included to further provide some intuition. + |details-start| + **Dropdown title** + |details-split| -Next, one or two small code examples to show its use can be added. + Dropdown content. -Next, any math and equations, followed by references, -can be added to further the documentation. Not starting the -documentation with the maths makes it more friendly towards -users that are just interested in what the feature will do, as -opposed to how it works "under the hood". 
+ |details-end| -Finally, follow the formatting rules below to make it consistently good: +* Information that can be hidden by default using dropdowns is: -* Add "See Also" in docstrings for related classes/functions. + * low hierarchy sections such as `References`, `Properties`, etc. (see for + instance the subsections in :ref:`det_curve`); -* "See Also" in docstrings should be one line per reference, - with a colon and an explanation, for example:: + * in-depth mathematical details; - See Also - -------- - SelectKBest : Select features based on the k highest scores. - SelectFpr : Select features based on a false positive rate test. + * narrative that is use-case specific; -* When documenting the parameters and attributes, here is a list of some - well-formatted examples:: + * in general, narrative that may only interest users that want to go beyond + the pragmatics of a given tool. - n_clusters : int, default=3 - The number of clusters detected by the algorithm. +* Do not use dropdowns for the low level section `Examples`, as it should stay + visible to all users. Make sure that the `Examples` section comes right after + the main discussion with the least possible folded section in-between. - some_param : {'hello', 'goodbye'}, bool or int, default=True - The parameter description goes here, which can be either a string - literal (either `hello` or `goodbye`), a bool, or an int. The default - value is True. +* Be aware that dropdowns break cross-references. If that makes sense, hide the + reference along with the text mentioning it. Else, do not use dropdown. - array_parameter : {array-like, sparse matrix} of shape (n_samples, n_features) or (n_samples,) - This parameter accepts data in either of the mentioned forms, with one - of the mentioned shapes. The default value is - `np.ones(shape=(n_samples,))`. +|details-end| - list_param : list of int - typed_ndarray : ndarray of shape (n_samples,), dtype=np.int32 +|details-start| +**Guidelines for writing references** +|details-split| - sample_weight : array-like of shape (n_samples,), default=None +* When bibliographic references are available with `arxiv `_ + or `Digital Object Identifier `_ identification numbers, + use the sphinx directives `:arxiv:` or `:doi:`. For example, see references in + :ref:`Spectral Clustering Graphs `. - multioutput_array : ndarray of shape (n_samples, n_classes) or list of such arrays +* For "References" in docstrings, see the Silhouette Coefficient + (:func:`sklearn.metrics.silhouette_score`). - In general have the following in mind: +* To cross-reference to other pages in the scikit-learn documentation use the + reStructuredText cross-referencing syntax: - 1. Use Python basic types. (``bool`` instead of ``boolean``) - 2. Use parenthesis for defining shapes: ``array-like of shape (n_samples,)`` - or ``array-like of shape (n_samples, n_features)`` - 3. For strings with multiple options, use brackets: - ``input: {'log', 'squared', 'multinomial'}`` - 4. 1D or 2D data can be a subset of - ``{array-like, ndarray, sparse matrix, dataframe}``. Note that ``array-like`` - can also be a ``list``, while ``ndarray`` is explicitly only a ``numpy.ndarray``. - 5. Specify ``dataframe`` when "frame-like" features are being used, such - as the column names. - 6. When specifying the data type of a list, use ``of`` as a delimiter: - ``list of int``. 
When the parameter supports arrays giving details - about the shape and/or data type and a list of such arrays, you can - use one of ``array-like of shape (n_samples,) or list of such arrays``. - 7. When specifying the dtype of an ndarray, use e.g. ``dtype=np.int32`` - after defining the shape: - ``ndarray of shape (n_samples,), dtype=np.int32``. You can specify - multiple dtype as a set: - ``array-like of shape (n_samples,), dtype={np.float64, np.float32}``. - If one wants to mention arbitrary precision, use `integral` and - `floating` rather than the Python dtype `int` and `float`. When both - `int` and `floating` are supported, there is no need to specify the - dtype. - 8. When the default is ``None``, ``None`` only needs to be specified at the - end with ``default=None``. Be sure to include in the docstring, what it - means for the parameter or attribute to be ``None``. - -* For unwritten formatting rules, try to follow existing good works: - - * When bibliographic references are available with `arxiv `_ - or `Digital Object Identifier `_ identification numbers, - use the sphinx directives `:arxiv:` or `:doi:`. For example, see references in - :ref:`Spectral Clustering Graphs `. - * For "References" in docstrings, see the Silhouette Coefficient - (:func:`sklearn.metrics.silhouette_score`). + * Section - to link to an arbitrary section in the documentation, use + reference labels (see `Sphinx docs + `_). + For example: -* When editing reStructuredText (``.rst``) files, try to keep line length under - 80 characters when possible (exceptions include links and tables). + .. code-block:: rst -* In scikit-learn reStructuredText files both single and double backticks - surrounding text will render as inline literal (often used for code, e.g., - `list`). This is due to specific configurations we have set. Single - backticks should be used nowadays. + .. _my-section: -* Before submitting your pull request check if your modifications have - introduced new sphinx warnings and try to fix them. + My section + ---------- -Cross-referencing ------------------ + This is the text of the section. + + To refer to itself use :ref:`my-section`. + + You should not modify existing sphinx reference labels as this would break + existing cross references and external links pointing to specific sections + in the scikit-learn documentation. + + * Glossary - linking to a term in the :ref:`glossary`: + + .. code-block:: rst + + :term:`cross_validation` + + * Function - to link to the documentation of a function, use the full import + path to the function: + + .. code-block:: rst + + :func:`~sklearn.model_selection.cross_val_score` + + However, if there is a `.. currentmodule::` directive above you in the document, + you will only need to use the path to the function succeeding the current + module specified. For example: + + .. code-block:: rst + + .. currentmodule:: sklearn.model_selection + + :func:`cross_val_score` + + * Class - to link to documentation of a class, use the full import path to the + class, unless there is a 'currentmodule' directive in the document above + (see above): + + .. code-block:: rst + + :class:`~sklearn.preprocessing.StandardScaler` + +|details-end| + +You can edit the documentation using any text editor, and then generate the +HTML output by following :ref:`building_documentation`. The resulting HTML files +will be placed in ``_build/html/stable`` and are viewable in a web browser, for +instance by opening the local ``_build/html/stable/index.html`` file. + + +.. 
_building_documentation: -It is often useful to cross-reference to other pages in the scikit-learn -documentation. This should be done with reStructuredText cross-referencing -syntax: +Building the documentation +-------------------------- -* Section - to link to an arbitrary section in the documentation, use reference - labels (see - `Sphinx docs `_). - For example: +**Before submitting a pull request check if your modifications have introduced +new sphinx warnings by building the documentation locally and try to fix them.** - .. code-block:: rst +First, make sure you have :ref:`properly installed ` +the development version. - .. _my-section: +.. + packaging is not needed once setuptools starts shipping packaging>=17.0 - My section - ---------- +Building the documentation requires installing some additional packages: - This is the text of the section. +.. prompt:: bash $ - To refer to itself use :ref:`my-section`. + pip install sphinx sphinx-gallery numpydoc matplotlib Pillow pandas \ + scikit-image packaging seaborn sphinx-prompt \ + sphinxext-opengraph sphinx-copybutton plotly pooch - You should not modify existing sphinx reference labels as this would break - existing cross references and external links pointing to specific sections in - the scikit-learn documentation. +To build the documentation, you need to be in the ``doc`` folder: -* Glossary - linking to a term in the :ref:`glossary`: +.. prompt:: bash $ - .. code-block:: rst + cd doc - :term:`cross_validation` +In the vast majority of cases, you only need to generate the full web site, +without the example gallery: -* Function - to link to the documentation of a function, use the full - import path to the function: +.. prompt:: bash $ - .. code-block:: rst + make - :func:`~sklearn.model_selection.cross_val_score` +The documentation will be generated in the ``_build/html/stable`` directory +and are viewable in a web browser, for instance by opening the local +``_build/html/stable/index.html`` file. +To also generate the example gallery you can use: - However, if there is a 'currentmodule' directive above you in the document, - you will only need to use the path to the function succeeding the current - module specified. For example: +.. prompt:: bash $ - .. code-block:: rst + make html - .. currentmodule:: sklearn.model_selection +This will run all the examples, which takes a while. If you only want to +generate a few examples, you can use: + +.. prompt:: bash $ + + EXAMPLES_PATTERN=your_regex_goes_here make html + +This is particularly useful if you are modifying a few examples. + +Set the environment variable `NO_MATHJAX=1` if you intend to view +the documentation in an offline setting. - :func:`cross_val_score` +To build the PDF manual, run: + +.. prompt:: bash $ -* Class - to link to documentation of a class, use the full import path to the - class, unless there is a 'currentmodule' directive in the document above - (see above): + make latexpdf + +.. warning:: **Sphinx version** - .. code-block:: rst + While we do our best to have the documentation build under as many + versions of Sphinx as possible, the different versions tend to + behave slightly differently. To get the best results, you should + use the same version as the one we used on CircleCI. Look at this + `GitHub search `_ + to know the exact version. - :class:`~sklearn.preprocessing.StandardScaler` .. 
_generated_doc_CI: diff --git a/doc/developers/develop.rst b/doc/developers/develop.rst index f4fd4898865ea..8db8799daf6fe 100644 --- a/doc/developers/develop.rst +++ b/doc/developers/develop.rst @@ -414,7 +414,7 @@ trailing ``_`` is used to check if the estimator has been fitted. Cloning ------- -For use with the :mod:`model_selection` module, +For use with the :mod:`~sklearn.model_selection` module, an estimator must support the ``base.clone`` function to replicate an estimator. This can be done by providing a ``get_params`` method. If ``get_params`` is present, then ``clone(estimator)`` will be an instance of @@ -508,7 +508,7 @@ independent term is stored in ``intercept_``. ``sklearn.linear_model._base`` contains a few base classes and mixins that implement common linear model patterns. -The :mod:`sklearn.utils.multiclass` module contains useful functions +The :mod:`~sklearn.utils.multiclass` module contains useful functions for working with multiclass and multilabel problems. .. _estimator_tags: @@ -568,7 +568,7 @@ pairwise (default=False) or a cross validation procedure that extracts a sub-sample of data intended for a pairwise estimator, where the data needs to be indexed on both axes. Specifically, this tag is used by - :func:`~sklearn.utils.metaestimators._safe_split` to slice rows and + `sklearn.utils.metaestimators._safe_split` to slice rows and columns. preserves_dtype (default=``[np.float64]``) @@ -709,6 +709,20 @@ only wrap the first array and not alter the other arrays. See :ref:`sphx_glr_auto_examples_miscellaneous_plot_set_output.py` for an example on how to use the API. +.. _developer_api_check_is_fitted: + +Developer API for `check_is_fitted` +=================================== + +By default :func:`~sklearn.utils.validation.check_is_fitted` checks if there +are any attributes in the instance with a trailing underscore, e.g. `coef_`. +An estimator can change the behavior by implementing a `__sklearn_is_fitted__` +method taking no input and returning a boolean. If this method exists, +:func:`~sklearn.utils.validation.check_is_fitted` simply returns its output. + +See :ref:`sphx_glr_auto_examples_developing_estimators_sklearn_is_fitted.py` +for an example on how to use the API. + .. _coding-guidelines: Coding guidelines @@ -855,7 +869,7 @@ Numerical assertions in tests ----------------------------- When asserting the quasi-equality of arrays of continuous values, -do use :func:`sklearn.utils._testing.assert_allclose`. +do use `sklearn.utils._testing.assert_allclose`. The relative tolerance is automatically inferred from the provided arrays dtypes (for float32 and float64 dtypes in particular) but you can override @@ -865,4 +879,4 @@ When comparing arrays of zero-elements, please do provide a non-zero value for the absolute tolerance via ``atol``. For more information, please refer to the docstring of -:func:`sklearn.utils._testing.assert_allclose`. +`sklearn.utils._testing.assert_allclose`. diff --git a/doc/developers/maintainer.rst b/doc/developers/maintainer.rst index 6b49103774d9c..d2a1d21523f78 100644 --- a/doc/developers/maintainer.rst +++ b/doc/developers/maintainer.rst @@ -208,7 +208,9 @@ Making a release - Update the release date in ``whats_new.rst`` - Edit the ``doc/templates/index.html`` to change the 'News' entry of the - front page (with the release month as well). + front page (with the release month as well). Do not forget to remove + the old entries (two years or three releases are typically good + enough) 2. 
On the branch for releasing, update the version number in ``sklearn/__init__.py``, the ``__version__``. diff --git a/doc/developers/plotting.rst b/doc/developers/plotting.rst index b0e8b3b43ee45..90b45ff4ef6b3 100644 --- a/doc/developers/plotting.rst +++ b/doc/developers/plotting.rst @@ -87,7 +87,7 @@ be placed. In this case, we suggest using matplotlib's By default, the `ax` keyword in `plot` is `None`. In this case, the single axes is created and the gridspec api is used to create the regions to plot in. -See for example, :func:`~sklearn.inspection.PartialDependenceDisplay.from_estimator +See for example, :meth:`~sklearn.inspection.PartialDependenceDisplay.from_estimator` which plots multiple lines and contours using this API. The axes defining the bounding box is saved in a `bounding_ax_` attribute. The individual axes created are stored in an `axes_` ndarray, corresponding to the axes position on diff --git a/doc/developers/utilities.rst b/doc/developers/utilities.rst index 8b3612afda82a..2525b2b1365ed 100644 --- a/doc/developers/utilities.rst +++ b/doc/developers/utilities.rst @@ -97,7 +97,7 @@ Efficient Linear Algebra & Array Operations fast on large matrices on which you wish to extract only a small number of components. -- :func:`arrayfuncs.cholesky_delete`: +- `arrayfuncs.cholesky_delete`: (used in :func:`~sklearn.linear_model.lars_path`) Remove an item from a cholesky factorization. diff --git a/doc/documentation_team.rst b/doc/documentation_team.rst new file mode 100644 index 0000000000000..935a995a7c00e --- /dev/null +++ b/doc/documentation_team.rst @@ -0,0 +1,16 @@ +.. raw :: html + + +
+    [style and avatar markup]
+    [avatar block: Arturo Amor]
+    [avatar block: Lucy Liu]
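
Returning to the ``__sklearn_is_fitted__`` developer API documented in the
``doc/developers/develop.rst`` hunk above, a minimal sketch of an estimator
opting into that hook could look as follows (the class and the ``_is_fitted``
flag are illustrative, not part of scikit-learn)::

    from collections import Counter

    from sklearn.base import BaseEstimator, ClassifierMixin
    from sklearn.utils.validation import check_is_fitted


    class MajorityClassifier(ClassifierMixin, BaseEstimator):
        """Toy estimator reporting its fitted state via __sklearn_is_fitted__."""

        def fit(self, X, y):
            # Remember the most frequent class and set a private flag; the flag
            # is what __sklearn_is_fitted__ inspects instead of the default
            # trailing-underscore attribute check.
            self._majority = Counter(y).most_common(1)[0][0]
            self._is_fitted = True
            return self

        def predict(self, X):
            check_is_fitted(self)  # delegates to __sklearn_is_fitted__
            return [self._majority] * len(X)

        def __sklearn_is_fitted__(self):
            return getattr(self, "_is_fitted", False)
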
diff --git a/doc/glossary.rst b/doc/glossary.rst index 0a249cf94ad22..57e7242825cac 100644 --- a/doc/glossary.rst +++ b/doc/glossary.rst @@ -205,6 +205,29 @@ General Concepts exceptional behaviours on the estimator using semantic :term:`estimator tags`. + cross-fitting + cross fitting + A resampling method that iteratively partitions data into mutually + exclusive subsets to fit two stages. During the first stage, the + mutually exclusive subsets enable predictions or transformations to be + computed on data not seen during training. The computed data is then + used in the second stage. The objective is to avoid having any + overfitting in the first stage introduce bias into the input data + distribution of the second stage. + For examples of its use, see: :class:`~preprocessing.TargetEncoder`, + :class:`~ensemble.StackingClassifier`, + :class:`~ensemble.StackingRegressor` and + :class:`~calibration.CalibratedClassifierCV`. + + cross-validation + cross validation + A resampling method that iteratively partitions data into mutually + exclusive 'train' and 'test' subsets so model performance can be + evaluated on unseen data. This conserves data as avoids the need to hold + out a 'validation' dataset and accounts for variability as multiple + rounds of cross validation are generally performed. + See :ref:`User Guide ` for more details. + deprecation We use deprecation to slowly violate our :term:`backwards compatibility` assurances, usually to: @@ -344,8 +367,8 @@ General Concepts evaluation metric evaluation metrics Evaluation metrics give a measure of how well a model performs. We may - use this term specifically to refer to the functions in :mod:`metrics` - (disregarding :mod:`metrics.pairwise`), as distinct from the + use this term specifically to refer to the functions in :mod:`~sklearn.metrics` + (disregarding :mod:`~sklearn.metrics.pairwise`), as distinct from the :term:`score` method and the :term:`scoring` API used in cross validation. See :ref:`model_evaluation`. @@ -360,7 +383,7 @@ General Concepts the scoring API. Note that some estimators can calculate metrics that are not included - in :mod:`metrics` and are estimator-specific, notably model + in :mod:`~sklearn.metrics` and are estimator-specific, notably model likelihoods. estimator tags @@ -494,8 +517,8 @@ General Concepts applying a :term:`transformer` to the entirety of a dataset rather than each training portion in a cross validation split. - We aim to provide interfaces (such as :mod:`pipeline` and - :mod:`model_selection`) that shield the user from data leakage. + We aim to provide interfaces (such as :mod:`~sklearn.pipeline` and + :mod:`~sklearn.model_selection`) that shield the user from data leakage. memmapping memory map @@ -575,7 +598,7 @@ General Concepts params We mostly use *parameter* to refer to the aspects of an estimator that can be specified in its construction. For example, ``max_depth`` and - ``random_state`` are parameters of :class:`RandomForestClassifier`. + ``random_state`` are parameters of :class:`~ensemble.RandomForestClassifier`. Parameters to an estimator's constructor are stored unmodified as attributes on the estimator instance, and conventionally start with an alphabetic character and end with an alphanumeric character. 
Each @@ -620,7 +643,7 @@ General Concepts implementations of distance metrics (as well as improper metrics like Cosine Distance) through :func:`metrics.pairwise_distances`, and of kernel functions (a constrained class of similarity functions) in - :func:`metrics.pairwise_kernels`. These can compute pairwise distance + :func:`metrics.pairwise.pairwise_kernels`. These can compute pairwise distance matrices that are symmetric and hence store data redundantly. See also :term:`precomputed` and :term:`metric`. @@ -1026,6 +1049,38 @@ Further examples: * :class:`gaussian_process.kernels.Kernel` * ``tree.Criterion`` +.. _glossary_metadata_routing: + +Metadata Routing +================ + +.. glossary:: + + consumer + An object which consumes :term:`metadata`. This object is usually an + :term:`estimator`, a :term:`scorer`, or a :term:`CV splitter`. Consuming + metadata means using it in calculations, e.g. using + :term:`sample_weight` to calculate a certain type of score. Being a + consumer doesn't mean that the object always receives a certain + metadata, rather it means it can use it if it is provided. + + metadata + Data which is related to the given :term:`X` and :term:`y` data, but + is not directly a part of the data, e.g. :term:`sample_weight` or + :term:`groups`, and is passed along to different objects and methods, + e.g. to a :term:`scorer` or a :term:`CV splitter`. + + router + An object which routes metadata to :term:`consumers `. This + object is usually a :term:`meta-estimator`, e.g. + :class:`~pipeline.Pipeline` or :class:`~model_selection.GridSearchCV`. + Some routers can also be a consumer. This happens for example when a + meta-estimator uses the given :term:`groups`, and it also passes it + along to some of its sub-objects, such as a :term:`CV splitter`. + +Please refer to :ref:`Metadta Routing User Guide ` for more +information. + .. _glossary_target_types: Target Types @@ -1122,7 +1177,7 @@ Target Types XXX: For simplicity, we may not always support string class labels for multiclass multioutput, and integer class labels should be used. - :mod:`multioutput` provides estimators which estimate multi-output + :mod:`~sklearn.multioutput` provides estimators which estimate multi-output problems using multiple single-output estimators. This may not fully account for dependencies among the different outputs, which methods natively handling the multioutput case (e.g. decision trees, nearest @@ -1474,7 +1529,7 @@ functions or non-estimator constructors. 1: 1}, {0: 1, 1: 1}]`` instead of ``[{1:1}, {2:5}, {3:1}, {4:1}]``. The ``class_weight`` parameter is validated and interpreted with - :func:`utils.compute_class_weight`. + :func:`utils.class_weight.compute_class_weight`. ``cv`` Determines a cross validation splitting strategy, as used in @@ -1500,16 +1555,17 @@ functions or non-estimator constructors. With some exceptions (especially where not using cross validation at all is an option), the default is 5-fold. - ``cv`` values are validated and interpreted with :func:`utils.check_cv`. + ``cv`` values are validated and interpreted with + :func:`model_selection.check_cv`. ``kernel`` Specifies the kernel function to be used by Kernel Method algorithms. - For example, the estimators :class:`SVC` and - :class:`GaussianProcessClassifier` both have a ``kernel`` parameter - that takes the name of the kernel to use as string or a callable - kernel function used to compute the kernel matrix. For more reference, - see the :ref:`kernel_approximation` and the :ref:`gaussian_process` - user guides. 
+ For example, the estimators :class:`svm.SVC` and + :class:`gaussian_process.GaussianProcessClassifier` both have a + ``kernel`` parameter that takes the name of the kernel to use as string + or a callable kernel function used to compute the kernel matrix. For + more reference, see the :ref:`kernel_approximation` and the + :ref:`gaussian_process` user guides. ``max_iter`` For estimators involving iterative optimization, this determines the @@ -1670,12 +1726,12 @@ functions or non-estimator constructors. is an interaction between ``warm_start`` and the parameter controlling the number of iterations of the estimator. - For estimators imported from :mod:`ensemble`, + For estimators imported from :mod:`~sklearn.ensemble`, ``warm_start`` will interact with ``n_estimators`` or ``max_iter``. For these models, the number of iterations, reported via ``len(estimators_)`` or ``n_iter_``, corresponds the total number of estimators/iterations learnt since the initialization of the model. - Thus, if a model was already initialized with `N`` estimators, and `fit` + Thus, if a model was already initialized with `N` estimators, and `fit` is called with ``n_estimators`` or ``max_iter`` set to `M`, the model will train `M - N` new estimators. diff --git a/doc/governance.rst b/doc/governance.rst index 5b153aed7a0ce..33afd7dde8ddb 100644 --- a/doc/governance.rst +++ b/doc/governance.rst @@ -86,6 +86,17 @@ For this, they can operate the scikit-learn accounts on various social networks and produce materials. They also have the required rights to our blog repository and other relevant accounts and platforms. +Documentation team +~~~~~~~~~~~~~~~~~~ + +Members of the documentation team engage with the documentation of the project +among other things. They might also be involved in other aspects of the +project, but their reviews on documentation contributions are considered +authoritative, and can merge such contributions. + +To this end, they have permissions to merge pull requests in scikit-learn's +repository. + Maintainers ~~~~~~~~~~~ @@ -158,8 +169,8 @@ are made according to the following rules: versions** happen via a :ref:`slep` and follows the decision-making process outlined above. -* **Changes to the governance model** follow the process outlined in [ - SLEP020](https://scikit-learn-enhancement-proposals.readthedocs.io/en/latest/slep020/proposal.html). +* **Changes to the governance model** follow the process outlined in `SLEP020 + `__. 
If a veto -1 vote is cast on a lazy consensus, the proposer can appeal to the community and maintainers and the change can be approved or rejected using diff --git a/doc/images/bcg-small.png b/doc/images/bcg-small.png deleted file mode 100644 index 8ff377969003a..0000000000000 Binary files a/doc/images/bcg-small.png and /dev/null differ diff --git a/doc/images/chanel-small.png b/doc/images/chanel-small.png new file mode 100644 index 0000000000000..b1965b714a42f Binary files /dev/null and b/doc/images/chanel-small.png differ diff --git a/doc/images/chanel.png b/doc/images/chanel.png new file mode 100644 index 0000000000000..1b2d39fd4facf Binary files /dev/null and b/doc/images/chanel.png differ diff --git a/doc/images/fujitsu-small.png b/doc/images/fujitsu-small.png deleted file mode 100644 index b77447117497d..0000000000000 Binary files a/doc/images/fujitsu-small.png and /dev/null differ diff --git a/doc/install.rst b/doc/install.rst index bf2832bf72f24..8b36da24adf08 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -61,7 +61,7 @@ Installing the latest release >Install python3 and python3-pip using the package manager of the Linux Distribution.Install conda using the Anaconda or miniconda - installers or the miniforge installers + installers or the miniforge installers (no administrator permission required for any of those).
@@ -69,42 +69,65 @@ Then run: .. raw:: html -
-          python3 -m venv sklearn-venv
-          python -m venv sklearn-venv
-          python -m venv sklearn-venv
-          source sklearn-venv/bin/activate
-          source sklearn-venv/bin/activate
-          sklearn-venv\Scripts\activate
-          pip install -U scikit-learn
-          pip install -U scikit-learn
-          pip install -U scikit-learn
-          pip3 install -U scikit-learn
-          conda create -n sklearn-env -c conda-forge scikit-learn
-          conda activate sklearn-env
+
+
pip3 install -U scikit-learn
+ +
pip install -U scikit-learn
+ +
pip install -U scikit-learn
+ +
python3 -m venv sklearn-venv
+  source sklearn-venv/bin/activate
+  pip3 install -U scikit-learn
+ +
python -m venv sklearn-venv
+  sklearn-venv\Scripts\activate
+  pip install -U scikit-learn
+ +
python -m venv sklearn-venv
+  source sklearn-venv/bin/activate
+  pip install -U scikit-learn
+ +
conda create -n sklearn-env -c conda-forge scikit-learn
+  conda activate sklearn-env
+
In order to check your installation you can use .. raw:: html -
-          python3 -m pip show scikit-learn  # to see which version and where scikit-learn is installed
-          python3 -m pip freeze  # to see all packages installed in the active virtualenv
-          python3 -c "import sklearn; sklearn.show_versions()"
-          python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
-          python -m pip freeze  # to see all packages installed in the active virtualenv
-          python -c "import sklearn; sklearn.show_versions()"
-          python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
-          python -m pip freeze  # to see all packages installed in the active virtualenv
-          python -c "import sklearn; sklearn.show_versions()"
-          python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
-          python -m pip freeze  # to see all packages installed in the active virtualenv
-          python -c "import sklearn; sklearn.show_versions()"
-          conda list scikit-learn  # to see which scikit-learn version is installed
-          conda list  # to see all packages installed in the active conda environment
-          python -c "import sklearn; sklearn.show_versions()"
+
+
python3 -m pip show scikit-learn  # to see which version and where scikit-learn is installed
+  python3 -m pip freeze  # to see all packages installed in the active virtualenv
+  python3 -c "import sklearn; sklearn.show_versions()"
+ +
python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
+  python -m pip freeze  # to see all packages installed in the active virtualenv
+  python -c "import sklearn; sklearn.show_versions()"
+ +
python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
+  python -m pip freeze  # to see all packages installed in the active virtualenv
+  python -c "import sklearn; sklearn.show_versions()"
+ +
python -m pip show scikit-learn  # to see which version and where scikit-learn is installed
+  python -m pip freeze  # to see all packages installed in the active virtualenv
+  python -c "import sklearn; sklearn.show_versions()"
+ +
conda list scikit-learn  # to see which scikit-learn version is installed
+  conda list  # to see all packages installed in the active conda environment
+  python -c "import sklearn; sklearn.show_versions()"
Note that in order to avoid potential conflicts with other packages it is @@ -146,28 +169,6 @@ purpose. Scikit-learn 1.1 and later requires Python 3.8 or newer. -.. _install_on_apple_silicon_m1: - -Installing on Apple Silicon M1 hardware -======================================= - -The recently introduced `macos/arm64` platform (sometimes also known as -`macos/aarch64`) requires the open source community to upgrade the build -configuration and automation to properly support it. - -At the time of writing (January 2021), the only way to get a working -installation of scikit-learn on this hardware is to install scikit-learn and its -dependencies from the conda-forge distribution, for instance using the miniforge -installers: - -https://github.com/conda-forge/miniforge - -The following issue tracks progress on making it possible to install -scikit-learn from PyPI with pip: - -https://github.com/scikit-learn/scikit-learn/issues/19137 - - .. _install_by_distribution: Third party distributions of scikit-learn @@ -279,17 +280,17 @@ and in the `main`, `conda-forge` and `intel` conda channels: conda install scikit-learn-intelex -This package has an Intel optimized version of many estimators. Whenever -an alternative implementation doesn't exist, scikit-learn implementation -is used as a fallback. Those optimized solvers come from the oneDAL -C++ library and are optimized for the x86_64 architecture, and are +This package has an Intel optimized version of many estimators. Whenever +an alternative implementation doesn't exist, scikit-learn implementation +is used as a fallback. Those optimized solvers come from the oneDAL +C++ library and are optimized for the x86_64 architecture, and are optimized for multi-core Intel CPUs. Note that those solvers are not enabled by default, please refer to the -`scikit-learn-intelex `_ +`scikit-learn-intelex `_ documentation for more details on usage scenarios. Direct export example: -.. prompt:: bash $ +.. prompt:: python >>> from sklearnex.neighbors import NearestNeighbors @@ -339,6 +340,6 @@ using the ``regedit`` tool: #. Reinstall scikit-learn (ignoring the previous broken installation): -.. prompt:: python $ +.. prompt:: bash $ pip install --exists-action=i scikit-learn diff --git a/doc/jupyter-lite.json b/doc/jupyter-lite.json index 32a5e43af987b..05a02b1080fa4 100644 --- a/doc/jupyter-lite.json +++ b/doc/jupyter-lite.json @@ -3,8 +3,8 @@ "jupyter-config-data": { "litePluginSettings": { "@jupyterlite/pyodide-kernel-extension:kernel": { - "pyodideUrl": "https://cdn.jsdelivr.net/pyodide/v0.23.1/full/pyodide.js" + "pyodideUrl": "https://cdn.jsdelivr.net/pyodide/v0.24.1/full/pyodide.js" } } } -} +} diff --git a/doc/metadata_routing.rst b/doc/metadata_routing.rst index a3a443995cfc7..3cb69e0d8906f 100644 --- a/doc/metadata_routing.rst +++ b/doc/metadata_routing.rst @@ -34,9 +34,9 @@ you can check our related developer guide: .. note:: Note that the methods and requirements introduced in this document are only - relevant if you want to pass metadata (e.g. ``sample_weight``) to a method. + relevant if you want to pass :term:`metadata` (e.g. ``sample_weight``) to a method. If you're only passing ``X`` and ``y`` and no other parameter / metadata to - methods such as ``fit``, ``transform``, etc, then you don't need to set + methods such as :term:`fit`, :term:`transform`, etc, then you don't need to set anything. Usage Examples @@ -91,13 +91,13 @@ since ``sample_weigh`` was not requested by any of its underlying objects. 
Weighted scoring and unweighted fitting --------------------------------------- -When passing metadata such as ``sample_weight`` around, all scikit-learn -estimators require weights to be either explicitly requested or not requested -(i.e. ``True`` or ``False``) when used in another router such as a -:class:`~pipeline.Pipeline` or a ``*GridSearchCV``. To perform an unweighted -fit, we need to configure :class:`~linear_model.LogisticRegressionCV` to not -request sample weights, so that :func:`~model_selection.cross_validate` does -not pass the weights along:: +When passing metadata such as ``sample_weight`` around, all ``sample_weight`` +:term:`consumers ` require weights to be either explicitly requested +or not requested (i.e. ``True`` or ``False``) when used in another +:term:`router` such as a :class:`~pipeline.Pipeline` or a ``*GridSearchCV``. To +perform an unweighted fit, we need to configure +:class:`~linear_model.LogisticRegressionCV` to not request sample weights, so +that :func:`~model_selection.cross_validate` does not pass the weights along:: >>> weighted_acc = make_scorer(accuracy_score).set_score_request( ... sample_weight=True @@ -177,16 +177,17 @@ consumers. In this example, we pass ``scoring_weight`` to the scorer, and API Interface ************* -A *consumer* is an object (estimator, meta-estimator, scorer, splitter) which -accepts and uses some metadata in at least one of its methods (``fit``, -``predict``, ``inverse_transform``, ``transform``, ``score``, ``split``). -Meta-estimators which only forward the metadata to other objects (the child -estimator, scorers, or splitters) and don't use the metadata themselves are not -consumers. (Meta-)Estimators which route metadata to other objects are -*routers*. A(n) (meta-)estimator can be a consumer and a router at the same time. -(Meta-)Estimators and splitters expose a ``set_*_request`` method for each -method which accepts at least one metadata. For instance, if an estimator -supports ``sample_weight`` in ``fit`` and ``score``, it exposes +A :term:`consumer` is an object (estimator, meta-estimator, scorer, splitter) +which accepts and uses some :term:`metadata` in at least one of its methods +(``fit``, ``predict``, ``inverse_transform``, ``transform``, ``score``, +``split``). Meta-estimators which only forward the metadata to other objects +(the child estimator, scorers, or splitters) and don't use the metadata +themselves are not consumers. (Meta-)Estimators which route metadata to other +objects are :term:`routers `. A(n) (meta-)estimator can be a +:term:`consumer` and a :term:`router` at the same time. (Meta-)Estimators and +splitters expose a ``set_*_request`` method for each method which accepts at +least one metadata. For instance, if an estimator supports ``sample_weight`` in +``fit`` and ``score``, it exposes ``estimator.set_fit_request(sample_weight=value)`` and ``estimator.set_score_request(sample_weight=value)``. Here ``value`` can be: diff --git a/doc/modules/array_api.rst b/doc/modules/array_api.rst index 71a2e1ce0a6ce..15e78d25f93df 100644 --- a/doc/modules/array_api.rst +++ b/doc/modules/array_api.rst @@ -107,4 +107,4 @@ To run these checks you need to install test environment. To run the full set of checks you need to install both `PyTorch `_ and `CuPy `_ and have a GPU. Checks that can not be executed or have missing dependencies will be -automatically skipped. \ No newline at end of file +automatically skipped. 
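
To make the consumer/router terminology of the ``doc/metadata_routing.rst`` hunks
above concrete, here is a small sketch. It assumes a scikit-learn version where
metadata routing can be enabled via ``set_config`` and where
``cross_validate`` accepts a ``params`` argument; treat it as illustrative rather
than canonical::

    import numpy as np
    import sklearn
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegressionCV
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import cross_validate

    sklearn.set_config(enable_metadata_routing=True)

    X, y = make_classification(random_state=0)
    sample_weight = np.ones(len(y))

    # The scorer and the inner estimator are *consumers*: each explicitly
    # requests sample_weight. cross_validate acts as the *router* and passes
    # the metadata along to both of them.
    weighted_acc = make_scorer(accuracy_score).set_score_request(sample_weight=True)
    clf = LogisticRegressionCV(cv=3, scoring=weighted_acc).set_fit_request(
        sample_weight=True
    )

    results = cross_validate(
        clf, X, y,
        scoring=weighted_acc,
        params={"sample_weight": sample_weight},
    )
    print(results["test_score"])
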
diff --git a/doc/modules/biclustering.rst b/doc/modules/biclustering.rst index 44a996ed0ffd6..2189e85e0f0ef 100644 --- a/doc/modules/biclustering.rst +++ b/doc/modules/biclustering.rst @@ -4,8 +4,7 @@ Biclustering ============ -Biclustering can be performed with the module -:mod:`sklearn.cluster.bicluster`. Biclustering algorithms simultaneously +Biclustering algorithms simultaneously cluster rows and columns of a data matrix. These clusters of rows and columns are known as biclusters. Each determines a submatrix of the original data matrix with some desired properties. @@ -82,7 +81,7 @@ diagonal and checkerboard bicluster structures. these alternate names. -.. currentmodule:: sklearn.cluster.bicluster +.. currentmodule:: sklearn.cluster .. _spectral_coclustering: diff --git a/doc/modules/classes.rst b/doc/modules/classes.rst index 204c300b1a9b8..7f6f5c910a3fc 100644 --- a/doc/modules/classes.rst +++ b/doc/modules/classes.rst @@ -36,6 +36,7 @@ Base classes base.TransformerMixin base.MetaEstimatorMixin base.OneToOneFeatureMixin + base.OutlierMixin base.ClassNamePrefixFeaturesOutMixin feature_selection.SelectorMixin diff --git a/doc/modules/clustering.rst b/doc/modules/clustering.rst index f976110ad8712..1f05ce71b549b 100644 --- a/doc/modules/clustering.rst +++ b/doc/modules/clustering.rst @@ -931,7 +931,7 @@ by black points below. which avoids calculating the full distance matrix (as was done in scikit-learn versions before 0.14). The possibility to use custom metrics is retained; - for details, see :class:`NearestNeighbors`. + for details, see :class:`~sklearn.neighbors.NearestNeighbors`. .. topic:: Memory consumption for large sample sizes diff --git a/doc/modules/compose.rst b/doc/modules/compose.rst index 5bcee9550b968..965765ac1f7b2 100644 --- a/doc/modules/compose.rst +++ b/doc/modules/compose.rst @@ -41,12 +41,21 @@ All estimators in a pipeline, except the last one, must be transformers (i.e. must have a :term:`transform` method). The last estimator may be any type (transformer, classifier, etc.). +.. note:: + + Calling ``fit`` on the pipeline is the same as calling ``fit`` on + each estimator in turn, ``transform`` the input and pass it on to the next step. + The pipeline has all the methods that the last estimator in the pipeline has, + i.e. if the last estimator is a classifier, the :class:`Pipeline` can be used + as a classifier. If the last estimator is a transformer, again, so is the + pipeline. + Usage ----- -Construction -............ +Build a pipeline +................ The :class:`Pipeline` is built using a list of ``(key, value)`` pairs, where the ``key`` is a string containing the name you want to give this step and ``value`` @@ -60,23 +69,41 @@ is an estimator object:: >>> pipe Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC())]) +|details-start| +**Shorthand version using :func:`make_pipeline`** +|details-split| + The utility function :func:`make_pipeline` is a shorthand for constructing pipelines; it takes a variable number of estimators and returns a pipeline, filling in the names automatically:: >>> from sklearn.pipeline import make_pipeline - >>> from sklearn.naive_bayes import MultinomialNB - >>> from sklearn.preprocessing import Binarizer - >>> make_pipeline(Binarizer(), MultinomialNB()) - Pipeline(steps=[('binarizer', Binarizer()), ('multinomialnb', MultinomialNB())]) + >>> make_pipeline(PCA(), SVC()) + Pipeline(steps=[('pca', PCA()), ('svc', SVC())]) + +|details-end| + +Access pipeline steps +..................... 
+ +The estimators of a pipeline are stored as a list in the ``steps`` attribute. +A sub-pipeline can be extracted using the slicing notation commonly used +for Python Sequences such as lists or strings (although only a step of 1 is +permitted). This is convenient for performing only some of the transformations +(or their inverse): + + >>> pipe[:1] + Pipeline(steps=[('reduce_dim', PCA())]) + >>> pipe[-1:] + Pipeline(steps=[('clf', SVC())]) -Accessing steps -............... +|details-start| +**Accessing a step by name or position** +|details-split| -The estimators of a pipeline are stored as a list in the ``steps`` attribute, -but can be accessed by index or name by indexing (with ``[idx]``) the -Pipeline:: +A specific step can also be accessed by index or name by indexing (with ``[idx]``) the +pipeline:: >>> pipe.steps[0] ('reduce_dim', PCA()) @@ -85,34 +112,63 @@ Pipeline:: >>> pipe['reduce_dim'] PCA() -Pipeline's `named_steps` attribute allows accessing steps by name with tab +`Pipeline`'s `named_steps` attribute allows accessing steps by name with tab completion in interactive environments:: >>> pipe.named_steps.reduce_dim is pipe['reduce_dim'] True -A sub-pipeline can also be extracted using the slicing notation commonly used -for Python Sequences such as lists or strings (although only a step of 1 is -permitted). This is convenient for performing only some of the transformations -(or their inverse): +|details-end| - >>> pipe[:1] - Pipeline(steps=[('reduce_dim', PCA())]) - >>> pipe[-1:] - Pipeline(steps=[('clf', SVC())]) +Tracking feature names in a pipeline +.................................... +To enable model inspection, :class:`~sklearn.pipeline.Pipeline` has a +``get_feature_names_out()`` method, just like all transformers. You can use +pipeline slicing to get the feature names going into each step:: + + >>> from sklearn.datasets import load_iris + >>> from sklearn.linear_model import LogisticRegression + >>> from sklearn.feature_selection import SelectKBest + >>> iris = load_iris() + >>> pipe = Pipeline(steps=[ + ... ('select', SelectKBest(k=2)), + ... ('clf', LogisticRegression())]) + >>> pipe.fit(iris.data, iris.target) + Pipeline(steps=[('select', SelectKBest(...)), ('clf', LogisticRegression(...))]) + >>> pipe[:-1].get_feature_names_out() + array(['x2', 'x3'], ...) + +|details-start| +**Customize feature names** +|details-split| + +You can also provide custom feature names for the input data using +``get_feature_names_out``:: + + >>> pipe[:-1].get_feature_names_out(iris.feature_names) + array(['petal length (cm)', 'petal width (cm)'], ...) + +|details-end| .. _pipeline_nested_parameters: -Nested parameters -................. +Access to nested parameters +........................... -Parameters of the estimators in the pipeline can be accessed using the -``__`` syntax:: +It is common to adjust the parameters of an estimator within a pipeline. This parameter +is therefore nested because it belongs to a particular sub-step. 
Parameters of the +estimators in the pipeline are accessible using the ``__`` +syntax:: + >>> pipe = Pipeline(steps=[("reduce_dim", PCA()), ("clf", SVC())]) >>> pipe.set_params(clf__C=10) Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC(C=10))]) +|details-start| +**When does it matter?** +|details-split| + This is particularly important for doing grid searches:: >>> from sklearn.model_selection import GridSearchCV @@ -123,42 +179,16 @@ This is particularly important for doing grid searches:: Individual steps may also be replaced as parameters, and non-final steps may be ignored by setting them to ``'passthrough'``:: - >>> from sklearn.linear_model import LogisticRegression >>> param_grid = dict(reduce_dim=['passthrough', PCA(5), PCA(10)], ... clf=[SVC(), LogisticRegression()], ... clf__C=[0.1, 10, 100]) >>> grid_search = GridSearchCV(pipe, param_grid=param_grid) -The estimators of the pipeline can be retrieved by index: - - >>> pipe[0] - PCA() - -or by name:: - - >>> pipe['reduce_dim'] - PCA() - -To enable model inspection, :class:`~sklearn.pipeline.Pipeline` has a -``get_feature_names_out()`` method, just like all transformers. You can use -pipeline slicing to get the feature names going into each step:: - - >>> from sklearn.datasets import load_iris - >>> from sklearn.feature_selection import SelectKBest - >>> iris = load_iris() - >>> pipe = Pipeline(steps=[ - ... ('select', SelectKBest(k=2)), - ... ('clf', LogisticRegression())]) - >>> pipe.fit(iris.data, iris.target) - Pipeline(steps=[('select', SelectKBest(...)), ('clf', LogisticRegression(...))]) - >>> pipe[:-1].get_feature_names_out() - array(['x2', 'x3'], ...) +.. topic:: See Also: -You can also provide custom feature names for the input data using -``get_feature_names_out``:: + * :ref:`composite_grid_search` - >>> pipe[:-1].get_feature_names_out(iris.feature_names) - array(['petal length (cm)', 'petal width (cm)'], ...) +|details-end| .. topic:: Examples: @@ -170,20 +200,6 @@ You can also provide custom feature names for the input data using * :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` * :ref:`sphx_glr_auto_examples_miscellaneous_plot_pipeline_display.py` -.. topic:: See Also: - - * :ref:`composite_grid_search` - - -Notes ------ - -Calling ``fit`` on the pipeline is the same as calling ``fit`` on -each estimator in turn, ``transform`` the input and pass it on to the next step. -The pipeline has all the methods that the last estimator in the pipeline has, -i.e. if the last estimator is a classifier, the :class:`Pipeline` can be used -as a classifier. If the last estimator is a transformer, again, so is the -pipeline. .. _pipeline_cache: @@ -219,43 +235,49 @@ object:: >>> # Clear the cache directory when you don't need it anymore >>> rmtree(cachedir) -.. warning:: **Side effect of caching transformers** - - Using a :class:`Pipeline` without cache enabled, it is possible to - inspect the original instance such as:: - - >>> from sklearn.datasets import load_digits - >>> X_digits, y_digits = load_digits(return_X_y=True) - >>> pca1 = PCA() - >>> svm1 = SVC() - >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) - >>> pipe.fit(X_digits, y_digits) - Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC())]) - >>> # The pca instance can be inspected directly - >>> print(pca1.components_) - [[-1.77484909e-19 ... 4.07058917e-18]] - - Enabling caching triggers a clone of the transformers before fitting. - Therefore, the transformer instance given to the pipeline cannot be - inspected directly. 
- In following example, accessing the :class:`PCA` instance ``pca2`` - will raise an ``AttributeError`` since ``pca2`` will be an unfitted - transformer. - Instead, use the attribute ``named_steps`` to inspect estimators within - the pipeline:: - - >>> cachedir = mkdtemp() - >>> pca2 = PCA() - >>> svm2 = SVC() - >>> cached_pipe = Pipeline([('reduce_dim', pca2), ('clf', svm2)], - ... memory=cachedir) - >>> cached_pipe.fit(X_digits, y_digits) - Pipeline(memory=..., +|details-start| +**Warning: Side effect of caching transformers** +|details-split| + +Using a :class:`Pipeline` without cache enabled, it is possible to +inspect the original instance such as:: + + >>> from sklearn.datasets import load_digits + >>> X_digits, y_digits = load_digits(return_X_y=True) + >>> pca1 = PCA() + >>> svm1 = SVC() + >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) + >>> pipe.fit(X_digits, y_digits) + Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC())]) + >>> # The pca instance can be inspected directly + >>> print(pca1.components_) + [[-1.77484909e-19 ... 4.07058917e-18]] + + +Enabling caching triggers a clone of the transformers before fitting. +Therefore, the transformer instance given to the pipeline cannot be +inspected directly. +In following example, accessing the :class:`~sklearn.decomposition.PCA` +instance ``pca2`` will raise an ``AttributeError`` since ``pca2`` will be an +unfitted transformer. +Instead, use the attribute ``named_steps`` to inspect estimators within +the pipeline:: + + >>> cachedir = mkdtemp() + >>> pca2 = PCA() + >>> svm2 = SVC() + >>> cached_pipe = Pipeline([('reduce_dim', pca2), ('clf', svm2)], + ... memory=cachedir) + >>> cached_pipe.fit(X_digits, y_digits) + Pipeline(memory=..., steps=[('reduce_dim', PCA()), ('clf', SVC())]) - >>> print(cached_pipe.named_steps['reduce_dim'].components_) - [[-1.77484909e-19 ... 4.07058917e-18]] - >>> # Remove the cache directory - >>> rmtree(cachedir) + >>> print(cached_pipe.named_steps['reduce_dim'].components_) + [[-1.77484909e-19 ... 4.07058917e-18]] + >>> # Remove the cache directory + >>> rmtree(cachedir) + + +|details-end| .. topic:: Examples: diff --git a/doc/modules/cross_validation.rst b/doc/modules/cross_validation.rst index 6158e000cb727..fd3d5f170056f 100644 --- a/doc/modules/cross_validation.rst +++ b/doc/modules/cross_validation.rst @@ -102,6 +102,7 @@ where the number of samples is very small. .. image:: ../images/grid_search_cross_validation.png :width: 500px :height: 300px + :alt: A depiction of a 5 fold cross validation on a training set, while holding out a test set. :align: center Computing cross-validated metrics @@ -451,7 +452,7 @@ fold cross validation should be preferred to LOO. * R. Bharat Rao, G. Fung, R. Rosales, `On the Dangers of Cross-Validation. An Experimental Evaluation `_, SIAM 2008; * G. James, D. Witten, T. Hastie, R Tibshirani, `An Introduction to - Statistical Learning `_, Springer 2013. + Statistical Learning `_, Springer 2013. .. _leave_p_out: diff --git a/doc/modules/decomposition.rst b/doc/modules/decomposition.rst index 6a55895b65f07..b852a6133d542 100644 --- a/doc/modules/decomposition.rst +++ b/doc/modules/decomposition.rst @@ -808,7 +808,7 @@ faces dataset, in comparison with the PCA eigenfaces. .. centered:: |pca_img5| |nmf_img5| -The :attr:`init` attribute determines the initialization method applied, which +The `init` attribute determines the initialization method applied, which has a great impact on the performance of the method. 
:class:`NMF` implements the method Nonnegative Double Singular Value Decomposition. NNDSVD [4]_ is based on two SVD processes, one approximating the data matrix, the other approximating @@ -825,20 +825,20 @@ basic NNDSVD algorithm which introduces a lot of zeros; in this case, NNDSVDa or NNDSVDar should be preferred. :class:`NMF` can also be initialized with correctly scaled random non-negative -matrices by setting :attr:`init="random"`. An integer seed or a -``RandomState`` can also be passed to :attr:`random_state` to control +matrices by setting `init="random"`. An integer seed or a +``RandomState`` can also be passed to `random_state` to control reproducibility. -In :class:`NMF`, L1 and L2 priors can be added to the loss function in order -to regularize the model. The L2 prior uses the Frobenius norm, while the L1 -prior uses an elementwise L1 norm. As in :class:`ElasticNet`, we control the -combination of L1 and L2 with the :attr:`l1_ratio` (:math:`\rho`) parameter, -and the intensity of the regularization with the :attr:`alpha_W` and :attr:`alpha_H` -(:math:`\alpha_W` and :math:`\alpha_H`) parameters. The priors are scaled by the number -of samples (:math:`n\_samples`) for `H` and the number of features (:math:`n\_features`) -for `W` to keep their impact balanced with respect to one another and to the data fit -term as independent as possible of the size of the training set. Then the priors terms -are: +In :class:`NMF`, L1 and L2 priors can be added to the loss function in order to +regularize the model. The L2 prior uses the Frobenius norm, while the L1 prior +uses an elementwise L1 norm. As in :class:`~sklearn.linear_model.ElasticNet`, +we control the combination of L1 and L2 with the `l1_ratio` (:math:`\rho`) +parameter, and the intensity of the regularization with the `alpha_W` and +`alpha_H` (:math:`\alpha_W` and :math:`\alpha_H`) parameters. The priors are +scaled by the number of samples (:math:`n\_samples`) for `H` and the number of +features (:math:`n\_features`) for `W` to keep their impact balanced with +respect to one another and to the data fit term as independent as possible of +the size of the training set. Then the priors terms are: .. math:: (\alpha_W \rho ||W||_1 + \frac{\alpha_W(1-\rho)}{2} ||W||_{\mathrm{Fro}} ^ 2) * n\_features @@ -1002,16 +1002,16 @@ structure. When modeling text corpora, the model assumes the following generative process for a corpus with :math:`D` documents and :math:`K` topics, with :math:`K` -corresponding to :attr:`n_components` in the API: +corresponding to `n_components` in the API: 1. For each topic :math:`k \in K`, draw :math:`\beta_k \sim \mathrm{Dirichlet}(\eta)`. This provides a distribution over the words, i.e. the probability of a word appearing in topic :math:`k`. - :math:`\eta` corresponds to :attr:`topic_word_prior`. + :math:`\eta` corresponds to `topic_word_prior`. 2. For each document :math:`d \in D`, draw the topic proportions :math:`\theta_d \sim \mathrm{Dirichlet}(\alpha)`. :math:`\alpha` - corresponds to :attr:`doc_topic_prior`. + corresponds to `doc_topic_prior`. 3. For each word :math:`i` in document :math:`d`: @@ -1054,7 +1054,7 @@ points. When :class:`LatentDirichletAllocation` is applied on a "document-term" matrix, the matrix will be decomposed into a "topic-term" matrix and a "document-topic" matrix. 
While -"topic-term" matrix is stored as :attr:`components_` in the model, "document-topic" matrix +"topic-term" matrix is stored as `components_` in the model, "document-topic" matrix can be calculated from ``transform`` method. :class:`LatentDirichletAllocation` also implements ``partial_fit`` method. This is used diff --git a/doc/modules/density.rst b/doc/modules/density.rst index fc0530ed262c0..5a9b456010aa3 100644 --- a/doc/modules/density.rst +++ b/doc/modules/density.rst @@ -113,6 +113,10 @@ forms, which are shown in the following figure: .. centered:: |kde_kernels| +|details-start| +**kernels' mathematical expressions** +|details-split| + The form of these kernels is as follows: * Gaussian kernel (``kernel = 'gaussian'``) @@ -139,6 +143,8 @@ The form of these kernels is as follows: :math:`K(x; h) \propto \cos(\frac{\pi x}{2h})` if :math:`x < h` +|details-end| + The kernel density estimator can be used with any of the valid distance metrics (see :class:`~sklearn.metrics.DistanceMetric` for a list of available metrics), though the results are properly normalized only diff --git a/doc/modules/ensemble.rst b/doc/modules/ensemble.rst index 4559effc00fc1..552e47a3f049c 100644 --- a/doc/modules/ensemble.rst +++ b/doc/modules/ensemble.rst @@ -1,534 +1,441 @@ .. _ensemble: -================ -Ensemble methods -================ +=========================================================================== +Ensembles: Gradient boosting, random forests, bagging, voting, stacking +=========================================================================== .. currentmodule:: sklearn.ensemble -The goal of **ensemble methods** is to combine the predictions of several +**Ensemble methods** combine the predictions of several base estimators built with a given learning algorithm in order to improve generalizability / robustness over a single estimator. -Two families of ensemble methods are usually distinguished: +Two very famous examples of ensemble methods are :ref:`gradient-boosted trees +` and :ref:`random forests `. -- In **averaging methods**, the driving principle is to build several - estimators independently and then to average their predictions. On average, - the combined estimator is usually better than any of the single base - estimator because its variance is reduced. +More generally, ensemble models can be applied to any base learner beyond +trees, in averaging methods such as :ref:`Bagging methods `, +:ref:`model stacking `, or :ref:`Voting `, or in +boosting, as :ref:`AdaBoost `. - **Examples:** :ref:`Bagging methods `, :ref:`Forests of randomized trees `, ... +.. contents:: + :local: + :depth: 1 -- By contrast, in **boosting methods**, base estimators are built sequentially - and one tries to reduce the bias of the combined estimator. The motivation is - to combine several weak models to produce a powerful ensemble. +.. _gradient_boosting: - **Examples:** :ref:`AdaBoost `, :ref:`Gradient Tree Boosting `, ... +Gradient-boosted trees +====================== +`Gradient Tree Boosting `_ +or Gradient Boosted Decision Trees (GBDT) is a generalization +of boosting to arbitrary differentiable loss functions, see the seminal work of +[Friedman2001]_. GBDT is an excellent model for both regression and +classification, in particular for tabular data. -.. _bagging: +.. 
topic:: :class:`GradientBoostingClassifier` vs :class:`HistGradientBoostingClassifier` -Bagging meta-estimator -====================== + Scikit-learn provides two implementations of gradient-boosted trees: + :class:`HistGradientBoostingClassifier` vs + :class:`GradientBoostingClassifier` for classification, and the + corresponding classes for regression. The former can be **orders of + magnitude faster** than the latter when the number of samples is + larger than tens of thousands of samples. -In ensemble algorithms, bagging methods form a class of algorithms which build -several instances of a black-box estimator on random subsets of the original -training set and then aggregate their individual predictions to form a final -prediction. These methods are used as a way to reduce the variance of a base -estimator (e.g., a decision tree), by introducing randomization into its -construction procedure and then making an ensemble out of it. In many cases, -bagging methods constitute a very simple way to improve with respect to a -single model, without making it necessary to adapt the underlying base -algorithm. As they provide a way to reduce overfitting, bagging methods work -best with strong and complex models (e.g., fully developed decision trees), in -contrast with boosting methods which usually work best with weak models (e.g., -shallow decision trees). + Missing values and categorical data are natively supported by the + Hist... version, removing the need for additional preprocessing such as + imputation. -Bagging methods come in many flavours but mostly differ from each other by the -way they draw random subsets of the training set: + :class:`GradientBoostingClassifier` and + :class:`GradientBoostingRegressor`, might be preferred for small sample + sizes since binning may lead to split points that are too approximate + in this setting. - * When random subsets of the dataset are drawn as random subsets of the - samples, then this algorithm is known as Pasting [B1999]_. +.. _histogram_based_gradient_boosting: - * When samples are drawn with replacement, then the method is known as - Bagging [B1996]_. +Histogram-Based Gradient Boosting +---------------------------------- - * When random subsets of the dataset are drawn as random subsets of - the features, then the method is known as Random Subspaces [H1998]_. +Scikit-learn 0.21 introduced two new implementations of +gradient boosted trees, namely :class:`HistGradientBoostingClassifier` +and :class:`HistGradientBoostingRegressor`, inspired by +`LightGBM `__ (See [LightGBM]_). - * Finally, when base estimators are built on subsets of both samples and - features, then the method is known as Random Patches [LG2012]_. +These histogram-based estimators can be **orders of magnitude faster** +than :class:`GradientBoostingClassifier` and +:class:`GradientBoostingRegressor` when the number of samples is larger +than tens of thousands of samples. -In scikit-learn, bagging methods are offered as a unified -:class:`BaggingClassifier` meta-estimator (resp. :class:`BaggingRegressor`), -taking as input a user-specified estimator along with parameters -specifying the strategy to draw random subsets. In particular, ``max_samples`` -and ``max_features`` control the size of the subsets (in terms of samples and -features), while ``bootstrap`` and ``bootstrap_features`` control whether -samples and features are drawn with or without replacement. 
When using a subset -of the available samples the generalization accuracy can be estimated with the -out-of-bag samples by setting ``oob_score=True``. As an example, the -snippet below illustrates how to instantiate a bagging ensemble of -:class:`KNeighborsClassifier` estimators, each built on random subsets of -50% of the samples and 50% of the features. +They also have built-in support for missing values, which avoids the need +for an imputer. - >>> from sklearn.ensemble import BaggingClassifier - >>> from sklearn.neighbors import KNeighborsClassifier - >>> bagging = BaggingClassifier(KNeighborsClassifier(), - ... max_samples=0.5, max_features=0.5) +These fast estimators first bin the input samples ``X`` into +integer-valued bins (typically 256 bins) which tremendously reduces the +number of splitting points to consider, and allows the algorithm to +leverage integer-based data structures (histograms) instead of relying on +sorted continuous values when building the trees. The API of these +estimators is slightly different, and some of the features from +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` +are not yet supported, for instance some loss functions. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_ensemble_plot_bias_variance.py` + * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` -.. topic:: References +Usage +^^^^^ - .. [B1999] L. Breiman, "Pasting small votes for classification in large - databases and on-line", Machine Learning, 36(1), 85-103, 1999. +Most of the parameters are unchanged from +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`. +One exception is the ``max_iter`` parameter that replaces ``n_estimators``, and +controls the number of iterations of the boosting process:: - .. [B1996] L. Breiman, "Bagging predictors", Machine Learning, 24(2), - 123-140, 1996. + >>> from sklearn.ensemble import HistGradientBoostingClassifier + >>> from sklearn.datasets import make_hastie_10_2 - .. [H1998] T. Ho, "The random subspace method for constructing decision - forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844, - 1998. + >>> X, y = make_hastie_10_2(random_state=0) + >>> X_train, X_test = X[:2000], X[2000:] + >>> y_train, y_test = y[:2000], y[2000:] - .. [LG2012] G. Louppe and P. Geurts, "Ensembles on Random Patches", - Machine Learning and Knowledge Discovery in Databases, 346-361, 2012. + >>> clf = HistGradientBoostingClassifier(max_iter=100).fit(X_train, y_train) + >>> clf.score(X_test, y_test) + 0.8965 -.. _forest: +Available losses for regression are 'squared_error', +'absolute_error', which is less sensitive to outliers, and +'poisson', which is well suited to model counts and frequencies. For +classification, 'log_loss' is the only option. For binary classification it uses the +binary log loss, also known as binomial deviance or binary cross-entropy. For +`n_classes >= 3`, it uses the multi-class log loss function, with multinomial deviance +and categorical cross-entropy as alternative names. The appropriate loss version is +selected based on :term:`y` passed to :term:`fit`. -Forests of randomized trees -=========================== +The size of the trees can be controlled through the ``max_leaf_nodes``, +``max_depth``, and ``min_samples_leaf`` parameters. -The :mod:`sklearn.ensemble` module includes two averaging algorithms based -on randomized :ref:`decision trees `: the RandomForest algorithm -and the Extra-Trees method. 
Both algorithms are perturb-and-combine -techniques [B1998]_ specifically designed for trees. This means a diverse -set of classifiers is created by introducing randomness in the classifier -construction. The prediction of the ensemble is given as the averaged -prediction of the individual classifiers. +The number of bins used to bin the data is controlled with the ``max_bins`` +parameter. Using less bins acts as a form of regularization. It is +generally recommended to use as many bins as possible (256), which is the default. -As other classifiers, forest classifiers have to be fitted with two -arrays: a sparse or dense array X of shape ``(n_samples, n_features)`` -holding the training samples, and an array Y of shape ``(n_samples,)`` -holding the target values (class labels) for the training samples:: +The ``l2_regularization`` parameter is a regularizer on the loss function and +corresponds to :math:`\lambda` in equation (2) of [XGBoost]_. - >>> from sklearn.ensemble import RandomForestClassifier - >>> X = [[0, 0], [1, 1]] - >>> Y = [0, 1] - >>> clf = RandomForestClassifier(n_estimators=10) - >>> clf = clf.fit(X, Y) +Note that **early-stopping is enabled by default if the number of samples is +larger than 10,000**. The early-stopping behaviour is controlled via the +``early_stopping``, ``scoring``, ``validation_fraction``, +``n_iter_no_change``, and ``tol`` parameters. It is possible to early-stop +using an arbitrary :term:`scorer`, or just the training or validation loss. +Note that for technical reasons, using a scorer is significantly slower than +using the loss. By default, early-stopping is performed if there are at least +10,000 samples in the training set, and uses the validation loss. -Like :ref:`decision trees `, forests of trees also extend to -:ref:`multi-output problems ` (if Y is an array -of shape ``(n_samples, n_outputs)``). +Missing values support +^^^^^^^^^^^^^^^^^^^^^^ -Random Forests --------------- +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` have built-in support for missing +values (NaNs). -In random forests (see :class:`RandomForestClassifier` and -:class:`RandomForestRegressor` classes), each tree in the ensemble is built -from a sample drawn with replacement (i.e., a bootstrap sample) from the -training set. +During training, the tree grower learns at each split point whether samples +with missing values should go to the left or right child, based on the +potential gain. When predicting, samples with missing values are assigned to +the left or right child consequently:: -Furthermore, when splitting each node during the construction of a tree, the -best split is found either from all input features or a random subset of size -``max_features``. (See the :ref:`parameter tuning guidelines -` for more details). + >>> from sklearn.ensemble import HistGradientBoostingClassifier + >>> import numpy as np -The purpose of these two sources of randomness is to decrease the variance of -the forest estimator. Indeed, individual decision trees typically exhibit high -variance and tend to overfit. The injected randomness in forests yield decision -trees with somewhat decoupled prediction errors. By taking an average of those -predictions, some errors can cancel out. Random forests achieve a reduced -variance by combining diverse trees, sometimes at the cost of a slight increase -in bias. In practice the variance reduction is often significant hence yielding -an overall better model. 
+ >>> X = np.array([0, 1, 2, np.nan]).reshape(-1, 1) + >>> y = [0, 0, 1, 1] -In contrast to the original publication [B2001]_, the scikit-learn -implementation combines classifiers by averaging their probabilistic -prediction, instead of letting each classifier vote for a single class. + >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y) + >>> gbdt.predict(X) + array([0, 0, 1, 1]) -A competitive alternative to random forests are -:ref:`histogram_based_gradient_boosting` (HGBT) models: +When the missingness pattern is predictive, the splits can be performed on +whether the feature value is missing or not:: -- Building trees: Random forests typically rely on deep trees (that overfit - individually) which uses much computational resources, as they require - several splittings and evaluations of candidate splits. Boosting models - build shallow trees (that underfit individually) which are faster to fit - and predict. + >>> X = np.array([0, np.nan, 1, 2, np.nan]).reshape(-1, 1) + >>> y = [0, 1, 0, 0, 1] + >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1, + ... max_depth=2, + ... learning_rate=1, + ... max_iter=1).fit(X, y) + >>> gbdt.predict(X) + array([0, 1, 0, 0, 1]) -- Sequential boosting: In HGBT, the decision trees are built sequentially, - where each tree is trained to correct the errors made by the previous ones. - This allows them to iteratively improve the model's performance using - relatively few trees. In contrast, random forests use a majority vote to - predict the outcome, which can require a larger number of trees to achieve - the same level of accuracy. +If no missing values were encountered for a given feature during training, +then samples with missing values are mapped to whichever child has the most +samples. -- Efficient binning: HGBT uses an efficient binning algorithm that can handle - large datasets with a high number of features. The binning algorithm can - pre-process the data to speed up the subsequent tree construction (see - :ref:`Why it's faster `). In contrast, the scikit-learn - implementation of random forests does not use binning and relies on exact - splitting, which can be computationally expensive. +.. _sw_hgbdt: -Overall, the computational cost of HGBT versus RF depends on the specific -characteristics of the dataset and the modeling task. It's always a good idea -to try both models and compare their performance and computational efficiency -on your specific problem to determine which model is the best fit. +Sample weight support +^^^^^^^^^^^^^^^^^^^^^ -.. topic:: Examples: +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` support sample weights during +:term:`fit`. - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` +The following toy example demonstrates that samples with a sample weight of zero are ignored: -Extremely Randomized Trees --------------------------- + >>> X = [[1, 0], + ... [1, 0], + ... [1, 0], + ... [0, 1]] + >>> y = [0, 0, 1, 0] + >>> # ignore the first 2 training samples by setting their weight to 0 + >>> sample_weight = [0, 0, 1, 1] + >>> gb = HistGradientBoostingClassifier(min_samples_leaf=1) + >>> gb.fit(X, y, sample_weight=sample_weight) + HistGradientBoostingClassifier(...) + >>> gb.predict([[1, 0]]) + array([1]) + >>> gb.predict_proba([[1, 0]])[0, 1] + 0.99... 
-In extremely randomized trees (see :class:`ExtraTreesClassifier` -and :class:`ExtraTreesRegressor` classes), randomness goes one step -further in the way splits are computed. As in random forests, a random -subset of candidate features is used, but instead of looking for the -most discriminative thresholds, thresholds are drawn at random for each -candidate feature and the best of these randomly-generated thresholds is -picked as the splitting rule. This usually allows to reduce the variance -of the model a bit more, at the expense of a slightly greater increase -in bias:: +As you can see, the `[1, 0]` is comfortably classified as `1` since the first +two samples are ignored due to their sample weights. - >>> from sklearn.model_selection import cross_val_score - >>> from sklearn.datasets import make_blobs - >>> from sklearn.ensemble import RandomForestClassifier - >>> from sklearn.ensemble import ExtraTreesClassifier - >>> from sklearn.tree import DecisionTreeClassifier +Implementation detail: taking sample weights into account amounts to +multiplying the gradients (and the hessians) by the sample weights. Note that +the binning stage (specifically the quantiles computation) does not take the +weights into account. - >>> X, y = make_blobs(n_samples=10000, n_features=10, centers=100, - ... random_state=0) +.. _categorical_support_gbdt: - >>> clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2, - ... random_state=0) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() - 0.98... +Categorical Features Support +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - >>> clf = RandomForestClassifier(n_estimators=10, max_depth=None, - ... min_samples_split=2, random_state=0) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() - 0.999... - - >>> clf = ExtraTreesClassifier(n_estimators=10, max_depth=None, - ... min_samples_split=2, random_state=0) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() > 0.999 - True +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` have native support for categorical +features: they can consider splits on non-ordered, categorical data. -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_iris_001.png - :target: ../auto_examples/ensemble/plot_forest_iris.html - :align: center - :scale: 75% +For datasets with categorical features, using the native categorical support +is often better than relying on one-hot encoding +(:class:`~sklearn.preprocessing.OneHotEncoder`), because one-hot encoding +requires more tree depth to achieve equivalent splits. It is also usually +better to rely on the native categorical support rather than to treat +categorical features as continuous (ordinal), which happens for ordinal-encoded +categorical data, since categories are nominal quantities where order does not +matter. -.. _random_forest_parameters: +To enable categorical support, a boolean mask can be passed to the +`categorical_features` parameter, indicating which feature is categorical. In +the following, the first feature will be treated as categorical and the +second feature as numerical:: -Parameters ----------- + >>> gbdt = HistGradientBoostingClassifier(categorical_features=[True, False]) -The main parameters to adjust when using these methods is ``n_estimators`` and -``max_features``. The former is the number of trees in the forest. The larger -the better, but also the longer it will take to compute. 
In addition, note that -results will stop getting significantly better beyond a critical number of -trees. The latter is the size of the random subsets of features to consider -when splitting a node. The lower the greater the reduction of variance, but -also the greater the increase in bias. Empirical good default values are -``max_features=1.0`` or equivalently ``max_features=None`` (always considering -all features instead of a random subset) for regression problems, and -``max_features="sqrt"`` (using a random subset of size ``sqrt(n_features)``) -for classification tasks (where ``n_features`` is the number of features in -the data). The default value of ``max_features=1.0`` is equivalent to bagged -trees and more randomness can be achieved by setting smaller values (e.g. 0.3 -is a typical default in the literature). Good results are often achieved when -setting ``max_depth=None`` in combination with ``min_samples_split=2`` (i.e., -when fully developing the trees). Bear in mind though that these values are -usually not optimal, and might result in models that consume a lot of RAM. -The best parameter values should always be cross-validated. In addition, note -that in random forests, bootstrap samples are used by default -(``bootstrap=True``) while the default strategy for extra-trees is to use the -whole dataset (``bootstrap=False``). When using bootstrap sampling the -generalization error can be estimated on the left out or out-of-bag samples. -This can be enabled by setting ``oob_score=True``. +Equivalently, one can pass a list of integers indicating the indices of the +categorical features:: -.. note:: + >>> gbdt = HistGradientBoostingClassifier(categorical_features=[0]) - The size of the model with the default parameters is :math:`O( M * N * log (N) )`, - where :math:`M` is the number of trees and :math:`N` is the number of samples. - In order to reduce the size of the model, you can change these parameters: - ``min_samples_split``, ``max_leaf_nodes``, ``max_depth`` and ``min_samples_leaf``. +The cardinality of each categorical feature must be less than the `max_bins` +parameter, and each categorical feature is expected to be encoded in +`[0, max_bins - 1]`. To that end, it might be useful to pre-process the data +with an :class:`~sklearn.preprocessing.OrdinalEncoder` as done in +:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`. -Parallelization ---------------- +If there are missing values during training, the missing values will be +treated as a proper category. If there are no missing values during training, +then at prediction time, missing values are mapped to the child node that has +the most samples (just like for continuous features). When predicting, +categories that were not seen during fit time will be treated as missing +values. -Finally, this module also features the parallel construction of the trees -and the parallel computation of the predictions through the ``n_jobs`` -parameter. If ``n_jobs=k`` then computations are partitioned into -``k`` jobs, and run on ``k`` cores of the machine. If ``n_jobs=-1`` -then all cores available on the machine are used. Note that because of -inter-process communication overhead, the speedup might not be linear -(i.e., using ``k`` jobs will unfortunately not be ``k`` times as -fast). Significant speedup can still be achieved though when building -a large number of trees, or when building a single tree requires a fair -amount of time (e.g., on large datasets). 
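Assuming the raw categories are strings, a minimal sketch of the pre-processing suggested
above might look as follows (the pipeline layout and parameter values are illustrative, not
the documented example)::

    >>> import numpy as np
    >>> from sklearn.ensemble import HistGradientBoostingRegressor
    >>> from sklearn.pipeline import make_pipeline
    >>> from sklearn.preprocessing import OrdinalEncoder
    >>> # encode string categories as integers, then flag the encoded column as
    >>> # categorical; unknown categories become NaN and are treated as missing
    >>> model = make_pipeline(
    ...     OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan),
    ...     HistGradientBoostingRegressor(categorical_features=[0]),
    ... )

With such a layout, categories unseen at fit time are mapped to missing values at prediction
time, consistent with the behaviour described above.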
+**Split finding with categorical features**: The canonical way of considering +categorical splits in a tree is to consider +all of the :math:`2^{K - 1} - 1` partitions, where :math:`K` is the number of +categories. This can quickly become prohibitive when :math:`K` is large. +Fortunately, since gradient boosting trees are always regression trees (even +for classification problems), there exist a faster strategy that can yield +equivalent splits. First, the categories of a feature are sorted according to +the variance of the target, for each category `k`. Once the categories are +sorted, one can consider *continuous partitions*, i.e. treat the categories +as if they were ordered continuous values (see Fisher [Fisher1958]_ for a +formal proof). As a result, only :math:`K - 1` splits need to be considered +instead of :math:`2^{K - 1} - 1`. The initial sorting is a +:math:`\mathcal{O}(K \log(K))` operation, leading to a total complexity of +:math:`\mathcal{O}(K \log(K) + K)`, instead of :math:`\mathcal{O}(2^K)`. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_iris.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` + * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py` -.. topic:: References +.. _monotonic_cst_gbdt: - .. [B2001] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001. +Monotonic Constraints +^^^^^^^^^^^^^^^^^^^^^ - .. [B1998] L. Breiman, "Arcing Classifiers", Annals of Statistics 1998. +Depending on the problem at hand, you may have prior knowledge indicating +that a given feature should in general have a positive (or negative) effect +on the target value. For example, all else being equal, a higher credit +score should increase the probability of getting approved for a loan. +Monotonic constraints allow you to incorporate such prior knowledge into the +model. - * P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized - trees", Machine Learning, 63(1), 3-42, 2006. +For a predictor :math:`F` with two features: -.. _random_forest_feature_importance: + - a **monotonic increase constraint** is a constraint of the form: + .. math:: + x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2) -Feature importance evaluation ------------------------------ + - a **monotonic decrease constraint** is a constraint of the form: + .. math:: + x_1 \leq x_1' \implies F(x_1, x_2) \geq F(x_1', x_2) -The relative rank (i.e. depth) of a feature used as a decision node in a -tree can be used to assess the relative importance of that feature with -respect to the predictability of the target variable. Features used at -the top of the tree contribute to the final prediction decision of a -larger fraction of the input samples. The **expected fraction of the -samples** they contribute to can thus be used as an estimate of the -**relative importance of the features**. In scikit-learn, the fraction of -samples a feature contributes to is combined with the decrease in impurity -from splitting them to create a normalized estimate of the predictive power -of that feature. +You can specify a monotonic constraint on each feature using the +`monotonic_cst` parameter. 
For each feature, a value of 0 indicates no +constraint, while 1 and -1 indicate a monotonic increase and +monotonic decrease constraint, respectively:: -By **averaging** the estimates of predictive ability over several randomized -trees one can **reduce the variance** of such an estimate and use it -for feature selection. This is known as the mean decrease in impurity, or MDI. -Refer to [L2014]_ for more information on MDI and feature importance -evaluation with Random Forests. + >>> from sklearn.ensemble import HistGradientBoostingRegressor -.. warning:: + ... # monotonic increase, monotonic decrease, and no constraint on the 3 features + >>> gbdt = HistGradientBoostingRegressor(monotonic_cst=[1, -1, 0]) - The impurity-based feature importances computed on tree-based models suffer - from two flaws that can lead to misleading conclusions. First they are - computed on statistics derived from the training dataset and therefore **do - not necessarily inform us on which features are most important to make good - predictions on held-out dataset**. Secondly, **they favor high cardinality - features**, that is features with many unique values. - :ref:`permutation_importance` is an alternative to impurity-based feature - importance that does not suffer from these flaws. These two methods of - obtaining feature importance are explored in: - :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py`. +In a binary classification context, imposing a monotonic increase (decrease) constraint means that higher values of the feature are supposed +to have a positive (negative) effect on the probability of samples +to belong to the positive class. -The following example shows a color-coded representation of the relative -importances of each individual pixel for a face recognition task using -a :class:`ExtraTreesClassifier` model. +Nevertheless, monotonic constraints only marginally constrain feature effects on the output. +For instance, monotonic increase and decrease constraints cannot be used to enforce the +following modelling constraint: -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_importances_faces_001.png - :target: ../auto_examples/ensemble/plot_forest_importances_faces.html - :align: center - :scale: 75 + .. math:: + x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2') -In practice those estimates are stored as an attribute named -``feature_importances_`` on the fitted model. This is an array with shape -``(n_features,)`` whose values are positive and sum to 1.0. The higher -the value, the more important is the contribution of the matching feature -to the prediction function. +Also, monotonic constraints are not supported for multiclass classification. + +.. note:: + Since categories are unordered quantities, it is not possible to enforce + monotonic constraints on categorical features. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py` + * :ref:`sphx_glr_auto_examples_ensemble_plot_monotonic_constraints.py` -.. topic:: References +.. _interaction_cst_hgbt: - .. [L2014] G. Louppe, :arxiv:`"Understanding Random Forests: From Theory to - Practice" <1407.7502>`, - PhD Thesis, U. of Liege, 2014. +Interaction constraints +^^^^^^^^^^^^^^^^^^^^^^^ -.. _random_trees_embedding: +A priori, the histogram gradient boosted trees are allowed to use any feature +to split a node into child nodes. This creates so called interactions between +features, i.e. 
usage of different features as split along a branch. Sometimes, +one wants to restrict the possible interactions, see [Mayer2022]_. This can be +done by the parameter ``interaction_cst``, where one can specify the indices +of features that are allowed to interact. +For instance, with 3 features in total, ``interaction_cst=[{0}, {1}, {2}]`` +forbids all interactions. +The constraints ``[{0, 1}, {1, 2}]`` specifies two groups of possibly +interacting features. Features 0 and 1 may interact with each other, as well +as features 1 and 2. But note that features 0 and 2 are forbidden to interact. +The following depicts a tree and the possible splits of the tree: -Totally Random Trees Embedding ------------------------------- +.. code-block:: none -:class:`RandomTreesEmbedding` implements an unsupervised transformation of the -data. Using a forest of completely random trees, :class:`RandomTreesEmbedding` -encodes the data by the indices of the leaves a data point ends up in. This -index is then encoded in a one-of-K manner, leading to a high dimensional, -sparse binary coding. -This coding can be computed very efficiently and can then be used as a basis -for other learning tasks. -The size and sparsity of the code can be influenced by choosing the number of -trees and the maximum depth per tree. For each tree in the ensemble, the coding -contains one entry of one. The size of the coding is at most ``n_estimators * 2 -** max_depth``, the maximum number of leaves in the forest. + 1 <- Both constraint groups could be applied from now on + / \ + 1 2 <- Left split still fulfills both constraint groups. + / \ / \ Right split at feature 2 has only group {1, 2} from now on. -As neighboring data points are more likely to lie within the same leaf of a -tree, the transformation performs an implicit, non-parametric density -estimation. +LightGBM uses the same logic for overlapping groups. -.. topic:: Examples: +Note that features not listed in ``interaction_cst`` are automatically +assigned an interaction group for themselves. With again 3 features, this +means that ``[{0}]`` is equivalent to ``[{0}, {1, 2}]``. - * :ref:`sphx_glr_auto_examples_ensemble_plot_random_forest_embedding.py` +.. topic:: Examples: - * :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` compares non-linear - dimensionality reduction techniques on handwritten digits. + * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` - * :ref:`sphx_glr_auto_examples_ensemble_plot_feature_transformation.py` compares - supervised and unsupervised tree based feature transformations. +.. topic:: References -.. seealso:: + .. [Mayer2022] M. Mayer, S.C. Bourassa, M. Hoesli, and D.F. Scognamiglio. + 2022. :doi:`Machine Learning Applications to Land and Structure Valuation + <10.3390/jrfm15050193>`. + Journal of Risk and Financial Management 15, no. 5: 193 - :ref:`manifold` techniques can also be useful to derive non-linear - representations of feature space, also these approaches focus also on - dimensionality reduction. +Low-level parallelism +^^^^^^^^^^^^^^^^^^^^^ -.. _adaboost: +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` use OpenMP +for parallelization through Cython. For more details on how to control the +number of threads, please refer to our :ref:`parallelism` notes. -AdaBoost -======== +The following parts are parallelized: -The module :mod:`sklearn.ensemble` includes the popular boosting algorithm -AdaBoost, introduced in 1995 by Freund and Schapire [FS1995]_. 
- -The core principle of AdaBoost is to fit a sequence of weak learners (i.e., -models that are only slightly better than random guessing, such as small -decision trees) on repeatedly modified versions of the data. The predictions -from all of them are then combined through a weighted majority vote (or sum) to -produce the final prediction. The data modifications at each so-called boosting -iteration consist of applying weights :math:`w_1`, :math:`w_2`, ..., :math:`w_N` -to each of the training samples. Initially, those weights are all set to -:math:`w_i = 1/N`, so that the first step simply trains a weak learner on the -original data. For each successive iteration, the sample weights are -individually modified and the learning algorithm is reapplied to the reweighted -data. At a given step, those training examples that were incorrectly predicted -by the boosted model induced at the previous step have their weights increased, -whereas the weights are decreased for those that were predicted correctly. As -iterations proceed, examples that are difficult to predict receive -ever-increasing influence. Each subsequent weak learner is thereby forced to -concentrate on the examples that are missed by the previous ones in the sequence -[HTF]_. - -.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_adaboost_hastie_10_2_001.png - :target: ../auto_examples/ensemble/plot_adaboost_hastie_10_2.html - :align: center - :scale: 75 - -AdaBoost can be used both for classification and regression problems: - - - For multi-class classification, :class:`AdaBoostClassifier` implements - AdaBoost-SAMME and AdaBoost-SAMME.R [ZZRH2009]_. - - - For regression, :class:`AdaBoostRegressor` implements AdaBoost.R2 [D1997]_. - -Usage ------ - -The following example shows how to fit an AdaBoost classifier with 100 weak -learners:: - - >>> from sklearn.model_selection import cross_val_score - >>> from sklearn.datasets import load_iris - >>> from sklearn.ensemble import AdaBoostClassifier - - >>> X, y = load_iris(return_X_y=True) - >>> clf = AdaBoostClassifier(n_estimators=100) - >>> scores = cross_val_score(clf, X, y, cv=5) - >>> scores.mean() - 0.9... +- mapping samples from real values to integer-valued bins (finding the bin + thresholds is however sequential) +- building histograms is parallelized over features +- finding the best split point at a node is parallelized over features +- during fit, mapping samples into the left and right children is + parallelized over samples +- gradient and hessians computations are parallelized over samples +- predicting is parallelized over samples -The number of weak learners is controlled by the parameter ``n_estimators``. The -``learning_rate`` parameter controls the contribution of the weak learners in -the final combination. By default, weak learners are decision stumps. Different -weak learners can be specified through the ``estimator`` parameter. -The main parameters to tune to obtain good results are ``n_estimators`` and -the complexity of the base estimators (e.g., its depth ``max_depth`` or -minimum required number of samples to consider a split ``min_samples_split``). +.. _Why_it's_faster: -.. topic:: Examples: +Why it's faster +^^^^^^^^^^^^^^^ - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_hastie_10_2.py` compares the - classification error of a decision stump, decision tree, and a boosted - decision stump using AdaBoost-SAMME and AdaBoost-SAMME.R. +The bottleneck of a gradient boosting procedure is building the decision +trees. 
Building a traditional decision tree (as in the other GBDTs +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`) +requires sorting the samples at each node (for +each feature). Sorting is needed so that the potential gain of a split point +can be computed efficiently. Splitting a single node has thus a complexity +of :math:`\mathcal{O}(n_\text{features} \times n \log(n))` where :math:`n` +is the number of samples at the node. - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py` shows the performance - of AdaBoost-SAMME and AdaBoost-SAMME.R on a multi-class problem. +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor`, in contrast, do not require sorting the +feature values and instead use a data-structure called a histogram, where the +samples are implicitly ordered. Building a histogram has a +:math:`\mathcal{O}(n)` complexity, so the node splitting procedure has a +:math:`\mathcal{O}(n_\text{features} \times n)` complexity, much smaller +than the previous one. In addition, instead of considering :math:`n` split +points, we consider only ``max_bins`` split points, which might be much +smaller. - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_twoclass.py` shows the decision boundary - and decision function values for a non-linearly separable two-class problem - using AdaBoost-SAMME. +In order to build histograms, the input data `X` needs to be binned into +integer-valued bins. This binning procedure does require sorting the feature +values, but it only happens once at the very beginning of the boosting process +(not at each node, like in :class:`GradientBoostingClassifier` and +:class:`GradientBoostingRegressor`). - * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` demonstrates regression - with the AdaBoost.R2 algorithm. +Finally, many parts of the implementation of +:class:`HistGradientBoostingClassifier` and +:class:`HistGradientBoostingRegressor` are parallelized. .. topic:: References - .. [FS1995] Y. Freund, and R. Schapire, "A Decision-Theoretic Generalization of - On-Line Learning and an Application to Boosting", 1997. - - .. [ZZRH2009] J. Zhu, H. Zou, S. Rosset, T. Hastie. "Multi-class AdaBoost", - 2009. - - .. [D1997] H. Drucker. "Improving Regressors using Boosting Techniques", 1997. - - .. [HTF] T. Hastie, R. Tibshirani and J. Friedman, "Elements of - Statistical Learning Ed. 2", Springer, 2009. - - -.. _gradient_boosting: - -Gradient Tree Boosting -====================== - -`Gradient Tree Boosting `_ -or Gradient Boosted Decision Trees (GBDT) is a generalization -of boosting to arbitrary differentiable loss functions, see the seminal work of -[Friedman2001]_. GBDT is an accurate and effective off-the-shelf procedure that can be -used for both regression and classification problems in a -variety of areas including Web search ranking and ecology. - -The module :mod:`sklearn.ensemble` provides methods -for both classification and regression via gradient boosted decision -trees. - -.. note:: - - Scikit-learn 0.21 introduces two new implementations of - gradient boosting trees, namely :class:`HistGradientBoostingClassifier` - and :class:`HistGradientBoostingRegressor`, inspired by - `LightGBM `__ (See [LightGBM]_). + .. 
[XGBoost] Tianqi Chen, Carlos Guestrin, :arxiv:`"XGBoost: A Scalable Tree + Boosting System" <1603.02754>` - These histogram-based estimators can be **orders of magnitude faster** - than :class:`GradientBoostingClassifier` and - :class:`GradientBoostingRegressor` when the number of samples is larger - than tens of thousands of samples. + .. [LightGBM] Ke et. al. `"LightGBM: A Highly Efficient Gradient + BoostingDecision Tree" `_ - They also have built-in support for missing values, which avoids the need - for an imputer. + .. [Fisher1958] Fisher, W.D. (1958). `"On Grouping for Maximum Homogeneity" + `_ + Journal of the American Statistical Association, 53, 789-798. - These estimators are described in more detail below in - :ref:`histogram_based_gradient_boosting`. - The following guide focuses on :class:`GradientBoostingClassifier` and - :class:`GradientBoostingRegressor`, which might be preferred for small - sample sizes since binning may lead to split points that are too approximate - in this setting. +:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` +---------------------------------------------------------------------------- The usage and the parameters of :class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor` are described below. The 2 most important parameters of these estimators are `n_estimators` and `learning_rate`. Classification ---------------- +^^^^^^^^^^^^^^^ :class:`GradientBoostingClassifier` supports both binary and multi-class classification. @@ -566,7 +473,7 @@ depth via ``max_depth`` or by setting the number of leaf nodes via :class:`GradientBoostingClassifier` . Regression ----------- +^^^^^^^^^^^ :class:`GradientBoostingRegressor` supports a number of :ref:`different loss functions ` @@ -596,8 +503,8 @@ with least squares loss and 500 base learners to the diabetes dataset (:func:`sklearn.datasets.load_diabetes`). The plot shows the train and test error at each iteration. The train error at each iteration is stored in the -:attr:`~GradientBoostingRegressor.train_score_` attribute -of the gradient boosting model. The test error at each iterations can be obtained +`train_score_` attribute of the gradient boosting model. +The test error at each iterations can be obtained via the :meth:`~GradientBoostingRegressor.staged_predict` method which returns a generator that yields the predictions at each stage. Plots like these can be used to determine the optimal number of trees (i.e. ``n_estimators``) by early stopping. @@ -615,7 +522,7 @@ to determine the optimal number of trees (i.e. ``n_estimators``) by early stoppi .. _gradient_boosting_warm_start: Fitting additional weak-learners --------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Both :class:`GradientBoostingRegressor` and :class:`GradientBoostingClassifier` support ``warm_start=True`` which allows you to add more estimators to an already @@ -631,7 +538,7 @@ fitted model. .. _gradient_boosting_tree_size: Controlling the tree size -------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^ The size of the regression tree base learners defines the level of variable interactions that can be captured by the gradient boosting model. In general, @@ -658,13 +565,13 @@ chapter on gradient boosting in [Friedman2001]_ and is related to the parameter ``interaction.depth`` in R's gbm package where ``max_leaf_nodes == interaction.depth + 1`` . 
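As a brief, illustrative sketch of the tree-size parameters discussed above (dataset and
parameter values are arbitrary), the number of leaves per tree can be capped instead of its
depth::

    >>> from sklearn.datasets import make_friedman1
    >>> from sklearn.ensemble import GradientBoostingRegressor
    >>> X, y = make_friedman1(n_samples=1200, random_state=0)
    >>> X_train, y_train = X[:200], y[:200]
    >>> # at most 8 leaves per tree, roughly ``interaction.depth = 7`` in R's gbm
    >>> est = GradientBoostingRegressor(n_estimators=100, max_leaf_nodes=8,
    ...                                 random_state=0).fit(X_train, y_train)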
Mathematical formulation -------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^ We first present GBRT for regression, and then detail the classification case. Regression -^^^^^^^^^^ +........... GBRT regressors are additive models whose prediction :math:`\hat{y}_i` for a given input :math:`x_i` is of the following form: @@ -747,7 +654,7 @@ space. a leaf is updated to the median of the samples in that leaf. Classification -^^^^^^^^^^^^^^ +.............. Gradient boosting for classification is very similar to the regression case. However, the sum of the trees :math:`F_M(x_i) = \sum_m h_m(x_i)` is not @@ -771,7 +678,7 @@ quantities. .. _gradient_boosting_loss: Loss Functions --------------- +^^^^^^^^^^^^^^ The following loss functions are supported and can be specified using the parameter ``loss``: @@ -814,7 +721,7 @@ the parameter ``loss``: .. _gradient_boosting_shrinkage: Shrinkage via learning rate ---------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^ [Friedman2001]_ proposed a simple regularization strategy that scales the contribution of each weak learner by a constant factor :math:`\nu`: @@ -838,7 +745,7 @@ stopping. For a more detailed discussion of the interaction between ``learning_rate`` and ``n_estimators`` see [R2007]_. Subsampling ------------ +^^^^^^^^^^^^ [Friedman2002]_ proposed stochastic gradient boosting, which combines gradient boosting with bootstrap averaging (bagging). At each iteration @@ -867,10 +774,9 @@ parameter. Stochastic gradient boosting allows to compute out-of-bag estimates of the test deviance by computing the improvement in deviance on the examples that are not included in the bootstrap sample (i.e. the out-of-bag examples). -The improvements are stored in the attribute -:attr:`~GradientBoostingRegressor.oob_improvement_`. ``oob_improvement_[i]`` holds -the improvement in terms of the loss on the OOB samples if you add the i-th stage -to the current predictions. +The improvements are stored in the attribute `oob_improvement_`. +``oob_improvement_[i]`` holds the improvement in terms of the loss on the OOB samples +if you add the i-th stage to the current predictions. Out-of-bag estimates can be used for model selection, for example to determine the optimal number of iterations. OOB estimates are usually very pessimistic thus we recommend to use cross-validation instead and only use OOB if cross-validation @@ -883,7 +789,7 @@ is too time consuming. * :ref:`sphx_glr_auto_examples_ensemble_plot_ensemble_oob.py` Interpretation with feature importance --------------------------------------- +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Individual decision trees can be interpreted easily by simply visualizing the tree structure. Gradient boosting models, however, @@ -940,375 +846,371 @@ based on permutation of the features. .. [R2007] G. Ridgeway (2006). `Generalized Boosted Models: A guide to the gbm package `_ -.. _histogram_based_gradient_boosting: +.. _forest: -Histogram-Based Gradient Boosting -================================= +Random forests and other randomized tree ensembles +=================================================== -Scikit-learn 0.21 introduced two new implementations of -gradient boosting trees, namely :class:`HistGradientBoostingClassifier` -and :class:`HistGradientBoostingRegressor`, inspired by -`LightGBM `__ (See [LightGBM]_). +The :mod:`sklearn.ensemble` module includes two averaging algorithms based +on randomized :ref:`decision trees `: the RandomForest algorithm +and the Extra-Trees method. 
Both algorithms are perturb-and-combine
+techniques [B1998]_ specifically designed for trees. This means a diverse
+set of classifiers is created by introducing randomness in the classifier
+construction. The prediction of the ensemble is given as the averaged
+prediction of the individual classifiers.
-These histogram-based estimators can be **orders of magnitude faster**
-than :class:`GradientBoostingClassifier` and
-:class:`GradientBoostingRegressor` when the number of samples is larger
-than tens of thousands of samples.
+As with other classifiers, forest classifiers have to be fitted with two
+arrays: a sparse or dense array X of shape ``(n_samples, n_features)``
+holding the training samples, and an array Y of shape ``(n_samples,)``
+holding the target values (class labels) for the training samples::
-They also have built-in support for missing values, which avoids the need
-for an imputer.
+ >>> from sklearn.ensemble import RandomForestClassifier
+ >>> X = [[0, 0], [1, 1]]
+ >>> Y = [0, 1]
+ >>> clf = RandomForestClassifier(n_estimators=10)
+ >>> clf = clf.fit(X, Y)
-These fast estimators first bin the input samples ``X`` into
-integer-valued bins (typically 256 bins) which tremendously reduces the
-number of splitting points to consider, and allows the algorithm to
-leverage integer-based data structures (histograms) instead of relying on
-sorted continuous values when building the trees. The API of these
-estimators is slightly different, and some of the features from
-:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`
-are not yet supported, for instance some loss functions.
+Like :ref:`decision trees `, forests of trees also extend to
+:ref:`multi-output problems ` (if Y is an array
+of shape ``(n_samples, n_outputs)``).
-.. topic:: Examples:
+Random Forests
+--------------
- * :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py`
+In random forests (see :class:`RandomForestClassifier` and
+:class:`RandomForestRegressor` classes), each tree in the ensemble is built
+from a sample drawn with replacement (i.e., a bootstrap sample) from the
+training set.
-Usage
------
+Furthermore, when splitting each node during the construction of a tree, the
+best split is found either from all input features or a random subset of size
+``max_features``. (See the :ref:`parameter tuning guidelines
+` for more details).
-Most of the parameters are unchanged from
-:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`.
-One exception is the ``max_iter`` parameter that replaces ``n_estimators``, and
-controls the number of iterations of the boosting process::
+The purpose of these two sources of randomness is to decrease the variance of
+the forest estimator. Indeed, individual decision trees typically exhibit high
+variance and tend to overfit. The injected randomness in forests yields decision
+trees with somewhat decoupled prediction errors. By taking an average of those
+predictions, some errors can cancel out. Random forests achieve a reduced
+variance by combining diverse trees, sometimes at the cost of a slight increase
+in bias. In practice, the variance reduction is often significant, hence yielding
+an overall better model.
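+
+The averaging described above can be observed directly on a fitted forest. The
+following minimal sketch (using a synthetic regression task from
+:func:`~sklearn.datasets.make_regression` purely for illustration) checks that
+the ensemble prediction is the mean of the individual trees' predictions::
+
+    >>> import numpy as np
+    >>> from sklearn.datasets import make_regression
+    >>> from sklearn.ensemble import RandomForestRegressor
+    >>> X, y = make_regression(n_samples=100, random_state=0)
+    >>> reg = RandomForestRegressor(n_estimators=30, random_state=0).fit(X, y)
+    >>> # average the predictions of the individual trees stored in `estimators_`
+    >>> averaged = np.mean([tree.predict(X) for tree in reg.estimators_], axis=0)
+    >>> np.allclose(averaged, reg.predict(X))
+    True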
- >>> from sklearn.ensemble import HistGradientBoostingClassifier - >>> from sklearn.datasets import make_hastie_10_2 +In contrast to the original publication [B2001]_, the scikit-learn +implementation combines classifiers by averaging their probabilistic +prediction, instead of letting each classifier vote for a single class. - >>> X, y = make_hastie_10_2(random_state=0) - >>> X_train, X_test = X[:2000], X[2000:] - >>> y_train, y_test = y[:2000], y[2000:] +A competitive alternative to random forests are +:ref:`histogram_based_gradient_boosting` (HGBT) models: - >>> clf = HistGradientBoostingClassifier(max_iter=100).fit(X_train, y_train) - >>> clf.score(X_test, y_test) - 0.8965 +- Building trees: Random forests typically rely on deep trees (that overfit + individually) which uses much computational resources, as they require + several splittings and evaluations of candidate splits. Boosting models + build shallow trees (that underfit individually) which are faster to fit + and predict. -Available losses for regression are 'squared_error', -'absolute_error', which is less sensitive to outliers, and -'poisson', which is well suited to model counts and frequencies. For -classification, 'log_loss' is the only option. For binary classification it uses the -binary log loss, also known as binomial deviance or binary cross-entropy. For -`n_classes >= 3`, it uses the multi-class log loss function, with multinomial deviance -and categorical cross-entropy as alternative names. The appropriate loss version is -selected based on :term:`y` passed to :term:`fit`. +- Sequential boosting: In HGBT, the decision trees are built sequentially, + where each tree is trained to correct the errors made by the previous ones. + This allows them to iteratively improve the model's performance using + relatively few trees. In contrast, random forests use a majority vote to + predict the outcome, which can require a larger number of trees to achieve + the same level of accuracy. -The size of the trees can be controlled through the ``max_leaf_nodes``, -``max_depth``, and ``min_samples_leaf`` parameters. +- Efficient binning: HGBT uses an efficient binning algorithm that can handle + large datasets with a high number of features. The binning algorithm can + pre-process the data to speed up the subsequent tree construction (see + :ref:`Why it's faster `). In contrast, the scikit-learn + implementation of random forests does not use binning and relies on exact + splitting, which can be computationally expensive. -The number of bins used to bin the data is controlled with the ``max_bins`` -parameter. Using less bins acts as a form of regularization. It is -generally recommended to use as many bins as possible, which is the default. +Overall, the computational cost of HGBT versus RF depends on the specific +characteristics of the dataset and the modeling task. It's a good idea +to try both models and compare their performance and computational efficiency +on your specific problem to determine which model is the best fit. -The ``l2_regularization`` parameter is a regularizer on the loss function and -corresponds to :math:`\lambda` in equation (2) of [XGBoost]_. +.. topic:: Examples: -Note that **early-stopping is enabled by default if the number of samples is -larger than 10,000**. The early-stopping behaviour is controlled via the -``early_stopping``, ``scoring``, ``validation_fraction``, -``n_iter_no_change``, and ``tol`` parameters. 
It is possible to early-stop -using an arbitrary :term:`scorer`, or just the training or validation loss. -Note that for technical reasons, using a scorer is significantly slower than -using the loss. By default, early-stopping is performed if there are at least -10,000 samples in the training set, using the validation loss. + * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_hist_grad_boosting_comparison.py` -Missing values support ----------------------- - -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` have built-in support for missing -values (NaNs). - -During training, the tree grower learns at each split point whether samples -with missing values should go to the left or right child, based on the -potential gain. When predicting, samples with missing values are assigned to -the left or right child consequently:: - - >>> from sklearn.ensemble import HistGradientBoostingClassifier - >>> import numpy as np - - >>> X = np.array([0, 1, 2, np.nan]).reshape(-1, 1) - >>> y = [0, 0, 1, 1] - - >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1).fit(X, y) - >>> gbdt.predict(X) - array([0, 0, 1, 1]) +Extremely Randomized Trees +-------------------------- -When the missingness pattern is predictive, the splits can be done on -whether the feature value is missing or not:: +In extremely randomized trees (see :class:`ExtraTreesClassifier` +and :class:`ExtraTreesRegressor` classes), randomness goes one step +further in the way splits are computed. As in random forests, a random +subset of candidate features is used, but instead of looking for the +most discriminative thresholds, thresholds are drawn at random for each +candidate feature and the best of these randomly-generated thresholds is +picked as the splitting rule. This usually allows to reduce the variance +of the model a bit more, at the expense of a slightly greater increase +in bias:: - >>> X = np.array([0, np.nan, 1, 2, np.nan]).reshape(-1, 1) - >>> y = [0, 1, 0, 0, 1] - >>> gbdt = HistGradientBoostingClassifier(min_samples_leaf=1, - ... max_depth=2, - ... learning_rate=1, - ... max_iter=1).fit(X, y) - >>> gbdt.predict(X) - array([0, 1, 0, 0, 1]) + >>> from sklearn.model_selection import cross_val_score + >>> from sklearn.datasets import make_blobs + >>> from sklearn.ensemble import RandomForestClassifier + >>> from sklearn.ensemble import ExtraTreesClassifier + >>> from sklearn.tree import DecisionTreeClassifier -If no missing values were encountered for a given feature during training, -then samples with missing values are mapped to whichever child has the most -samples. + >>> X, y = make_blobs(n_samples=10000, n_features=10, centers=100, + ... random_state=0) -.. _sw_hgbdt: + >>> clf = DecisionTreeClassifier(max_depth=None, min_samples_split=2, + ... random_state=0) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() + 0.98... -Sample weight support ---------------------- + >>> clf = RandomForestClassifier(n_estimators=10, max_depth=None, + ... min_samples_split=2, random_state=0) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() + 0.999... -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` sample support weights during -:term:`fit`. + >>> clf = ExtraTreesClassifier(n_estimators=10, max_depth=None, + ... 
min_samples_split=2, random_state=0)
+ >>> scores = cross_val_score(clf, X, y, cv=5)
+ >>> scores.mean() > 0.999
+ True
-The following toy example demonstrates how the model ignores the samples with
-zero sample weights:
+.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_iris_001.png
+ :target: ../auto_examples/ensemble/plot_forest_iris.html
+ :align: center
+ :scale: 75%
- >>> X = [[1, 0],
- ... [1, 0],
- ... [1, 0],
- ... [0, 1]]
- >>> y = [0, 0, 1, 0]
- >>> # ignore the first 2 training samples by setting their weight to 0
- >>> sample_weight = [0, 0, 1, 1]
- >>> gb = HistGradientBoostingClassifier(min_samples_leaf=1)
- >>> gb.fit(X, y, sample_weight=sample_weight)
- HistGradientBoostingClassifier(...)
- >>> gb.predict([[1, 0]])
- array([1])
- >>> gb.predict_proba([[1, 0]])[0, 1]
- 0.99...
+.. _random_forest_parameters:
-As you can see, the `[1, 0]` is comfortably classified as `1` since the first
-two samples are ignored due to their sample weights.
+Parameters
+----------
-Implementation detail: taking sample weights into account amounts to
-multiplying the gradients (and the hessians) by the sample weights. Note that
-the binning stage (specifically the quantiles computation) does not take the
-weights into account.
+The main parameters to adjust when using these methods are ``n_estimators`` and
+``max_features``. The former is the number of trees in the forest. The larger
+the better, but also the longer it will take to compute. In addition, note that
+results will stop getting significantly better beyond a critical number of
+trees. The latter is the size of the random subsets of features to consider
+when splitting a node. The lower, the greater the reduction of variance, but
+also the greater the increase in bias. Empirically good default values are
+``max_features=1.0`` or equivalently ``max_features=None`` (always considering
+all features instead of a random subset) for regression problems, and
+``max_features="sqrt"`` (using a random subset of size ``sqrt(n_features)``)
+for classification tasks (where ``n_features`` is the number of features in
+the data). The default value of ``max_features=1.0`` is equivalent to bagged
+trees and more randomness can be achieved by setting smaller values (e.g. 0.3
+is a typical default in the literature). Good results are often achieved when
+setting ``max_depth=None`` in combination with ``min_samples_split=2`` (i.e.,
+when fully developing the trees). Bear in mind though that these values are
+usually not optimal, and might result in models that consume a lot of RAM.
+The best parameter values should always be cross-validated. In addition, note
+that in random forests, bootstrap samples are used by default
+(``bootstrap=True``) while the default strategy for extra-trees is to use the
+whole dataset (``bootstrap=False``). When using bootstrap sampling, the
+generalization error can be estimated on the left-out or out-of-bag samples.
+This can be enabled by setting ``oob_score=True``.
-.. _categorical_support_gbdt:
+.. note::
-Categorical Features Support
-----------------------------
+ The size of the model with the default parameters is :math:`O(M \times N \times \log(N))`,
+ where :math:`M` is the number of trees and :math:`N` is the number of samples.
+ In order to reduce the size of the model, you can change these parameters:
+ ``min_samples_split``, ``max_leaf_nodes``, ``max_depth`` and ``min_samples_leaf``.
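+
+As mentioned above, when ``bootstrap=True`` the generalization error can be
+estimated from the out-of-bag samples. A minimal sketch (on a synthetic dataset
+from :func:`~sklearn.datasets.make_classification`, used purely for
+illustration) could look like::
+
+    >>> from sklearn.datasets import make_classification
+    >>> from sklearn.ensemble import RandomForestClassifier
+    >>> X, y = make_classification(n_samples=1000, random_state=0)
+    >>> forest = RandomForestClassifier(n_estimators=100, max_features="sqrt",
+    ...                                 oob_score=True, random_state=0).fit(X, y)
+    >>> # accuracy estimated on the out-of-bag samples, without a held-out set
+    >>> forest.oob_score_ > 0.8
+    True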
-:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` have native support for categorical -features: they can consider splits on non-ordered, categorical data. +Parallelization +--------------- -For datasets with categorical features, using the native categorical support -is often better than relying on one-hot encoding -(:class:`~sklearn.preprocessing.OneHotEncoder`), because one-hot encoding -requires more tree depth to achieve equivalent splits. It is also usually -better to rely on the native categorical support rather than to treat -categorical features as continuous (ordinal), which happens for ordinal-encoded -categorical data, since categories are nominal quantities where order does not -matter. +Finally, this module also features the parallel construction of the trees +and the parallel computation of the predictions through the ``n_jobs`` +parameter. If ``n_jobs=k`` then computations are partitioned into +``k`` jobs, and run on ``k`` cores of the machine. If ``n_jobs=-1`` +then all cores available on the machine are used. Note that because of +inter-process communication overhead, the speedup might not be linear +(i.e., using ``k`` jobs will unfortunately not be ``k`` times as +fast). Significant speedup can still be achieved though when building +a large number of trees, or when building a single tree requires a fair +amount of time (e.g., on large datasets). -To enable categorical support, a boolean mask can be passed to the -`categorical_features` parameter, indicating which feature is categorical. In -the following, the first feature will be treated as categorical and the -second feature as numerical:: +.. topic:: Examples: - >>> gbdt = HistGradientBoostingClassifier(categorical_features=[True, False]) + * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_iris.py` + * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` + * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` -Equivalently, one can pass a list of integers indicating the indices of the -categorical features:: +.. topic:: References - >>> gbdt = HistGradientBoostingClassifier(categorical_features=[0]) + .. [B2001] L. Breiman, "Random Forests", Machine Learning, 45(1), 5-32, 2001. -The cardinality of each categorical feature should be less than the `max_bins` -parameter, and each categorical feature is expected to be encoded in -`[0, max_bins - 1]`. To that end, it might be useful to pre-process the data -with an :class:`~sklearn.preprocessing.OrdinalEncoder` as done in -:ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py`. + .. [B1998] L. Breiman, "Arcing Classifiers", Annals of Statistics 1998. -If there are missing values during training, the missing values will be -treated as a proper category. If there are no missing values during training, -then at prediction time, missing values are mapped to the child node that has -the most samples (just like for continuous features). When predicting, -categories that were not seen during fit time will be treated as missing -values. + * P. Geurts, D. Ernst., and L. Wehenkel, "Extremely randomized + trees", Machine Learning, 63(1), 3-42, 2006. -**Split finding with categorical features**: The canonical way of considering -categorical splits in a tree is to consider -all of the :math:`2^{K - 1} - 1` partitions, where :math:`K` is the number of -categories. This can quickly become prohibitive when :math:`K` is large. 
-Fortunately, since gradient boosting trees are always regression trees (even -for classification problems), there exist a faster strategy that can yield -equivalent splits. First, the categories of a feature are sorted according to -the variance of the target, for each category `k`. Once the categories are -sorted, one can consider *continuous partitions*, i.e. treat the categories -as if they were ordered continuous values (see Fisher [Fisher1958]_ for a -formal proof). As a result, only :math:`K - 1` splits need to be considered -instead of :math:`2^{K - 1} - 1`. The initial sorting is a -:math:`\mathcal{O}(K \log(K))` operation, leading to a total complexity of -:math:`\mathcal{O}(K \log(K) + K)`, instead of :math:`\mathcal{O}(2^K)`. +.. _random_forest_feature_importance: -.. topic:: Examples: +Feature importance evaluation +----------------------------- - * :ref:`sphx_glr_auto_examples_ensemble_plot_gradient_boosting_categorical.py` +The relative rank (i.e. depth) of a feature used as a decision node in a +tree can be used to assess the relative importance of that feature with +respect to the predictability of the target variable. Features used at +the top of the tree contribute to the final prediction decision of a +larger fraction of the input samples. The **expected fraction of the +samples** they contribute to can thus be used as an estimate of the +**relative importance of the features**. In scikit-learn, the fraction of +samples a feature contributes to is combined with the decrease in impurity +from splitting them to create a normalized estimate of the predictive power +of that feature. -.. _monotonic_cst_gbdt: +By **averaging** the estimates of predictive ability over several randomized +trees one can **reduce the variance** of such an estimate and use it +for feature selection. This is known as the mean decrease in impurity, or MDI. +Refer to [L2014]_ for more information on MDI and feature importance +evaluation with Random Forests. -Monotonic Constraints ---------------------- +.. warning:: -Depending on the problem at hand, you may have prior knowledge indicating -that a given feature should in general have a positive (or negative) effect -on the target value. For example, all else being equal, a higher credit -score should increase the probability of getting approved for a loan. -Monotonic constraints allow you to incorporate such prior knowledge into the -model. + The impurity-based feature importances computed on tree-based models suffer + from two flaws that can lead to misleading conclusions. First they are + computed on statistics derived from the training dataset and therefore **do + not necessarily inform us on which features are most important to make good + predictions on held-out dataset**. Secondly, **they favor high cardinality + features**, that is features with many unique values. + :ref:`permutation_importance` is an alternative to impurity-based feature + importance that does not suffer from these flaws. These two methods of + obtaining feature importance are explored in: + :ref:`sphx_glr_auto_examples_inspection_plot_permutation_importance.py`. -For a predictor :math:`F` with two features: +The following example shows a color-coded representation of the relative +importances of each individual pixel for a face recognition task using +a :class:`ExtraTreesClassifier` model. - - a **monotonic increase constraint** is a constraint of the form: - .. math:: - x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2) +.. 
figure:: ../auto_examples/ensemble/images/sphx_glr_plot_forest_importances_faces_001.png + :target: ../auto_examples/ensemble/plot_forest_importances_faces.html + :align: center + :scale: 75 - - a **monotonic decrease constraint** is a constraint of the form: - .. math:: - x_1 \leq x_1' \implies F(x_1, x_2) \geq F(x_1', x_2) +In practice those estimates are stored as an attribute named +``feature_importances_`` on the fitted model. This is an array with shape +``(n_features,)`` whose values are positive and sum to 1.0. The higher +the value, the more important is the contribution of the matching feature +to the prediction function. -You can specify a monotonic constraint on each feature using the -`monotonic_cst` parameter. For each feature, a value of 0 indicates no -constraint, while 1 and -1 indicate a monotonic increase and -monotonic decrease constraint, respectively:: +.. topic:: Examples: - >>> from sklearn.ensemble import HistGradientBoostingRegressor + * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances_faces.py` + * :ref:`sphx_glr_auto_examples_ensemble_plot_forest_importances.py` - ... # monotonic increase, monotonic decrease, and no constraint on the 3 features - >>> gbdt = HistGradientBoostingRegressor(monotonic_cst=[1, -1, 0]) +.. topic:: References -In a binary classification context, imposing a monotonic increase (decrease) constraint means that higher values of the feature are supposed -to have a positive (negative) effect on the probability of samples -to belong to the positive class. + .. [L2014] G. Louppe, :arxiv:`"Understanding Random Forests: From Theory to + Practice" <1407.7502>`, + PhD Thesis, U. of Liege, 2014. -Nevertheless, monotonic constraints only marginally constrain feature effects on the output. -For instance, monotonic increase and decrease constraints cannot be used to enforce the -following modelling constraint: +.. _random_trees_embedding: - .. math:: - x_1 \leq x_1' \implies F(x_1, x_2) \leq F(x_1', x_2') +Totally Random Trees Embedding +------------------------------ -Also, monotonic constraints are not supported for multiclass classification. +:class:`RandomTreesEmbedding` implements an unsupervised transformation of the +data. Using a forest of completely random trees, :class:`RandomTreesEmbedding` +encodes the data by the indices of the leaves a data point ends up in. This +index is then encoded in a one-of-K manner, leading to a high dimensional, +sparse binary coding. +This coding can be computed very efficiently and can then be used as a basis +for other learning tasks. +The size and sparsity of the code can be influenced by choosing the number of +trees and the maximum depth per tree. For each tree in the ensemble, the coding +contains one entry of one. The size of the coding is at most ``n_estimators * 2 +** max_depth``, the maximum number of leaves in the forest. -.. note:: - Since categories are unordered quantities, it is not possible to enforce - monotonic constraints on categorical features. +As neighboring data points are more likely to lie within the same leaf of a +tree, the transformation performs an implicit, non-parametric density +estimation. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_ensemble_plot_monotonic_constraints.py` + * :ref:`sphx_glr_auto_examples_ensemble_plot_random_forest_embedding.py` -.. _interaction_cst_hgbt: + * :ref:`sphx_glr_auto_examples_manifold_plot_lle_digits.py` compares non-linear + dimensionality reduction techniques on handwritten digits. 
-Interaction constraints ------------------------ + * :ref:`sphx_glr_auto_examples_ensemble_plot_feature_transformation.py` compares + supervised and unsupervised tree based feature transformations. -A priori, the histogram gradient boosting trees are allowed to use any feature -to split a node into child nodes. This creates so called interactions between -features, i.e. usage of different features as split along a branch. Sometimes, -one wants to restrict the possible interactions, see [Mayer2022]_. This can be -done by the parameter ``interaction_cst``, where one can specify the indices -of features that are allowed to interact. -For instance, with 3 features in total, ``interaction_cst=[{0}, {1}, {2}]`` -forbids all interactions. -The constraints ``[{0, 1}, {1, 2}]`` specifies two groups of possibly -interacting features. Features 0 and 1 may interact with each other, as well -as features 1 and 2. But note that features 0 and 2 are forbidden to interact. -The following depicts a tree and the possible splits of the tree: +.. seealso:: -.. code-block:: none + :ref:`manifold` techniques can also be useful to derive non-linear + representations of feature space, also these approaches focus also on + dimensionality reduction. - 1 <- Both constraint groups could be applied from now on - / \ - 1 2 <- Left split still fulfills both constraint groups. - / \ / \ Right split at feature 2 has only group {1, 2} from now on. +.. _bagging: -LightGBM uses the same logic for overlapping groups. +Bagging meta-estimator +====================== -Note that features not listed in ``interaction_cst`` are automatically -assigned an interaction group for themselves. With again 3 features, this -means that ``[{0}]`` is equivalent to ``[{0}, {1, 2}]``. +In ensemble algorithms, bagging methods form a class of algorithms which build +several instances of a black-box estimator on random subsets of the original +training set and then aggregate their individual predictions to form a final +prediction. These methods are used as a way to reduce the variance of a base +estimator (e.g., a decision tree), by introducing randomization into its +construction procedure and then making an ensemble out of it. In many cases, +bagging methods constitute a very simple way to improve with respect to a +single model, without making it necessary to adapt the underlying base +algorithm. As they provide a way to reduce overfitting, bagging methods work +best with strong and complex models (e.g., fully developed decision trees), in +contrast with boosting methods which usually work best with weak models (e.g., +shallow decision trees). -.. topic:: References +Bagging methods come in many flavours but mostly differ from each other by the +way they draw random subsets of the training set: - .. [Mayer2022] M. Mayer, S.C. Bourassa, M. Hoesli, and D.F. Scognamiglio. - 2022. :doi:`Machine Learning Applications to Land and Structure Valuation - <10.3390/jrfm15050193>`. - Journal of Risk and Financial Management 15, no. 5: 193 + * When random subsets of the dataset are drawn as random subsets of the + samples, then this algorithm is known as Pasting [B1999]_. -Low-level parallelism ---------------------- + * When samples are drawn with replacement, then the method is known as + Bagging [B1996]_. -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` have implementations that use OpenMP -for parallelization through Cython. 
For more details on how to control the -number of threads, please refer to our :ref:`parallelism` notes. + * When random subsets of the dataset are drawn as random subsets of + the features, then the method is known as Random Subspaces [H1998]_. -The following parts are parallelized: + * Finally, when base estimators are built on subsets of both samples and + features, then the method is known as Random Patches [LG2012]_. -- mapping samples from real values to integer-valued bins (finding the bin - thresholds is however sequential) -- building histograms is parallelized over features -- finding the best split point at a node is parallelized over features -- during fit, mapping samples into the left and right children is - parallelized over samples -- gradient and hessians computations are parallelized over samples -- predicting is parallelized over samples +In scikit-learn, bagging methods are offered as a unified +:class:`BaggingClassifier` meta-estimator (resp. :class:`BaggingRegressor`), +taking as input a user-specified estimator along with parameters +specifying the strategy to draw random subsets. In particular, ``max_samples`` +and ``max_features`` control the size of the subsets (in terms of samples and +features), while ``bootstrap`` and ``bootstrap_features`` control whether +samples and features are drawn with or without replacement. When using a subset +of the available samples the generalization accuracy can be estimated with the +out-of-bag samples by setting ``oob_score=True``. As an example, the +snippet below illustrates how to instantiate a bagging ensemble of +:class:`~sklearn.neighbors.KNeighborsClassifier` estimators, each built on random +subsets of 50% of the samples and 50% of the features. -.. _Why_it's_faster: + >>> from sklearn.ensemble import BaggingClassifier + >>> from sklearn.neighbors import KNeighborsClassifier + >>> bagging = BaggingClassifier(KNeighborsClassifier(), + ... max_samples=0.5, max_features=0.5) -Why it's faster ---------------- +.. topic:: Examples: -The bottleneck of a gradient boosting procedure is building the decision -trees. Building a traditional decision tree (as in the other GBDTs -:class:`GradientBoostingClassifier` and :class:`GradientBoostingRegressor`) -requires sorting the samples at each node (for -each feature). Sorting is needed so that the potential gain of a split point -can be computed efficiently. Splitting a single node has thus a complexity -of :math:`\mathcal{O}(n_\text{features} \times n \log(n))` where :math:`n` -is the number of samples at the node. + * :ref:`sphx_glr_auto_examples_ensemble_plot_bias_variance.py` -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor`, in contrast, do not require sorting the -feature values and instead use a data-structure called a histogram, where the -samples are implicitly ordered. Building a histogram has a -:math:`\mathcal{O}(n)` complexity, so the node splitting procedure has a -:math:`\mathcal{O}(n_\text{features} \times n)` complexity, much smaller -than the previous one. In addition, instead of considering :math:`n` split -points, we here consider only ``max_bins`` split points, which is much -smaller. +.. topic:: References -In order to build histograms, the input data `X` needs to be binned into -integer-valued bins. 
This binning procedure does require sorting the feature -values, but it only happens once at the very beginning of the boosting process -(not at each node, like in :class:`GradientBoostingClassifier` and -:class:`GradientBoostingRegressor`). + .. [B1999] L. Breiman, "Pasting small votes for classification in large + databases and on-line", Machine Learning, 36(1), 85-103, 1999. -Finally, many parts of the implementation of -:class:`HistGradientBoostingClassifier` and -:class:`HistGradientBoostingRegressor` are parallelized. + .. [B1996] L. Breiman, "Bagging predictors", Machine Learning, 24(2), + 123-140, 1996. -.. topic:: References + .. [H1998] T. Ho, "The random subspace method for constructing decision + forests", Pattern Analysis and Machine Intelligence, 20(8), 832-844, + 1998. - .. [XGBoost] Tianqi Chen, Carlos Guestrin, :arxiv:`"XGBoost: A Scalable Tree - Boosting System" <1603.02754>` + .. [LG2012] G. Louppe and P. Geurts, "Ensembles on Random Patches", + Machine Learning and Knowledge Discovery in Databases, 346-361, 2012. - .. [LightGBM] Ke et. al. `"LightGBM: A Highly Efficient Gradient - BoostingDecision Tree" `_ - .. [Fisher1958] Fisher, W.D. (1958). `"On Grouping for Maximum Homogeneity" - `_ - Journal of the American Statistical Association, 53, 789-798. .. _voting_classifier: @@ -1643,3 +1545,97 @@ computationally expensive. .. [W1992] Wolpert, David H. "Stacked generalization." Neural networks 5.2 (1992): 241-259. + + + +.. _adaboost: + +AdaBoost +======== + +The module :mod:`sklearn.ensemble` includes the popular boosting algorithm +AdaBoost, introduced in 1995 by Freund and Schapire [FS1995]_. + +The core principle of AdaBoost is to fit a sequence of weak learners (i.e., +models that are only slightly better than random guessing, such as small +decision trees) on repeatedly modified versions of the data. The predictions +from all of them are then combined through a weighted majority vote (or sum) to +produce the final prediction. The data modifications at each so-called boosting +iteration consists of applying weights :math:`w_1`, :math:`w_2`, ..., :math:`w_N` +to each of the training samples. Initially, those weights are all set to +:math:`w_i = 1/N`, so that the first step simply trains a weak learner on the +original data. For each successive iteration, the sample weights are +individually modified and the learning algorithm is reapplied to the reweighted +data. At a given step, those training examples that were incorrectly predicted +by the boosted model induced at the previous step have their weights increased, +whereas the weights are decreased for those that were predicted correctly. As +iterations proceed, examples that are difficult to predict receive +ever-increasing influence. Each subsequent weak learner is thereby forced to +concentrate on the examples that are missed by the previous ones in the sequence +[HTF]_. + +.. figure:: ../auto_examples/ensemble/images/sphx_glr_plot_adaboost_hastie_10_2_001.png + :target: ../auto_examples/ensemble/plot_adaboost_hastie_10_2.html + :align: center + :scale: 75 + +AdaBoost can be used both for classification and regression problems: + + - For multi-class classification, :class:`AdaBoostClassifier` implements + AdaBoost-SAMME and AdaBoost-SAMME.R [ZZRH2009]_. + + - For regression, :class:`AdaBoostRegressor` implements AdaBoost.R2 [D1997]_. 
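+
+Concretely, in the SAMME formulation of [ZZRH2009]_ the re-weighting step
+described above can be written as follows (a sketch of the update; the actual
+implementation additionally folds in the ``learning_rate`` and a normalization
+step). At boosting iteration :math:`m`, the weighted error rate of the current
+weak learner :math:`T^{(m)}` is
+
+.. math:: \mathrm{err}^{(m)} = \frac{\sum_{i=1}^{N} w_i \, \mathbb{1}(y_i \neq T^{(m)}(x_i))}{\sum_{i=1}^{N} w_i},
+
+its weight in the final combination is
+:math:`\alpha^{(m)} = \log\left(\frac{1 - \mathrm{err}^{(m)}}{\mathrm{err}^{(m)}}\right) + \log(K - 1)`
+for :math:`K` classes, and the weights of the misclassified samples are scaled
+up by :math:`\exp(\alpha^{(m)})` before being re-normalized.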
+ +Usage +----- + +The following example shows how to fit an AdaBoost classifier with 100 weak +learners:: + + >>> from sklearn.model_selection import cross_val_score + >>> from sklearn.datasets import load_iris + >>> from sklearn.ensemble import AdaBoostClassifier + + >>> X, y = load_iris(return_X_y=True) + >>> clf = AdaBoostClassifier(n_estimators=100) + >>> scores = cross_val_score(clf, X, y, cv=5) + >>> scores.mean() + 0.9... + +The number of weak learners is controlled by the parameter ``n_estimators``. The +``learning_rate`` parameter controls the contribution of the weak learners in +the final combination. By default, weak learners are decision stumps. Different +weak learners can be specified through the ``estimator`` parameter. +The main parameters to tune to obtain good results are ``n_estimators`` and +the complexity of the base estimators (e.g., its depth ``max_depth`` or +minimum required number of samples to consider a split ``min_samples_split``). + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_hastie_10_2.py` compares the + classification error of a decision stump, decision tree, and a boosted + decision stump using AdaBoost-SAMME and AdaBoost-SAMME.R. + + * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_multiclass.py` shows the performance + of AdaBoost-SAMME and AdaBoost-SAMME.R on a multi-class problem. + + * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_twoclass.py` shows the decision boundary + and decision function values for a non-linearly separable two-class problem + using AdaBoost-SAMME. + + * :ref:`sphx_glr_auto_examples_ensemble_plot_adaboost_regression.py` demonstrates regression + with the AdaBoost.R2 algorithm. + +.. topic:: References + + .. [FS1995] Y. Freund, and R. Schapire, "A Decision-Theoretic Generalization of + On-Line Learning and an Application to Boosting", 1997. + + .. [ZZRH2009] J. Zhu, H. Zou, S. Rosset, T. Hastie. "Multi-class AdaBoost", + 2009. + + .. [D1997] H. Drucker. "Improving Regressors using Boosting Techniques", 1997. + + .. [HTF] T. Hastie, R. Tibshirani and J. Friedman, "Elements of + Statistical Learning Ed. 2", Springer, 2009. + diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst index 1f4e974d6c087..9653ba9d7b646 100644 --- a/doc/modules/feature_extraction.rst +++ b/doc/modules/feature_extraction.rst @@ -206,8 +206,9 @@ Note the use of a generator comprehension, which introduces laziness into the feature extraction: tokens are only processed on demand from the hasher. -Implementation details ----------------------- +|details-start| +**Implementation details** +|details-split| :class:`FeatureHasher` uses the signed 32-bit variant of MurmurHash3. As a result (and because of limitations in ``scipy.sparse``), @@ -223,6 +224,11 @@ Since a simple modulo is used to transform the hash function to a column index, it is advisable to use a power of two as the ``n_features`` parameter; otherwise the features will not be mapped evenly to the columns. +.. topic:: References: + + * `MurmurHash3 `_. + +|details-end| .. topic:: References: @@ -230,9 +236,6 @@ otherwise the features will not be mapped evenly to the columns. Josh Attenberg (2009). `Feature hashing for large scale multitask learning `_. Proc. ICML. - * `MurmurHash3 `_. - - .. _text_feature_extraction: Text feature extraction @@ -396,7 +399,7 @@ last document:: .. _stop_words: Using stop words -................ 
+---------------- Stop words are words like "and", "the", "him", which are presumed to be uninformative in representing the content of a text, and which may be @@ -426,6 +429,7 @@ identify and warn about some kinds of inconsistencies. `__. In *Proc. Workshop for NLP Open Source Software*. + .. _tfidf: Tf–idf term weighting @@ -490,6 +494,10 @@ class:: Again please see the :ref:`reference documentation ` for the details on all the parameters. +|details-start| +**Numeric example of a tf-idf matrix** +|details-split| + Let's take an example with the following counts. The first term is present 100% of the time hence not very interesting. The two other features only in less than 50% of the time hence probably more representative of the @@ -609,6 +617,7 @@ feature extractor with a classifier: * :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` +|details-end| Decoding text files ------------------- @@ -637,6 +646,10 @@ or ``"replace"``. See the documentation for the Python function ``bytes.decode`` for more details (type ``help(bytes.decode)`` at the Python prompt). +|details-start| +**Troubleshooting decoding text** +|details-split| + If you are having trouble decoding text, here are some things to try: - Find out what the actual encoding of the text is. The file might come @@ -690,6 +703,7 @@ About Unicode `_. .. _`ftfy`: https://github.com/LuminosoInsight/python-ftfy +|details-end| Applications and examples ------------------------- @@ -870,8 +884,9 @@ The :class:`HashingVectorizer` also comes with the following limitations: model. A :class:`TfidfTransformer` can be appended to it in a pipeline if required. -Performing out-of-core scaling with HashingVectorizer ------------------------------------------------------- +|details-start| +**Performing out-of-core scaling with HashingVectorizer** +|details-split| An interesting development of using a :class:`HashingVectorizer` is the ability to perform `out-of-core`_ scaling. This means that we can learn from data that @@ -890,6 +905,8 @@ time is often limited by the CPU time one wants to spend on the task. For a full-fledged example of out-of-core scaling in a text classification task see :ref:`sphx_glr_auto_examples_applications_plot_out_of_core_classification.py`. +|details-end| + Customizing the vectorizer classes ---------------------------------- @@ -928,6 +945,10 @@ parameters it is possible to derive from the class and override the ``build_preprocessor``, ``build_tokenizer`` and ``build_analyzer`` factory methods instead of passing custom functions. +|details-start| +**Tips and tricks** +|details-split| + Some tips and tricks: * If documents are pre-tokenized by an external package, then store them in @@ -982,6 +1003,8 @@ Some tips and tricks: Customizing the vectorizer can also be useful when handling Asian languages that do not use an explicit word separator such as whitespace. +|details-end| + .. _image_feature_extraction: Image feature extraction diff --git a/doc/modules/feature_selection.rst b/doc/modules/feature_selection.rst index ec902979d5600..496b27daf447e 100644 --- a/doc/modules/feature_selection.rst +++ b/doc/modules/feature_selection.rst @@ -201,30 +201,36 @@ alpha parameter, the fewer features selected. .. _compressive_sensing: -.. topic:: **L1-recovery and compressive sensing** - - For a good choice of alpha, the :ref:`lasso` can fully recover the - exact set of non-zero variables using only few observations, provided - certain specific conditions are met. 
In particular, the number of - samples should be "sufficiently large", or L1 models will perform at - random, where "sufficiently large" depends on the number of non-zero - coefficients, the logarithm of the number of features, the amount of - noise, the smallest absolute value of non-zero coefficients, and the - structure of the design matrix X. In addition, the design matrix must - display certain specific properties, such as not being too correlated. - - There is no general rule to select an alpha parameter for recovery of - non-zero coefficients. It can by set by cross-validation - (:class:`LassoCV` or :class:`LassoLarsCV`), though this may lead to - under-penalized models: including a small number of non-relevant - variables is not detrimental to prediction score. BIC - (:class:`LassoLarsIC`) tends, on the opposite, to set high values of - alpha. - - **Reference** Richard G. Baraniuk "Compressive Sensing", IEEE Signal +|details-start| +**L1-recovery and compressive sensing** +|details-split| + +For a good choice of alpha, the :ref:`lasso` can fully recover the +exact set of non-zero variables using only few observations, provided +certain specific conditions are met. In particular, the number of +samples should be "sufficiently large", or L1 models will perform at +random, where "sufficiently large" depends on the number of non-zero +coefficients, the logarithm of the number of features, the amount of +noise, the smallest absolute value of non-zero coefficients, and the +structure of the design matrix X. In addition, the design matrix must +display certain specific properties, such as not being too correlated. + +There is no general rule to select an alpha parameter for recovery of +non-zero coefficients. It can by set by cross-validation +(:class:`~sklearn.linear_model.LassoCV` or +:class:`~sklearn.linear_model.LassoLarsCV`), though this may lead to +under-penalized models: including a small number of non-relevant variables +is not detrimental to prediction score. BIC +(:class:`~sklearn.linear_model.LassoLarsIC`) tends, on the opposite, to set +high values of alpha. + +.. topic:: Reference + + Richard G. Baraniuk "Compressive Sensing", IEEE Signal Processing Magazine [120] July 2007 http://users.isr.ist.utl.pt/~aguiar/CS_notes.pdf +|details-end| Tree-based feature selection ---------------------------- @@ -281,6 +287,10 @@ instead of starting with no features and greedily adding features, we start with *all* the features and greedily *remove* features from the set. The `direction` parameter controls whether forward or backward SFS is used. +|details-start| +**Detail on Sequential Feature Selection** +|details-split| + In general, forward and backward selection do not yield equivalent results. Also, one may be much faster than the other depending on the requested number of selected features: if we have 10 features and ask for 7 selected features, @@ -298,16 +308,18 @@ cross-validation requires fitting `m * k` models, while :class:`~sklearn.feature_selection.SelectFromModel` always just does a single fit and requires no iterations. -.. topic:: Examples - - * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` - -.. topic:: References: +.. topic:: Reference .. [sfs] Ferri et al, `Comparative study of techniques for large-scale feature selection `_. +|details-end| + +.. 
topic:: Examples + + * :ref:`sphx_glr_auto_examples_feature_selection_plot_select_from_model_diabetes.py` + Feature selection as part of a pipeline ======================================= diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst index db490bc1309d3..75cbf95dae049 100644 --- a/doc/modules/gaussian_process.rst +++ b/doc/modules/gaussian_process.rst @@ -1,5 +1,3 @@ - - .. _gaussian_process: ================== @@ -8,7 +6,7 @@ Gaussian Processes .. currentmodule:: sklearn.gaussian_process -**Gaussian Processes (GP)** are a generic supervised learning method designed +**Gaussian Processes (GP)** are a nonparametric supervised learning method used to solve *regression* and *probabilistic classification* problems. The advantages of Gaussian processes are: @@ -27,8 +25,8 @@ The advantages of Gaussian processes are: The disadvantages of Gaussian processes include: - - They are not sparse, i.e., they use the whole samples/features information to - perform the prediction. + - Our implementation is not sparse, i.e., they use the whole samples/features + information to perform the prediction. - They lose efficiency in high dimensional spaces -- namely when the number of features exceeds a few dozens. @@ -42,31 +40,44 @@ Gaussian Process Regression (GPR) .. currentmodule:: sklearn.gaussian_process The :class:`GaussianProcessRegressor` implements Gaussian processes (GP) for -regression purposes. For this, the prior of the GP needs to be specified. The -prior mean is assumed to be constant and zero (for ``normalize_y=False``) or the -training data's mean (for ``normalize_y=True``). The prior's -covariance is specified by passing a :ref:`kernel ` object. The -hyperparameters of the kernel are optimized during fitting of -GaussianProcessRegressor by maximizing the log-marginal-likelihood (LML) based -on the passed ``optimizer``. As the LML may have multiple local optima, the -optimizer can be started repeatedly by specifying ``n_restarts_optimizer``. The -first run is always conducted starting from the initial hyperparameter values -of the kernel; subsequent runs are conducted from hyperparameter values -that have been chosen randomly from the range of allowed values. -If the initial hyperparameters should be kept fixed, `None` can be passed as -optimizer. +regression purposes. For this, the prior of the GP needs to be specified. GP +will combine this prior and the likelihood function based on training samples. +It allows to give a probabilistic approach to prediction by giving the mean and +standard deviation as output when predicting. -The noise level in the targets can be specified by passing it via the -parameter ``alpha``, either globally as a scalar or per datapoint. -Note that a moderate noise level can also be helpful for dealing with numeric -issues during fitting as it is effectively implemented as Tikhonov -regularization, i.e., by adding it to the diagonal of the kernel matrix. An -alternative to specifying the noise level explicitly is to include a -WhiteKernel component into the kernel, which can estimate the global noise -level from the data (see example below). +.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_targets_002.png + :target: ../auto_examples/gaussian_process/plot_gpr_noisy_targets.html + :align: center + +The prior mean is assumed to be constant and zero (for `normalize_y=False`) or +the training data's mean (for `normalize_y=True`). The prior's covariance is +specified by passing a :ref:`kernel ` object. 
The hyperparameters +of the kernel are optimized when fitting the :class:`GaussianProcessRegressor` +by maximizing the log-marginal-likelihood (LML) based on the passed +`optimizer`. As the LML may have multiple local optima, the optimizer can be +started repeatedly by specifying `n_restarts_optimizer`. The first run is +always conducted starting from the initial hyperparameter values of the kernel; +subsequent runs are conducted from hyperparameter values that have been chosen +randomly from the range of allowed values. If the initial hyperparameters +should be kept fixed, `None` can be passed as optimizer. + +The noise level in the targets can be specified by passing it via the parameter +`alpha`, either globally as a scalar or per datapoint. Note that a moderate +noise level can also be helpful for dealing with numeric instabilities during +fitting as it is effectively implemented as Tikhonov regularization, i.e., by +adding it to the diagonal of the kernel matrix. An alternative to specifying +the noise level explicitly is to include a +:class:`~sklearn.gaussian_process.kernels.WhiteKernel` component into the +kernel, which can estimate the global noise level from the data (see example +below). The figure below shows the effect of noisy target handled by setting +the parameter `alpha`. + +.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_targets_003.png + :target: ../auto_examples/gaussian_process/plot_gpr_noisy_targets.html + :align: center The implementation is based on Algorithm 2.1 of [RW2006]_. In addition to -the API of standard scikit-learn estimators, GaussianProcessRegressor: +the API of standard scikit-learn estimators, :class:`GaussianProcessRegressor`: * allows prediction without prior fitting (based on the GP prior) @@ -77,149 +88,12 @@ the API of standard scikit-learn estimators, GaussianProcessRegressor: externally for other ways of selecting hyperparameters, e.g., via Markov chain Monte Carlo. +.. topic:: Examples -GPR examples -============ - -GPR with noise-level estimation -------------------------------- -This example illustrates that GPR with a sum-kernel including a WhiteKernel can -estimate the noise level of data. An illustration of the -log-marginal-likelihood (LML) landscape shows that there exist two local -maxima of LML. - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_003.png - :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html - :align: center - -The first corresponds to a model with a high noise level and a -large length scale, which explains all variations in the data by noise. - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_004.png - :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html - :align: center - -The second one has a smaller noise level and shorter length scale, which explains -most of the variation by the noise-free functional relationship. The second -model has a higher likelihood; however, depending on the initial value for the -hyperparameters, the gradient-based optimization might also converge to the -high-noise solution. It is thus important to repeat the optimization several -times for different initializations. - -.. 
figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_noisy_005.png - :target: ../auto_examples/gaussian_process/plot_gpr_noisy.html - :align: center - - -Comparison of GPR and Kernel Ridge Regression ---------------------------------------------- - -Both kernel ridge regression (KRR) and GPR learn -a target function by employing internally the "kernel trick". KRR learns a -linear function in the space induced by the respective kernel which corresponds -to a non-linear function in the original space. The linear function in the -kernel space is chosen based on the mean-squared error loss with -ridge regularization. GPR uses the kernel to define the covariance of -a prior distribution over the target functions and uses the observed training -data to define a likelihood function. Based on Bayes theorem, a (Gaussian) -posterior distribution over target functions is defined, whose mean is used -for prediction. - -A major difference is that GPR can choose the kernel's hyperparameters based -on gradient-ascent on the marginal likelihood function while KRR needs to -perform a grid search on a cross-validated loss function (mean-squared error -loss). A further difference is that GPR learns a generative, probabilistic -model of the target function and can thus provide meaningful confidence -intervals and posterior samples along with the predictions while KRR only -provides predictions. - -The following figure illustrates both methods on an artificial dataset, which -consists of a sinusoidal target function and strong noise. The figure compares -the learned model of KRR and GPR based on a ExpSineSquared kernel, which is -suited for learning periodic functions. The kernel's hyperparameters control -the smoothness (length_scale) and periodicity of the kernel (periodicity). -Moreover, the noise level -of the data is learned explicitly by GPR by an additional WhiteKernel component -in the kernel and by the regularization parameter alpha of KRR. - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_compare_gpr_krr_005.png - :target: ../auto_examples/gaussian_process/plot_compare_gpr_krr.html - :align: center - -The figure shows that both methods learn reasonable models of the target -function. GPR provides reasonable confidence bounds on the prediction which are not -available for KRR. A major difference between the two methods is the time -required for fitting and predicting: while fitting KRR is fast in principle, -the grid-search for hyperparameter optimization scales exponentially with the -number of hyperparameters ("curse of dimensionality"). The gradient-based -optimization of the parameters in GPR does not suffer from this exponential -scaling and is thus considerably faster on this example with 3-dimensional -hyperparameter space. The time for predicting is similar; however, generating -the variance of the predictive distribution of GPR takes considerably longer -than just predicting the mean. - -GPR on Mauna Loa CO2 data -------------------------- - -This example is based on Section 5.4.3 of [RW2006]_. -It illustrates an example of complex kernel engineering and -hyperparameter optimization using gradient ascent on the -log-marginal-likelihood. The data consists of the monthly average atmospheric -CO2 concentrations (in parts per million by volume (ppmv)) collected at the -Mauna Loa Observatory in Hawaii, between 1958 and 1997. The objective is to -model the CO2 concentration as a function of the time t. 
- -The kernel is composed of several terms that are responsible for explaining -different properties of the signal: - -- a long term, smooth rising trend is to be explained by an RBF kernel. The - RBF kernel with a large length-scale enforces this component to be smooth; - it is not enforced that the trend is rising which leaves this choice to the - GP. The specific length-scale and the amplitude are free hyperparameters. - -- a seasonal component, which is to be explained by the periodic - ExpSineSquared kernel with a fixed periodicity of 1 year. The length-scale - of this periodic component, controlling its smoothness, is a free parameter. - In order to allow decaying away from exact periodicity, the product with an - RBF kernel is taken. The length-scale of this RBF component controls the - decay time and is a further free parameter. - -- smaller, medium term irregularities are to be explained by a - RationalQuadratic kernel component, whose length-scale and alpha parameter, - which determines the diffuseness of the length-scales, are to be determined. - According to [RW2006]_, these irregularities can better be explained by - a RationalQuadratic than an RBF kernel component, probably because it can - accommodate several length-scales. - -- a "noise" term, consisting of an RBF kernel contribution, which shall - explain the correlated noise components such as local weather phenomena, - and a WhiteKernel contribution for the white noise. The relative amplitudes - and the RBF's length scale are further free parameters. - -Maximizing the log-marginal-likelihood after subtracting the target's mean -yields the following kernel with an LML of -83.214: - -:: - - 34.4**2 * RBF(length_scale=41.8) - + 3.27**2 * RBF(length_scale=180) * ExpSineSquared(length_scale=1.44, - periodicity=1) - + 0.446**2 * RationalQuadratic(alpha=17.7, length_scale=0.957) - + 0.197**2 * RBF(length_scale=0.138) + WhiteKernel(noise_level=0.0336) - -Thus, most of the target signal (34.4ppm) is explained by a long-term rising -trend (length-scale 41.8 years). The periodic component has an amplitude of -3.27ppm, a decay time of 180 years and a length-scale of 1.44. The long decay -time indicates that we have a locally very close to periodic seasonal -component. The correlated noise has an amplitude of 0.197ppm with a length -scale of 0.138 years and a white-noise contribution of 0.197ppm. Thus, the -overall noise level is very small, indicating that the data can be very well -explained by the model. The figure shows also that the model makes very -confident predictions until around 2015 - -.. figure:: ../auto_examples/gaussian_process/images/sphx_glr_plot_gpr_co2_003.png - :target: ../auto_examples/gaussian_process/plot_gpr_co2.html - :align: center + * :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_noisy_targets.py` + * :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_noisy.py` + * :ref:`sphx_glr_auto_examples_gaussian_process_plot_compare_gpr_krr.py` + * :ref:`sphx_glr_auto_examples_gaussian_process_plot_gpr_co2.py` .. _gpc: @@ -401,15 +275,17 @@ The specification of each hyperparameter is stored in the form of an instance of hyperparameter with name "x" must have the attributes self.x and self.x_bounds. The abstract base class for all kernels is :class:`Kernel`. Kernel implements a -similar interface as :class:`Estimator`, providing the methods ``get_params()``, -``set_params()``, and ``clone()``. This allows setting kernel values also via -meta-estimators such as :class:`Pipeline` or :class:`GridSearch`. 
Note that due to the nested +similar interface as :class:`~sklearn.base.BaseEstimator`, providing the +methods ``get_params()``, ``set_params()``, and ``clone()``. This allows +setting kernel values also via meta-estimators such as +:class:`~sklearn.pipeline.Pipeline` or +:class:`~sklearn.model_selection.GridSearchCV`. Note that due to the nested structure of kernels (by applying kernel operators, see below), the names of -kernel parameters might become relatively complicated. In general, for a -binary kernel operator, parameters of the left operand are prefixed with ``k1__`` -and parameters of the right operand with ``k2__``. An additional convenience -method is ``clone_with_theta(theta)``, which returns a cloned version of the -kernel but with the hyperparameters set to ``theta``. An illustrative example: +kernel parameters might become relatively complicated. In general, for a binary +kernel operator, parameters of the left operand are prefixed with ``k1__`` and +parameters of the right operand with ``k2__``. An additional convenience method +is ``clone_with_theta(theta)``, which returns a cloned version of the kernel +but with the hyperparameters set to ``theta``. An illustrative example: >>> from sklearn.gaussian_process.kernels import ConstantKernel, RBF >>> kernel = ConstantKernel(constant_value=1.0, constant_value_bounds=(0.0, 10.0)) * RBF(length_scale=0.5, length_scale_bounds=(0.0, 10.0)) + RBF(length_scale=2.0, length_scale_bounds=(0.0, 10.0)) diff --git a/doc/modules/grid_search.rst b/doc/modules/grid_search.rst index 851f9a202fa2f..3f99e7841bb00 100644 --- a/doc/modules/grid_search.rst +++ b/doc/modules/grid_search.rst @@ -81,7 +81,7 @@ evaluated and the best combination is retained. of Grid Search coupling parameters from a text documents feature extractor (n-gram count vectorizer and TF-IDF transformer) with a classifier (here a linear SVM trained with SGD with either elastic - net or L2 penalty) using a :class:`pipeline.Pipeline` instance. + net or L2 penalty) using a :class:`~sklearn.pipeline.Pipeline` instance. - See :ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py` for an example of Grid Search within a cross validation loop on the iris diff --git a/doc/modules/impute.rst b/doc/modules/impute.rst index 6314b2ea71737..0bb62fa03bc63 100644 --- a/doc/modules/impute.rst +++ b/doc/modules/impute.rst @@ -22,9 +22,9 @@ Univariate vs. Multivariate Imputation One type of imputation algorithm is univariate, which imputes values in the i-th feature dimension using only non-missing values in that feature dimension -(e.g. :class:`impute.SimpleImputer`). By contrast, multivariate imputation +(e.g. :class:`SimpleImputer`). By contrast, multivariate imputation algorithms use the entire set of available feature dimensions to estimate the -missing values (e.g. :class:`impute.IterativeImputer`). +missing values (e.g. :class:`IterativeImputer`). .. _single_imputer: @@ -87,6 +87,8 @@ string values or pandas categoricals when using the ``'most_frequent'`` or ['a' 'y'] ['b' 'y']] +For another example on usage, see :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. + .. _iterative_imputer: @@ -190,19 +192,20 @@ Nearest neighbors imputation The :class:`KNNImputer` class provides imputation for filling in missing values using the k-Nearest Neighbors approach. By default, a euclidean distance metric -that supports missing values, :func:`~sklearn.metrics.nan_euclidean_distances`, -is used to find the nearest neighbors. 
Each missing feature is imputed using -values from ``n_neighbors`` nearest neighbors that have a value for the -feature. The feature of the neighbors are averaged uniformly or weighted by -distance to each neighbor. If a sample has more than one feature missing, then -the neighbors for that sample can be different depending on the particular -feature being imputed. When the number of available neighbors is less than -`n_neighbors` and there are no defined distances to the training set, the -training set average for that feature is used during imputation. If there is at -least one neighbor with a defined distance, the weighted or unweighted average -of the remaining neighbors will be used during imputation. If a feature is -always missing in training, it is removed during `transform`. For more -information on the methodology, see ref. [OL2001]_. +that supports missing values, +:func:`~sklearn.metrics.pairwise.nan_euclidean_distances`, is used to find the +nearest neighbors. Each missing feature is imputed using values from +``n_neighbors`` nearest neighbors that have a value for the feature. The +feature of the neighbors are averaged uniformly or weighted by distance to each +neighbor. If a sample has more than one feature missing, then the neighbors for +that sample can be different depending on the particular feature being imputed. +When the number of available neighbors is less than `n_neighbors` and there are +no defined distances to the training set, the training set average for that +feature is used during imputation. If there is at least one neighbor with a +defined distance, the weighted or unweighted average of the remaining neighbors +will be used during imputation. If a feature is always missing in training, it +is removed during `transform`. For more information on the methodology, see +ref. [OL2001]_. The following snippet demonstrates how to replace missing values, encoded as ``np.nan``, using the mean feature value of the two nearest @@ -219,6 +222,8 @@ neighbors of samples with missing values:: [5.5, 6. , 5. ], [8. , 8. , 7. ]]) +For another example on usage, see :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. + .. topic:: References .. [OL2001] Olga Troyanskaya, Michael Cantor, Gavin Sherlock, Pat Brown, @@ -303,10 +308,12 @@ whether or not they contain missing values:: >>> indicator.features_ array([0, 1, 2, 3]) -When using the :class:`MissingIndicator` in a :class:`Pipeline`, be sure to use -the :class:`FeatureUnion` or :class:`ColumnTransformer` to add the indicator -features to the regular features. First we obtain the `iris` dataset, and add -some missing values to it. +When using the :class:`MissingIndicator` in a +:class:`~sklearn.pipeline.Pipeline`, be sure to use the +:class:`~sklearn.pipeline.FeatureUnion` or +:class:`~sklearn.compose.ColumnTransformer` to add the indicator features to +the regular features. First we obtain the `iris` dataset, and add some missing +values to it. >>> from sklearn.datasets import load_iris >>> from sklearn.impute import SimpleImputer, MissingIndicator @@ -319,9 +326,9 @@ some missing values to it. >>> X_train, X_test, y_train, _ = train_test_split(X, y, test_size=100, ... random_state=0) -Now we create a :class:`FeatureUnion`. All features will be imputed using -:class:`SimpleImputer`, in order to enable classifiers to work with this data. -Additionally, it adds the indicator variables from +Now we create a :class:`~sklearn.pipeline.FeatureUnion`. 
All features will be +imputed using :class:`SimpleImputer`, in order to enable classifiers to work +with this data. Additionally, it adds the indicator variables from :class:`MissingIndicator`. >>> transformer = FeatureUnion( @@ -334,8 +341,8 @@ Additionally, it adds the indicator variables from (100, 8) Of course, we cannot use the transformer to make any predictions. We should -wrap this in a :class:`Pipeline` with a classifier (e.g., a -:class:`DecisionTreeClassifier`) to be able to make predictions. +wrap this in a :class:`~sklearn.pipeline.Pipeline` with a classifier (e.g., a +:class:`~sklearn.tree.DecisionTreeClassifier`) to be able to make predictions. >>> clf = make_pipeline(transformer, DecisionTreeClassifier()) >>> clf = clf.fit(X_train, y_train) diff --git a/doc/modules/kernel_approximation.rst b/doc/modules/kernel_approximation.rst index 40e8e8b526d1e..2166227daf247 100644 --- a/doc/modules/kernel_approximation.rst +++ b/doc/modules/kernel_approximation.rst @@ -108,7 +108,7 @@ The additive chi squared kernel as used here is given by k(x, y) = \sum_i \frac{2x_iy_i}{x_i+y_i} -This is not exactly the same as :func:`sklearn.metrics.additive_chi2_kernel`. +This is not exactly the same as :func:`sklearn.metrics.pairwise.additive_chi2_kernel`. The authors of [VZ2010]_ prefer the version above as it is always positive definite. Since the kernel is additive, it is possible to treat all components diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 154bbe5ee5cd7..04d58c925e464 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -37,7 +37,7 @@ solves a problem of the form: :align: center :scale: 50% -:class:`LinearRegression` will take in its ``fit`` method arrays X, y +:class:`LinearRegression` will take in its ``fit`` method arrays ``X``, ``y`` and will store the coefficients :math:`w` of the linear model in its ``coef_`` member:: @@ -114,7 +114,7 @@ of shrinkage and thus the coefficients become more robust to collinearity. As with other linear models, :class:`Ridge` will take in its ``fit`` method -arrays X, y and will store the coefficients :math:`w` of the linear model in +arrays ``X``, ``y`` and will store the coefficients :math:`w` of the linear model in its ``coef_`` member:: >>> from sklearn import linear_model @@ -889,12 +889,16 @@ the probability of the positive class :math:`P(y_i=1|X_i)` as .. math:: \hat{p}(X_i) = \operatorname{expit}(X_i w + w_0) = \frac{1}{1 + \exp(-X_i w - w_0)}. + As an optimization problem, binary class logistic regression with regularization term :math:`r(w)` minimizes the following cost function: -.. math:: \min_{w} C \sum_{i=1}^n \left(-y_i \log(\hat{p}(X_i)) - (1 - y_i) \log(1 - \hat{p}(X_i))\right) + r(w). - +.. math:: + :name: regularized-logistic-loss + + \min_{w} C \sum_{i=1}^n \left(-y_i \log(\hat{p}(X_i)) - (1 - y_i) \log(1 - \hat{p}(X_i))\right) + r(w). + We currently provide four choices for the regularization term :math:`r(w)` via the `penalty` argument: @@ -947,16 +951,17 @@ The objective for the optimization becomes Where :math:`[P]` represents the Iverson bracket which evaluates to :math:`0` if :math:`P` is false, otherwise it evaluates to :math:`1`. 
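To connect the objectives above with the estimator interface, here is a minimal sketch (the
iris data and ``C=0.1`` are arbitrary illustrative choices): the regularization term is
selected via ``penalty`` and its strength via ``C``, and for a multiclass target the default
``lbfgs`` solver minimizes the multinomial objective with an :math:`\ell_2` penalty::

    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)   # 3 classes, 4 features

    # Smaller C means stronger regularization, since C scales the data-fit term.
    clf = LogisticRegression(penalty="l2", C=0.1, max_iter=1000).fit(X, y)
    print(clf.coef_.shape)              # (3, 4): one coefficient vector per class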
We currently provide four choices -for the regularization term :math:`r(W)` via the `penalty` argument: +for the regularization term :math:`r(W)` via the `penalty` argument, where :math:`m` +is the number of features: +----------------+----------------------------------------------------------------------------------+ | penalty | :math:`r(W)` | +================+==================================================================================+ | `None` | :math:`0` | +----------------+----------------------------------------------------------------------------------+ -| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^n\sum_{j=1}^{K}|W_{i,j}|` | +| :math:`\ell_1` | :math:`\|W\|_{1,1} = \sum_{i=1}^m\sum_{j=1}^{K}|W_{i,j}|` | +----------------+----------------------------------------------------------------------------------+ -| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^n\sum_{j=1}^{K} W_{i,j}^2` | +| :math:`\ell_2` | :math:`\frac{1}{2}\|W\|_F^2 = \frac{1}{2}\sum_{i=1}^m\sum_{j=1}^{K} W_{i,j}^2` | +----------------+----------------------------------------------------------------------------------+ | `ElasticNet` | :math:`\frac{1 - \rho}{2}\|W\|_F^2 + \rho \|W\|_{1,1}` | +----------------+----------------------------------------------------------------------------------+ @@ -1062,15 +1067,16 @@ with `loss="log_loss"`, which might be even faster but requires more tuning. .. topic:: Differences from liblinear: There might be a difference in the scores obtained between - :class:`LogisticRegression` with ``solver=liblinear`` - or :class:`LinearSVC` and the external liblinear library directly, - when ``fit_intercept=False`` and the fit ``coef_`` (or) the data to - be predicted are zeroes. This is because for the sample(s) with - ``decision_function`` zero, :class:`LogisticRegression` and :class:`LinearSVC` - predict the negative class, while liblinear predicts the positive class. - Note that a model with ``fit_intercept=False`` and having many samples with - ``decision_function`` zero, is likely to be a underfit, bad model and you are - advised to set ``fit_intercept=True`` and increase the intercept_scaling. + :class:`LogisticRegression` with ``solver=liblinear`` or + :class:`~sklearn.svm.LinearSVC` and the external liblinear library directly, + when ``fit_intercept=False`` and the fit ``coef_`` (or) the data to be + predicted are zeroes. This is because for the sample(s) with + ``decision_function`` zero, :class:`LogisticRegression` and + :class:`~sklearn.svm.LinearSVC` predict the negative class, while liblinear + predicts the positive class. Note that a model with ``fit_intercept=False`` + and having many samples with ``decision_function`` zero, is likely to be a + underfit, bad model and you are advised to set ``fit_intercept=True`` and + increase the intercept_scaling. .. note:: **Feature selection with sparse logistic regression** diff --git a/doc/modules/manifold.rst b/doc/modules/manifold.rst index 40bbea17a8309..1656c09f1371d 100644 --- a/doc/modules/manifold.rst +++ b/doc/modules/manifold.rst @@ -130,8 +130,10 @@ distances between all points. Isomap can be performed with the object :align: center :scale: 50 -Complexity ----------- +|details-start| +**Complexity** +|details-split| + The Isomap algorithm comprises three stages: 1. **Nearest neighbor search.** Isomap uses @@ -162,6 +164,8 @@ The overall complexity of Isomap is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. 
topic:: References: * `"A global geometric framework for nonlinear dimensionality reduction" @@ -187,8 +191,9 @@ Locally linear embedding can be performed with function :align: center :scale: 50 -Complexity ----------- +|details-start| +**Complexity** +|details-split| The standard LLE algorithm comprises three stages: @@ -209,6 +214,8 @@ The overall complexity of standard LLE is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * `"Nonlinear dimensionality reduction by locally linear embedding" @@ -241,8 +248,9 @@ It requires ``n_neighbors > n_components``. :align: center :scale: 50 -Complexity ----------- +|details-start| +**Complexity** +|details-split| The MLLE algorithm comprises three stages: @@ -265,6 +273,8 @@ The overall complexity of MLLE is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * `"MLLE: Modified Locally Linear Embedding Using Multiple Weights" @@ -291,8 +301,9 @@ It requires ``n_neighbors > n_components * (n_components + 3) / 2``. :align: center :scale: 50 -Complexity ----------- +|details-start| +**Complexity** +|details-split| The HLLE algorithm comprises three stages: @@ -313,6 +324,8 @@ The overall complexity of standard HLLE is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * `"Hessian Eigenmaps: Locally linear embedding techniques for @@ -335,8 +348,9 @@ preserving local distances. Spectral embedding can be performed with the function :func:`spectral_embedding` or its object-oriented counterpart :class:`SpectralEmbedding`. -Complexity ----------- +|details-start| +**Complexity** +|details-split| The Spectral Embedding (Laplacian Eigenmaps) algorithm comprises three stages: @@ -358,6 +372,8 @@ The overall complexity of spectral embedding is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * `"Laplacian Eigenmaps for Dimensionality Reduction @@ -383,8 +399,9 @@ tangent spaces to learn the embedding. LTSA can be performed with function :align: center :scale: 50 -Complexity ----------- +|details-start| +**Complexity** +|details-split| The LTSA algorithm comprises three stages: @@ -404,6 +421,8 @@ The overall complexity of standard LTSA is * :math:`k` : number of nearest neighbors * :math:`d` : output dimension +|details-end| + .. topic:: References: * :arxiv:`"Principal manifolds and nonlinear dimensionality reduction via @@ -448,8 +467,9 @@ the similarities chosen in some optimal ways. The objective, called the stress, is then defined by :math:`\sum_{i < j} d_{ij}(X) - \hat{d}_{ij}(X)` -Metric MDS ----------- +|details-start| +**Metric MDS** +|details-split| The simplest metric :class:`MDS` model, called *absolute MDS*, disparities are defined by :math:`\hat{d}_{ij} = S_{ij}`. With absolute MDS, the value :math:`S_{ij}` @@ -458,8 +478,11 @@ should then correspond exactly to the distance between point :math:`i` and Most commonly, disparities are set to :math:`\hat{d}_{ij} = b S_{ij}`. -Nonmetric MDS -------------- +|details-end| + +|details-start| +**Nonmetric MDS** +|details-split| Non metric :class:`MDS` focuses on the ordination of the data. If :math:`S_{ij} > S_{jk}`, then the embedding should enforce :math:`d_{ij} < @@ -490,6 +513,7 @@ in the metric case. :align: center :scale: 60 +|details-end| .. 
topic:: References: @@ -551,8 +575,10 @@ The disadvantages to using t-SNE are roughly: :align: center :scale: 50 -Optimizing t-SNE ----------------- +|details-start| +**Optimizing t-SNE** +|details-split| + The main purpose of t-SNE is visualization of high-dimensional data. Hence, it works best when the data will be embedded on two or three dimensions. @@ -601,8 +627,11 @@ but less accurate results. provides a good discussion of the effects of the various parameters, as well as interactive plots to explore the effects of different parameters. -Barnes-Hut t-SNE ----------------- +|details-end| + +|details-start| +**Barnes-Hut t-SNE** +|details-split| The Barnes-Hut t-SNE that has been implemented here is usually much slower than other manifold learning algorithms. The optimization is quite difficult @@ -638,6 +667,7 @@ imply that the data cannot be correctly classified by a supervised model. It might be the case that 2 dimensions are not high enough to accurately represent the internal structure of the data. +|details-end| .. topic:: References: diff --git a/doc/modules/mixture.rst b/doc/modules/mixture.rst index fbf0551da93a4..e9cc94b1d493d 100644 --- a/doc/modules/mixture.rst +++ b/doc/modules/mixture.rst @@ -68,33 +68,36 @@ full covariance. * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_pdf.py` for an example on plotting the density estimation. -Pros and cons of class :class:`GaussianMixture` ------------------------------------------------ +|details-start| +**Pros and cons of class GaussianMixture** +|details-split| -Pros -.... +.. topic:: Pros: -:Speed: It is the fastest algorithm for learning mixture models + :Speed: It is the fastest algorithm for learning mixture models -:Agnostic: As this algorithm maximizes only the likelihood, it - will not bias the means towards zero, or bias the cluster sizes to - have specific structures that might or might not apply. + :Agnostic: As this algorithm maximizes only the likelihood, it + will not bias the means towards zero, or bias the cluster sizes to + have specific structures that might or might not apply. -Cons -.... +.. topic:: Cons: -:Singularities: When one has insufficiently many points per - mixture, estimating the covariance matrices becomes difficult, - and the algorithm is known to diverge and find solutions with - infinite likelihood unless one regularizes the covariances artificially. + :Singularities: When one has insufficiently many points per + mixture, estimating the covariance matrices becomes difficult, + and the algorithm is known to diverge and find solutions with + infinite likelihood unless one regularizes the covariances artificially. -:Number of components: This algorithm will always use all the - components it has access to, needing held-out data - or information theoretical criteria to decide how many components to use - in the absence of external cues. + :Number of components: This algorithm will always use all the + components it has access to, needing held-out data + or information theoretical criteria to decide how many components to use + in the absence of external cues. -Selecting the number of components in a classical Gaussian Mixture Model ------------------------------------------------------------------------- +|details-end| + + +|details-start| +**Selecting the number of components in a classical Gaussian Mixture model** +|details-split| The BIC criterion can be used to select the number of components in a Gaussian Mixture in an efficient way. 
In theory, it recovers the true number of @@ -114,10 +117,13 @@ model. * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_selection.py` for an example of model selection performed with classical Gaussian mixture. +|details-end| + .. _expectation_maximization: -Estimation algorithm Expectation-maximization ------------------------------------------------ +|details-start| +**Estimation algorithm expectation-maximization** +|details-split| The main difficulty in learning Gaussian mixture models from unlabeled data is that one usually doesn't know which points came from @@ -135,8 +141,11 @@ parameters to maximize the likelihood of the data given those assignments. Repeating this process is guaranteed to always converge to a local optimum. -Choice of the Initialization Method ------------------------------------ +|details-end| + +|details-start| +**Choice of the Initialization method** +|details-split| There is a choice of four initialization methods (as well as inputting user defined initial means) to generate the initial centers for the model components: @@ -172,6 +181,8 @@ random * See :ref:`sphx_glr_auto_examples_mixture_plot_gmm_init.py` for an example of using different initializations in Gaussian Mixture. +|details-end| + .. _bgmm: Variational Bayesian Gaussian Mixture @@ -183,8 +194,7 @@ similar to the one defined by :class:`GaussianMixture`. .. _variational_inference: -Estimation algorithm: variational inference ---------------------------------------------- +**Estimation algorithm: variational inference** Variational inference is an extension of expectation-maximization that maximizes a lower bound on model evidence (including @@ -282,48 +292,47 @@ from the two resulting mixtures. ``weight_concentration_prior_type`` for different values of the parameter ``weight_concentration_prior``. +|details-start| +**Pros and cons of variational inference with BayesianGaussianMixture** +|details-split| -Pros and cons of variational inference with :class:`BayesianGaussianMixture` ----------------------------------------------------------------------------- - -Pros -..... +.. topic:: Pros: -:Automatic selection: when ``weight_concentration_prior`` is small enough and - ``n_components`` is larger than what is found necessary by the model, the - Variational Bayesian mixture model has a natural tendency to set some mixture - weights values close to zero. This makes it possible to let the model choose - a suitable number of effective components automatically. Only an upper bound - of this number needs to be provided. Note however that the "ideal" number of - active components is very application specific and is typically ill-defined - in a data exploration setting. + :Automatic selection: when ``weight_concentration_prior`` is small enough and + ``n_components`` is larger than what is found necessary by the model, the + Variational Bayesian mixture model has a natural tendency to set some mixture + weights values close to zero. This makes it possible to let the model choose + a suitable number of effective components automatically. Only an upper bound + of this number needs to be provided. Note however that the "ideal" number of + active components is very application specific and is typically ill-defined + in a data exploration setting. 
-:Less sensitivity to the number of parameters: unlike finite models, which will - almost always use all components as much as they can, and hence will produce - wildly different solutions for different numbers of components, the - variational inference with a Dirichlet process prior - (``weight_concentration_prior_type='dirichlet_process'``) won't change much - with changes to the parameters, leading to more stability and less tuning. + :Less sensitivity to the number of parameters: unlike finite models, which will + almost always use all components as much as they can, and hence will produce + wildly different solutions for different numbers of components, the + variational inference with a Dirichlet process prior + (``weight_concentration_prior_type='dirichlet_process'``) won't change much + with changes to the parameters, leading to more stability and less tuning. -:Regularization: due to the incorporation of prior information, - variational solutions have less pathological special cases than - expectation-maximization solutions. + :Regularization: due to the incorporation of prior information, + variational solutions have less pathological special cases than + expectation-maximization solutions. -Cons -..... +.. topic:: Cons: -:Speed: the extra parametrization necessary for variational inference makes - inference slower, although not by much. + :Speed: the extra parametrization necessary for variational inference makes + inference slower, although not by much. -:Hyperparameters: this algorithm needs an extra hyperparameter - that might need experimental tuning via cross-validation. + :Hyperparameters: this algorithm needs an extra hyperparameter + that might need experimental tuning via cross-validation. -:Bias: there are many implicit biases in the inference algorithms (and also in - the Dirichlet process if used), and whenever there is a mismatch between - these biases and the data it might be possible to fit better models using a - finite mixture. + :Bias: there are many implicit biases in the inference algorithms (and also in + the Dirichlet process if used), and whenever there is a mismatch between + these biases and the data it might be possible to fit better models using a + finite mixture. +|details-end| .. _dirichlet_process: diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 670e661d92ef7..32c2fc22ec0f6 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -162,6 +162,12 @@ the :func:`fbeta_score` function:: >>> grid = GridSearchCV(LinearSVC(dual="auto"), param_grid={'C': [1, 10]}, ... scoring=ftwo_scorer, cv=5) + +|details-start| +**Custom scorer objects** +|details-split| + + The second use case is to build a completely custom scorer object from a simple python function using :func:`make_scorer`, which can take several parameters: @@ -202,13 +208,21 @@ Here is an example of building custom scorers, and of using the >>> score(clf, X, y) -0.69... +|details-end| .. _diy_scoring: Implementing your own scoring object ------------------------------------ + You can generate even more flexible model scorers by constructing your own scoring object from scratch, without using the :func:`make_scorer` factory. + + +|details-start| +**How to build a scorer from scratch** +|details-split| + For a callable to be a scorer, it needs to meet the protocol specified by the following two rules: @@ -249,6 +263,8 @@ the following two rules: ... cv=5, ... n_jobs=-1) # doctest: +SKIP +|details-end| + .. 
_multimetric_scoring: Using multiple metric evaluation @@ -807,8 +823,8 @@ binary case. The :func:`average_precision_score` function supports multiclass and multilabel formats by computing each class score in a One-vs-the-rest (OvR) fashion and averaging them or not depending of its ``average`` argument value. -The :func:`PredictionRecallDisplay.from_estimator` and -:func:`PredictionRecallDisplay.from_predictions` functions will plot the +The :func:`PrecisionRecallDisplay.from_estimator` and +:func:`PrecisionRecallDisplay.from_predictions` functions will plot the precision-recall curve as follows. .. image:: ../auto_examples/model_selection/images/sphx_glr_plot_precision_recall_001.png @@ -2769,8 +2785,8 @@ model would grow with the predicted value of `E[y|X]` (either linearly for Poisson or quadratically for Gamma). When fitting a linear least squares regression model (see -:class:`~sklearn.linear_mnodel.LinearRegression` and -:class:`~sklearn.linear_mnodel.Ridge`), we can use this plot to check +:class:`~sklearn.linear_model.LinearRegression` and +:class:`~sklearn.linear_model.Ridge`), we can use this plot to check if some of the `model assumptions `_ are met, in particular that the residuals should be uncorrelated, their diff --git a/doc/modules/naive_bayes.rst b/doc/modules/naive_bayes.rst index 1cb8aa0d6dedf..b0b32c28e455a 100644 --- a/doc/modules/naive_bayes.rst +++ b/doc/modules/naive_bayes.rst @@ -253,9 +253,9 @@ to class :math:`c`, :math:`N_{c} = |\{ j \in J\mid y_j = c\}|` is the number of samples with class c, :math:`\alpha` is a smoothing parameter and :math:`n_i` is the number of available categories of feature :math:`i`. -:class:`CategoricalNB` assumes that the sample matrix :math:`X` is encoded -(for instance with the help of :class:`OrdinalEncoder`) such that all -categories for each feature :math:`i` are represented with numbers +:class:`CategoricalNB` assumes that the sample matrix :math:`X` is encoded (for +instance with the help of :class:`~sklearn.preprocessing.OrdinalEncoder`) such +that all categories for each feature :math:`i` are represented with numbers :math:`0, ..., n_i - 1` where :math:`n_i` is the number of available categories of feature :math:`i`. diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst index 90856b6933f3e..d3a7df74e6348 100644 --- a/doc/modules/neighbors.rst +++ b/doc/modules/neighbors.rst @@ -139,9 +139,9 @@ including specification of query strategies, distance metrics, etc. For a list of valid metrics use :meth:`KDTree.valid_metrics` and :meth:`BallTree.valid_metrics`: >>> from sklearn.neighbors import KDTree, BallTree - >>> KDTree.valid_metrics() + >>> KDTree.valid_metrics ['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity'] - >>> BallTree.valid_metrics() + >>> BallTree.valid_metrics ['euclidean', 'l2', 'minkowski', 'p', 'manhattan', 'cityblock', 'l1', 'chebyshev', 'infinity', 'seuclidean', 'mahalanobis', 'hamming', 'canberra', 'braycurtis', 'jaccard', 'dice', 'rogerstanimoto', 'russellrao', 'sokalmichener', 'sokalsneath', 'haversine', 'pyfunc'] .. _classification: @@ -188,13 +188,9 @@ distance can be supplied to compute the weights. .. |classification_1| image:: ../auto_examples/neighbors/images/sphx_glr_plot_classification_001.png :target: ../auto_examples/neighbors/plot_classification.html - :scale: 50 - -.. 
|classification_2| image:: ../auto_examples/neighbors/images/sphx_glr_plot_classification_002.png - :target: ../auto_examples/neighbors/plot_classification.html - :scale: 50 + :scale: 75 -.. centered:: |classification_1| |classification_2| +.. centered:: |classification_1| .. topic:: Examples: @@ -472,15 +468,16 @@ leaf nodes. The level of this switch can be specified with the parameter Valid Metrics for Nearest Neighbor Algorithms --------------------------------------------- -For a list of available metrics, see the documentation of the :class:`DistanceMetric` -class and the metrics listed in `sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. -Note that the "cosine" metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`. +For a list of available metrics, see the documentation of the +:class:`~sklearn.metrics.DistanceMetric` class and the metrics listed in +`sklearn.metrics.pairwise.PAIRWISE_DISTANCE_FUNCTIONS`. Note that the "cosine" +metric uses :func:`~sklearn.metrics.pairwise.cosine_distances`. A list of valid metrics for any of the above algorithms can be obtained by using their ``valid_metric`` attribute. For example, valid metrics for ``KDTree`` can be generated by: >>> from sklearn.neighbors import KDTree - >>> print(sorted(KDTree.valid_metrics())) + >>> print(sorted(KDTree.valid_metrics)) ['chebyshev', 'cityblock', 'euclidean', 'infinity', 'l1', 'l2', 'manhattan', 'minkowski', 'p'] diff --git a/doc/modules/neural_networks_supervised.rst b/doc/modules/neural_networks_supervised.rst index 995faa9e6d19c..388f32e7c6925 100644 --- a/doc/modules/neural_networks_supervised.rst +++ b/doc/modules/neural_networks_supervised.rst @@ -316,7 +316,7 @@ Tips on Practical Use attribute on the input vector X to [0, 1] or [-1, +1], or standardize it to have mean 0 and variance 1. Note that you must apply the *same* scaling to the test set for meaningful results. - You can use :class:`StandardScaler` for standardization. + You can use :class:`~sklearn.preprocessing.StandardScaler` for standardization. >>> from sklearn.preprocessing import StandardScaler # doctest: +SKIP >>> scaler = StandardScaler() # doctest: +SKIP @@ -326,12 +326,13 @@ Tips on Practical Use >>> # apply same transformation to test data >>> X_test = scaler.transform(X_test) # doctest: +SKIP - An alternative and recommended approach is to use :class:`StandardScaler` - in a :class:`Pipeline` + An alternative and recommended approach is to use + :class:`~sklearn.preprocessing.StandardScaler` in a + :class:`~sklearn.pipeline.Pipeline` - * Finding a reasonable regularization parameter :math:`\alpha` is - best done using :class:`GridSearchCV`, usually in the - range ``10.0 ** -np.arange(1, 7)``. + * Finding a reasonable regularization parameter :math:`\alpha` is best done + using :class:`~sklearn.model_selection.GridSearchCV`, usually in the range + ``10.0 ** -np.arange(1, 7)``. * Empirically, we observed that `L-BFGS` converges faster and with better solutions on small datasets. For relatively large diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst index 69045147d8af9..b968070f4b5da 100644 --- a/doc/modules/preprocessing.rst +++ b/doc/modules/preprocessing.rst @@ -10,9 +10,10 @@ The ``sklearn.preprocessing`` package provides several common utility functions and transformer classes to change raw feature vectors into a representation that is more suitable for the downstream estimators. -In general, learning algorithms benefit from standardization of the data set. 
If -some outliers are present in the set, robust scalers or transformers are more -appropriate. The behaviors of the different scalers, transformers, and +In general, many learning algorithms such as linear models benefit from standardization of the data set +(see :ref:`sphx_glr_auto_examples_preprocessing_plot_scaling_importance.py`). +If some outliers are present in the set, robust scalers or other transformers can +be more appropriate. The behaviors of the different scalers, transformers, and normalizers on a dataset containing marginal outliers is highlighted in :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`. @@ -883,13 +884,14 @@ cardinality categories are location based such as zip code or region. For the binary classification target, the target encoding is given by: .. math:: - S_i = \lambda_i\frac{n_{iY}}{n_i} + (1 - \lambda_i)\frac{n_y}{n} + S_i = \lambda_i\frac{n_{iY}}{n_i} + (1 - \lambda_i)\frac{n_Y}{n} where :math:`S_i` is the encoding for category :math:`i`, :math:`n_{iY}` is the -number of observations with :math:`Y=1` with category :math:`i`, :math:`n_i` is -the number of observations with category :math:`i`, :math:`n_y` is the number of +number of observations with :math:`Y=1` and category :math:`i`, :math:`n_i` is +the number of observations with category :math:`i`, :math:`n_Y` is the number of observations with :math:`Y=1`, :math:`n` is the number of observations, and -:math:`\lambda_i` is a shrinkage factor. The shrinkage factor is given by: +:math:`\lambda_i` is a shrinkage factor for category :math:`i`. The shrinkage +factor is given by: .. math:: \lambda_i = \frac{n_i}{m + n_i} @@ -897,40 +899,45 @@ observations with :math:`Y=1`, :math:`n` is the number of observations, and where :math:`m` is a smoothing factor, which is controlled with the `smooth` parameter in :class:`TargetEncoder`. Large smoothing factors will put more weight on the global mean. When `smooth="auto"`, the smoothing factor is -computed as an empirical Bayes estimate: :math:`m=\sigma_c^2/\tau^2`, where +computed as an empirical Bayes estimate: :math:`m=\sigma_i^2/\tau^2`, where :math:`\sigma_i^2` is the variance of `y` with category :math:`i` and :math:`\tau^2` is the global variance of `y`. For continuous targets, the formulation is similar to binary classification: .. math:: - S_i = \lambda_i\frac{\sum_{k\in L_i}y_k}{n_i} + (1 - \lambda_i)\frac{\sum_{k=1}^{n}y_k}{n} - -where :math:`L_i` is the set of observations for which :math:`X=X_i` and -:math:`n_i` is the cardinality of :math:`L_i`. - -:meth:`~TargetEncoder.fit_transform` internally relies on a cross validation -scheme to prevent information from the target from leaking into the train-time -representation for non-informative high-cardinality categorical variables and -help prevent the downstream model to overfit spurious correlations. Note that -as a result, `fit(X, y).transform(X)` does not equal `fit_transform(X, y)`. In -:meth:`~TargetEncoder.fit_transform`, the training data is split into multiple -folds and encodes each fold by using the encodings trained on the other folds. -After cross validation is complete in :meth:`~TargetEncoder.fit_transform`, the -target encoder learns one final encoding on the whole training set. This final -encoding is used to encode categories in :meth:`~TargetEncoder.transform`. 
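As a rough sketch of the practical consequence of this scheme (synthetic data with arbitrary
category labels), the cross-fitted output of ``fit_transform`` can be compared with the single
encoding used by ``transform``::

    import numpy as np
    from sklearn.preprocessing import TargetEncoder

    rng = np.random.RandomState(0)
    X = rng.choice(["a", "b", "c"], size=(100, 1))
    y = rng.randint(0, 2, size=100)

    enc = TargetEncoder(smooth="auto", cv=5, random_state=0)
    X_cross_fitted = enc.fit_transform(X, y)    # per-fold (cross-fitted) encodings
    X_full = enc.fit(X, y).transform(X)         # one encoding learned on all the data

    # The two generally differ because fit_transform uses cross fitting.
    print(np.allclose(X_cross_fitted, X_full))  # typically False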
The -following diagram shows the cross validation scheme in + S_i = \lambda_i\frac{\sum_{k\in L_i}Y_k}{n_i} + (1 - \lambda_i)\frac{\sum_{k=1}^{n}Y_k}{n} + +where :math:`L_i` is the set of observations with category :math:`i` and +:math:`n_i` is the number of observations with category :math:`i`. + +:meth:`~TargetEncoder.fit_transform` internally relies on a :term:`cross fitting` +scheme to prevent target information from leaking into the train-time +representation, especially for non-informative high-cardinality categorical +variables, and help prevent the downstream model from overfitting spurious +correlations. Note that as a result, `fit(X, y).transform(X)` does not equal +`fit_transform(X, y)`. In :meth:`~TargetEncoder.fit_transform`, the training +data is split into *k* folds (determined by the `cv` parameter) and each fold is +encoded using the encodings learnt using the other *k-1* folds. The following +diagram shows the :term:`cross fitting` scheme in :meth:`~TargetEncoder.fit_transform` with the default `cv=5`: .. image:: ../images/target_encoder_cross_validation.svg :width: 600 :align: center -The :meth:`~TargetEncoder.fit` method does **not** use any cross validation +:meth:`~TargetEncoder.fit_transform` also learns a 'full data' encoding using +the whole training set. This is never used in +:meth:`~TargetEncoder.fit_transform` but is saved to the attribute `encodings_`, +for use when :meth:`~TargetEncoder.transform` is called. Note that the encodings +learned for each fold during the :term:`cross fitting` scheme are not saved to +an attribute. + +The :meth:`~TargetEncoder.fit` method does **not** use any :term:`cross fitting` schemes and learns one encoding on the entire training set, which is used to encode categories in :meth:`~TargetEncoder.transform`. -:meth:`~TargetEncoder.fit`'s one encoding is the same as the final encoding -learned in :meth:`~TargetEncoder.fit_transform`. +This encoding is the same as the 'full data' +encoding learned in :meth:`~TargetEncoder.fit_transform`. .. note:: :class:`TargetEncoder` considers missing values, such as `np.nan` or `None`, diff --git a/doc/modules/sgd.rst b/doc/modules/sgd.rst index c50ed66868c1b..3f587d25b3450 100644 --- a/doc/modules/sgd.rst +++ b/doc/modules/sgd.rst @@ -249,6 +249,10 @@ quadratic in the number of samples. with a large number of training samples (> 10,000) for which the SGD variant can be several orders of magnitude faster. +|details-start| +**Mathematical details** +|details-split| + Its implementation is based on the implementation of the stochastic gradient descent. Indeed, the original optimization problem of the One-Class SVM is given by @@ -282,6 +286,8 @@ This is similar to the optimization problems studied in section being the L2 norm. We just need to add the term :math:`b\nu` in the optimization loop. +|details-end| + As :class:`SGDClassifier` and :class:`SGDRegressor`, :class:`SGDOneClassSVM` supports averaged SGD. Averaging can be enabled by setting ``average=True``. @@ -342,9 +348,9 @@ Tips on Practical Use * Stochastic Gradient Descent is sensitive to feature scaling, so it is highly recommended to scale your data. For example, scale each attribute on the input vector X to [0,1] or [-1,+1], or standardize - it to have mean 0 and variance 1. Note that the *same* scaling - must be applied to the test vector to obtain meaningful - results. This can be easily done using :class:`StandardScaler`:: + it to have mean 0 and variance 1. 
Note that the *same* scaling must be + applied to the test vector to obtain meaningful results. This can be easily + done using :class:`~sklearn.preprocessing.StandardScaler`:: from sklearn.preprocessing import StandardScaler scaler = StandardScaler() @@ -410,6 +416,10 @@ where :math:`L` is a loss function that measures model (mis)fit and complexity; :math:`\alpha > 0` is a non-negative hyperparameter that controls the regularization strength. +|details-start| +**Loss functions details** +|details-split| + Different choices for :math:`L` entail different classifiers or regressors: - Hinge (soft-margin): equivalent to Support Vector Classification. @@ -431,6 +441,8 @@ Different choices for :math:`L` entail different classifiers or regressors: - Epsilon-Insensitive: (soft-margin) equivalent to Support Vector Regression. :math:`L(y_i, f(x_i)) = \max(0, |y_i - f(x_i)| - \varepsilon)`. +|details-end| + All of the above loss functions can be regarded as an upper bound on the misclassification error (Zero-one loss) as shown in the Figure below. @@ -491,7 +503,7 @@ where :math:`t` is the time step (there are a total of `n_samples * n_iter` time steps), :math:`t_0` is determined based on a heuristic proposed by Léon Bottou such that the expected initial updates are comparable with the expected size of the weights (this assuming that the norm of the training samples is -approx. 1). The exact definition can be found in ``_init_t`` in :class:`BaseSGD`. +approx. 1). The exact definition can be found in ``_init_t`` in `BaseSGD`. For regression the default learning rate schedule is inverse scaling diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index c5b998e48707a..8f97b8dee8806 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -60,14 +60,19 @@ capable of performing binary and multi-class classification on a dataset. :align: center -:class:`SVC` and :class:`NuSVC` are similar methods, but accept -slightly different sets of parameters and have different mathematical -formulations (see section :ref:`svm_mathematical_formulation`). On the -other hand, :class:`LinearSVC` is another (faster) implementation of Support -Vector Classification for the case of a linear kernel. Note that -:class:`LinearSVC` does not accept parameter ``kernel``, as this is -assumed to be linear. It also lacks some of the attributes of -:class:`SVC` and :class:`NuSVC`, like ``support_``. +:class:`SVC` and :class:`NuSVC` are similar methods, but accept slightly +different sets of parameters and have different mathematical formulations (see +section :ref:`svm_mathematical_formulation`). On the other hand, +:class:`LinearSVC` is another (faster) implementation of Support Vector +Classification for the case of a linear kernel. It also +lacks some of the attributes of :class:`SVC` and :class:`NuSVC`, like +`support_`. :class:`LinearSVC` uses `squared_hinge` loss and due to its +implementation in `liblinear` it also regularizes the intercept, if considered. +This effect can however be reduced by carefully fine tuning its +`intercept_scaling` parameter, which allows the intercept term to have a +different regularization behavior compared to the other features. The +classification results and score can therefore differ from the other two +classifiers. As other classifiers, :class:`SVC`, :class:`NuSVC` and :class:`LinearSVC` take as input two arrays: an array `X` of shape @@ -149,6 +154,10 @@ multi-class strategy, thus training `n_classes` models. 
See :ref:`svm_mathematical_formulation` for a complete description of the decision function. +|details-start| +**Details on multi-class strategies** +|details-split| + Note that the :class:`LinearSVC` also implements an alternative multi-class strategy, the so-called multi-class SVM formulated by Crammer and Singer [#8]_, by using the option ``multi_class='crammer_singer'``. In practice, @@ -199,6 +208,8 @@ Then ``dual_coef_`` looks like this: |for SVs of class 0 |for SVs of class 1 |for SVs of class 2 | +--------------------------------------------------------------------------+-------------------------------------------------+-------------------------------------------------+ +|details-end| + .. topic:: Examples: * :ref:`sphx_glr_auto_examples_svm_plot_iris_svc.py`, @@ -308,10 +319,15 @@ target. There are three different implementations of Support Vector Regression: :class:`SVR`, :class:`NuSVR` and :class:`LinearSVR`. :class:`LinearSVR` -provides a faster implementation than :class:`SVR` but only considers -the linear kernel, while :class:`NuSVR` implements a slightly different -formulation than :class:`SVR` and :class:`LinearSVR`. See -:ref:`svm_implementation_details` for further details. +provides a faster implementation than :class:`SVR` but only considers the +linear kernel, while :class:`NuSVR` implements a slightly different formulation +than :class:`SVR` and :class:`LinearSVR`. Due to its implementation in +`liblinear` :class:`LinearSVR` also regularizes the intercept, if considered. +This effect can however be reduced by carefully fine tuning its +`intercept_scaling` parameter, which allows the intercept term to have a +different regularization behavior compared to the other features. The +classification results and score can therefore differ from the other two +classifiers. See :ref:`svm_implementation_details` for further details. As with classification classes, the fit method will take as argument vectors X, y, only that in this case y is expected to have @@ -505,7 +521,6 @@ is advised to use :class:`~sklearn.model_selection.GridSearchCV` with * :ref:`sphx_glr_auto_examples_svm_plot_rbf_parameters.py` * :ref:`sphx_glr_auto_examples_svm_plot_svm_nonlinear.py` - Custom Kernels -------------- @@ -523,8 +538,9 @@ classifiers, except that: use of ``fit()`` and ``predict()`` you will have unexpected results. -Using Python functions as kernels -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Using Python functions as kernels** +|details-split| You can use your own defined kernels by passing a function to the ``kernel`` parameter. @@ -542,13 +558,13 @@ instance that will use that kernel:: ... return np.dot(X, Y.T) ... >>> clf = svm.SVC(kernel=my_kernel) + +|details-end| -.. topic:: Examples: - - * :ref:`sphx_glr_auto_examples_svm_plot_custom_kernel.py`. -Using the Gram matrix -~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Using the Gram matrix** +|details-split| You can pass pre-computed kernels by using the ``kernel='precomputed'`` option. You should then pass Gram matrix instead of X to the `fit` and @@ -571,6 +587,11 @@ test vectors must be provided: >>> clf.predict(gram_test) array([0, 1, 0]) +|details-end| + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_svm_plot_custom_kernel.py`. .. _svm_mathematical_formulation: @@ -667,8 +688,9 @@ term :math:`b` estimator used is :class:`~sklearn.linear_model.Ridge` regression, the relation between them is given as :math:`C = \frac{1}{alpha}`. 
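The fitted quantities named above (``dual_coef_``, ``support_vectors_`` and ``intercept_``)
can be inspected directly after fitting; a brief sketch on a toy dataset (arbitrary, for
illustration only)::

    from sklearn.datasets import make_classification
    from sklearn.svm import SVC

    X, y = make_classification(n_samples=100, n_features=4, random_state=0)
    clf = SVC(kernel="rbf", C=1.0).fit(X, y)

    print(clf.support_vectors_.shape)  # the support vectors
    print(clf.dual_coef_.shape)        # (1, n_SV): the products y_i * alpha_i
    print(clf.intercept_)              # the independent term b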
-LinearSVC ---------- +|details-start| +**LinearSVC** +|details-split| The primal problem can be equivalently formulated as @@ -683,10 +705,13 @@ does not involve inner products between samples, so the famous kernel trick cannot be applied. This is why only the linear kernel is supported by :class:`LinearSVC` (:math:`\phi` is the identity function). +|details-end| + .. _nu_svc: -NuSVC ------ +|details-start| +**NuSVC** +|details-split| The :math:`\nu`-SVC formulation [#7]_ is a reparameterization of the :math:`C`-SVC and therefore mathematically equivalent. @@ -699,6 +724,7 @@ to a sample that lies on the wrong side of its margin boundary: it is either misclassified, or it is correctly classified but does not lie beyond the margin. +|details-end| SVR --- @@ -747,8 +773,9 @@ which holds the difference :math:`\alpha_i - \alpha_i^*`, ``support_vectors_`` w holds the support vectors, and ``intercept_`` which holds the independent term :math:`b` -LinearSVR ---------- +|details-start| +**LinearSVR** +|details-split| The primal problem can be equivalently formulated as @@ -760,6 +787,8 @@ where we make use of the epsilon-insensitive loss, i.e. errors of less than :math:`\varepsilon` are ignored. This is the form that is directly optimized by :class:`LinearSVR`. +|details-end| + .. _svm_implementation_details: Implementation details diff --git a/doc/modules/tree.rst b/doc/modules/tree.rst index f7d43c5a3d7da..cd15e7aadb696 100644 --- a/doc/modules/tree.rst +++ b/doc/modules/tree.rst @@ -27,8 +27,8 @@ Some advantages of decision trees are: - Requires little data preparation. Other techniques often require data normalization, dummy variables need to be created and blank values to - be removed. Note however that this module does not support missing - values. + be removed. Some tree and algorithm combinations support + :ref:`missing values `. - The cost of using the tree (i.e., predicting data) is logarithmic in the number of data points used to train the tree. @@ -146,6 +146,10 @@ Once trained, you can plot the tree with the :func:`plot_tree` function:: :scale: 75 :align: center +|details-start| +**Alternative ways to export trees** +|details-split| + We can also export the tree in `Graphviz `_ format using the :func:`export_graphviz` exporter. If you use the `conda `_ package manager, the graphviz binaries @@ -212,6 +216,8 @@ of external libraries and is more compact: | | |--- class: 2 +|details-end| + .. topic:: Examples: * :ref:`sphx_glr_auto_examples_tree_plot_iris_dtc.py` @@ -281,7 +287,6 @@ of shape ``(n_samples, n_outputs)`` then the resulting estimator will: * Output a list of n_output arrays of class probabilities upon ``predict_proba``. - The use of multi-output trees for regression is demonstrated in :ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py`. In this example, the input X is a single real value and the outputs Y are the sine and cosine of X. @@ -303,16 +308,20 @@ the lower half of those faces. .. topic:: Examples: - * :ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py` - * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` + * :ref:`sphx_glr_auto_examples_tree_plot_tree_regression_multioutput.py` + * :ref:`sphx_glr_auto_examples_miscellaneous_plot_multioutput_face_completion.py` -.. topic:: References: +|details-start| +**References** +|details-split| * M. 
Dumont et al, `Fast multi-class image annotation with random subwindows and multiple output randomized trees `_, International Conference on Computer Vision Theory and Applications 2009 +|details-end| + .. _tree_complexity: Complexity @@ -403,6 +412,10 @@ Tree algorithms: ID3, C4.5, C5.0 and CART What are all the various decision tree algorithms and how do they differ from each other? Which one is implemented in scikit-learn? +|details-start| +**Various decision tree algorithms** +|details-split| + ID3_ (Iterative Dichotomiser 3) was developed in 1986 by Ross Quinlan. The algorithm creates a multiway tree, finding for each node (i.e. in a greedy manner) the categorical feature that will yield the largest @@ -428,6 +441,8 @@ it differs in that it supports numerical target variables (regression) and does not compute rule sets. CART constructs binary trees using the feature and threshold that yield the largest information gain at each node. +|details-end| + scikit-learn uses an optimized version of the CART algorithm; however, the scikit-learn implementation does not support categorical variables for now. @@ -500,8 +515,9 @@ Log Loss or Entropy: H(Q_m) = - \sum_k p_{mk} \log(p_{mk}) - -.. note:: +|details-start| +Shannon entropy: +|details-split| The entropy criterion computes the Shannon entropy of the possible classes. It takes the class frequencies of the training data points that reached a given @@ -531,6 +547,8 @@ Log Loss or Entropy: \mathrm{LL}(D, T) = \sum_{m \in T} \frac{n_m}{n} H(Q_m) +|details-end| + Regression criteria ------------------- @@ -577,7 +595,7 @@ Note that it fits much slower than the MSE criterion. Missing Values Support ====================== -:class:`~tree.DecisionTreeClassifier` and :class:`~tree.DecisionTreeRegressor` +:class:`DecisionTreeClassifier` and :class:`DecisionTreeRegressor` have built-in support for missing values when `splitter='best'` and criterion is `'gini'`, `'entropy`', or `'log_loss'`, for classification or `'squared_error'`, `'friedman_mse'`, or `'poisson'` for regression. @@ -671,7 +689,9 @@ be pruned. This process stops when the pruned tree's minimal * :ref:`sphx_glr_auto_examples_tree_plot_cost_complexity_pruning.py` -.. topic:: References: +|details-start| +**References** +|details-split| .. [BRE] L. Breiman, J. Friedman, R. Olshen, and C. Stone. Classification and Regression Trees. Wadsworth, Belmont, CA, 1984. @@ -685,3 +705,5 @@ be pruned. This process stops when the pruned tree's minimal * T. Hastie, R. Tibshirani and J. Friedman. Elements of Statistical Learning, Springer, 2009. + +|details-end| diff --git a/doc/modules/unsupervised_reduction.rst b/doc/modules/unsupervised_reduction.rst index 6e16886064cfc..90c80714c3131 100644 --- a/doc/modules/unsupervised_reduction.rst +++ b/doc/modules/unsupervised_reduction.rst @@ -31,7 +31,7 @@ capture well the variance of the original features. See :ref:`decompositions`. Random projections ------------------- -The module: :mod:`random_projection` provides several tools for data +The module: :mod:`~sklearn.random_projection` provides several tools for data reduction by random projections. See the relevant section of the documentation: :ref:`random_projection`. @@ -55,6 +55,5 @@ similarly. Note that if features have very different scaling or statistical properties, :class:`cluster.FeatureAgglomeration` may not be able to - capture the links between related features. Using a + capture the links between related features. Using a :class:`preprocessing.StandardScaler` can be useful in these settings. 
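One way to follow that advice is to chain the two steps in a pipeline; a short sketch on the
digits data (the choice of 16 clusters is arbitrary)::

    from sklearn.cluster import FeatureAgglomeration
    from sklearn.datasets import load_digits
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    X, _ = load_digits(return_X_y=True)

    # Standardize first so that differently scaled features do not dominate the
    # agglomeration, then merge the 64 pixel features into 16 cluster features.
    reducer = make_pipeline(StandardScaler(), FeatureAgglomeration(n_clusters=16))
    X_reduced = reducer.fit_transform(X)
    print(X_reduced.shape)  # (1797, 16)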
- diff --git a/doc/related_projects.rst b/doc/related_projects.rst index 9cc70ad89ffff..10304a7070be0 100644 --- a/doc/related_projects.rst +++ b/doc/related_projects.rst @@ -21,9 +21,6 @@ enhance the functionality of scikit-learn's estimators. **Data formats** -- `Fast svmlight / libsvm file loader `_ - Fast and memory-efficient svmlight / libsvm file loader for Python. - - `sklearn_pandas `_ bridge for scikit-learn pipelines and pandas data frame with dedicated transformers. @@ -64,19 +61,20 @@ enhance the functionality of scikit-learn's estimators. It incorporates multiple modeling libraries under one API, and the objects that EvalML creates use an sklearn-compatible API. -**Experimentation frameworks** +**Experimentation and model registry frameworks** + +- `MLFlow `_ MLflow is an open source platform to manage the ML + lifecycle, including experimentation, reproducibility, deployment, and a central + model registry. - `Neptune `_ Metadata store for MLOps, - built for teams that run a lot of experiments.‌ It gives you a single + built for teams that run a lot of experiments. It gives you a single place to log, store, display, organize, compare, and query all your model building metadata. - `Sacred `_ Tool to help you configure, organize, log and reproduce experiments -- `REP `_ Environment for conducting data-driven - research in a consistent and reproducible way - - `Scikit-Learn Laboratory `_ A command-line wrapper around scikit-learn that makes it easy to run machine learning @@ -91,10 +89,7 @@ enhance the functionality of scikit-learn's estimators. debugging/inspecting machine learning models and explaining their predictions. -- `mlxtend `_ Includes model visualization - utilities. - -- `sklearn-evaluation `_ +- `sklearn-evaluation `_ Machine learning model evaluation made easy: plots, tables, HTML reports, experiment tracking and Jupyter notebook analysis. Visual analysis, model selection, evaluation and diagnostics. @@ -140,7 +135,15 @@ enhance the functionality of scikit-learn's estimators. - `treelite `_ Compiles tree-based ensemble models into C code for minimizing prediction latency. - + +- `micromlgen `_ + MicroML brings Machine Learning algorithms to microcontrollers. + Supports several scikit-learn classifiers by transpiling them to C code. + +- `emlearn `_ + Implements scikit-learn estimators in C99 for embedded devices and microcontrollers. + Supports several classifier, regression and outlier detection models. + **Model throughput** - `Intel(R) Extension for scikit-learn `_ @@ -161,12 +164,40 @@ project. The following are projects providing interfaces similar to scikit-learn for additional learning algorithms, infrastructures and tasks. -**Structured learning** +**Time series and forecasting** + +- `Darts `_ Darts is a Python library for + user-friendly forecasting and anomaly detection on time series. It contains a variety + of models, from classics such as ARIMA to deep neural networks. The forecasting + models can all be used in the same way, using fit() and predict() functions, similar + to scikit-learn. + +- `sktime `_ A scikit-learn compatible + toolbox for machine learning with time series including time series + classification/regression and (supervised/panel) forecasting. + +- `skforecast `_ A python library + that eases using scikit-learn regressors as multi-step forecasters. It also works + with any regressor compatible with the scikit-learn API. 
+ +- `tslearn `_ A machine learning library for + time series that offers tools for pre-processing and feature extraction as well as + dedicated models for clustering, classification and regression. -- `tslearn `_ A machine learning library for time series - that offers tools for pre-processing and feature extraction as well as dedicated models for clustering, classification and regression. +**Gradient (tree) boosting** -- `sktime `_ A scikit-learn compatible toolbox for machine learning with time series including time series classification/regression and (supervised/panel) forecasting. +Note scikit-learn own modern gradient boosting estimators +:class:`~sklearn.ensemble.HistGradientBoostingClassifier` and +:class:`~sklearn.ensemble.HistGradientBoostingRegressor`. + +- `XGBoost `_ XGBoost is an optimized distributed + gradient boosting library designed to be highly efficient, flexible and portable. + +- `LightGBM `_ LightGBM is a gradient boosting + framework that uses tree based learning algorithms. It is designed to be distributed + and efficient. + +**Structured learning** - `HMMLearn `_ Implementation of hidden markov models that was previously part of scikit-learn. @@ -182,21 +213,9 @@ and tasks. (`CRFsuite `_ wrapper with sklearn-like API). -- `skforecast `_ A python library - that eases using scikit-learn regressors as multi-step forecasters. It also works - with any regressor compatible with the scikit-learn API. **Deep neural networks etc.** -- `nolearn `_ A number of wrappers and - abstractions around existing neural network libraries - -- `Keras `_ High-level API for - TensorFlow with a scikit-learn inspired API. - -- `lasagne `_ A lightweight library to - build and train neural networks in Theano. - - `skorch `_ A scikit-learn compatible neural network library that wraps PyTorch. @@ -219,9 +238,6 @@ and tasks. **Other regression and classification** -- `xgboost `_ Optimised gradient boosted decision - tree library. - - `ML-Ensemble `_ Generalized ensemble learning (stacking, blending, subsemble, deep ensembles, etc.). @@ -232,10 +248,6 @@ and tasks. - `py-earth `_ Multivariate adaptive regression splines -- `Kernel Regression `_ - Implementation of Nadaraya-Watson kernel regression with automatic bandwidth - selection - - `gplearn `_ Genetic Programming for symbolic regression tasks. @@ -245,8 +257,6 @@ and tasks. - `seglearn `_ Time series and sequence learning using sliding window segmentation. -- `libOPF `_ Optimal path forest classifier - - `fastFM `_ Fast factorization machine implementation compatible with scikit-learn @@ -266,6 +276,7 @@ and tasks. - `hdbscan `_ HDBSCAN and Robust Single Linkage clustering algorithms for robust variable density clustering. + As of scikit-learn version 1.3.0, there is :class:`~sklearn.cluster.HDBSCAN`. - `spherecluster `_ Spherical K-means and mixture of von Mises Fisher clustering routines for data on the @@ -276,6 +287,8 @@ and tasks. - `categorical-encoding `_ A library of sklearn compatible categorical variable encoders. + As of scikit-learn version 1.3.0, there is + :class:`~sklearn.preprocessing.TargetEncoder`. - `imbalanced-learn `_ Various @@ -331,9 +344,6 @@ Recommendation Engine packages - `OpenRec `_ TensorFlow-based neural-network inspired recommendation algorithms. -- `Spotlight `_ Pytorch-based - implementation of deep recommender models. - - `Surprise Lib `_ Library for explicit feedback datasets. @@ -355,9 +365,6 @@ Domain specific packages - `AstroML `_ Machine learning for astronomy. 
-- `MSMBuilder `_ Machine learning for protein - conformational dynamics time series. - Translations of scikit-learn documentation ------------------------------------------ diff --git a/doc/sphinxext/allow_nan_estimators.py b/doc/sphinxext/allow_nan_estimators.py index e8f94506daaa5..89d7077bce2b5 100755 --- a/doc/sphinxext/allow_nan_estimators.py +++ b/doc/sphinxext/allow_nan_estimators.py @@ -1,11 +1,12 @@ -from sklearn.utils import all_estimators -from sklearn.utils.estimator_checks import _construct_instance -from sklearn.utils._testing import SkipTest -from docutils import nodes from contextlib import suppress +from docutils import nodes from docutils.parsers.rst import Directive +from sklearn.utils import all_estimators +from sklearn.utils._testing import SkipTest +from sklearn.utils.estimator_checks import _construct_instance + class AllowNanEstimators(Directive): @staticmethod diff --git a/doc/sphinxext/doi_role.py b/doc/sphinxext/doi_role.py index 7d188969bb778..32e905fe650ea 100644 --- a/doc/sphinxext/doi_role.py +++ b/doc/sphinxext/doi_role.py @@ -15,7 +15,6 @@ """ from docutils import nodes, utils - from sphinx.util.nodes import split_explicit_title diff --git a/doc/sphinxext/github_link.py b/doc/sphinxext/github_link.py index 3992d814b825e..d3e43c8ed0f5e 100644 --- a/doc/sphinxext/github_link.py +++ b/doc/sphinxext/github_link.py @@ -1,9 +1,9 @@ -from operator import attrgetter import inspect -import subprocess import os +import subprocess import sys from functools import partial +from operator import attrgetter REVISION_CMD = "git rev-parse --short HEAD" diff --git a/doc/templates/class.rst b/doc/templates/class.rst index 79ff2cf807794..1e98be4099b73 100644 --- a/doc/templates/class.rst +++ b/doc/templates/class.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}============== diff --git a/doc/templates/class_with_call.rst b/doc/templates/class_with_call.rst index f98b7dbbf6578..bc1567709c9d3 100644 --- a/doc/templates/class_with_call.rst +++ b/doc/templates/class_with_call.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}=============== diff --git a/doc/templates/deprecated_class.rst b/doc/templates/deprecated_class.rst index 857e2c28ce1da..5c31936f6fc36 100644 --- a/doc/templates/deprecated_class.rst +++ b/doc/templates/deprecated_class.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}============== diff --git a/doc/templates/deprecated_class_with_call.rst b/doc/templates/deprecated_class_with_call.rst index a04efcb80be07..072a31112be50 100644 --- a/doc/templates/deprecated_class_with_call.rst +++ b/doc/templates/deprecated_class_with_call.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. 
+ :mod:`{{module}}`.{{objname}} {{ underline }}=============== diff --git a/doc/templates/deprecated_class_without_init.rst b/doc/templates/deprecated_class_without_init.rst index c019992493610..a26afbead5451 100644 --- a/doc/templates/deprecated_class_without_init.rst +++ b/doc/templates/deprecated_class_without_init.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}============== diff --git a/doc/templates/deprecated_function.rst b/doc/templates/deprecated_function.rst index 6d13ac6aca2de..ead5abec27076 100644 --- a/doc/templates/deprecated_function.rst +++ b/doc/templates/deprecated_function.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}==================== diff --git a/doc/templates/display_all_class_methods.rst b/doc/templates/display_all_class_methods.rst index 1211296bb57ce..b179473cf841e 100644 --- a/doc/templates/display_all_class_methods.rst +++ b/doc/templates/display_all_class_methods.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}============== diff --git a/doc/templates/display_only_from_estimator.rst b/doc/templates/display_only_from_estimator.rst index 6d064133fc5e2..9981910dc8be7 100644 --- a/doc/templates/display_only_from_estimator.rst +++ b/doc/templates/display_only_from_estimator.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}============== diff --git a/doc/templates/function.rst b/doc/templates/function.rst index f4b11eda770e4..93d368ecfe6d5 100644 --- a/doc/templates/function.rst +++ b/doc/templates/function.rst @@ -1,3 +1,8 @@ +.. + The empty line below should not be removed. It is added such that the `rst_prolog` + is added before the :mod: directive. Otherwise, the rendering will show as a + paragraph instead of a header. + :mod:`{{module}}`.{{objname}} {{ underline }}==================== diff --git a/doc/templates/index.html b/doc/templates/index.html index db5d02ab9d9ab..a20da900bafcb 100644 --- a/doc/templates/index.html +++ b/doc/templates/index.html @@ -42,9 +42,10 @@

[doc/templates/index.html — the HTML markup of the remaining hunks (@@ -62,9 +63,10 @@, @@ -83,8 +85,9 @@, @@ -166,44 +169,18 @@, @@ -274,14 +251,13 @@) is not preserved here; their textual changes are:]

- Classification card ("Identifying which category an object belongs to."; Applications: Spam detection, image recognition): the algorithm list changes from "SVM, nearest neighbors, random forest, and more..." to "Gradient boosting, nearest neighbors, random forest, logistic regression, and more...".
- Regression card ("Predicting a continuous-valued attribute associated with an object."; Applications: Drug response, Stock prices): the algorithm list changes from "SVR, nearest neighbors, random forest, and more..." to "Gradient boosting, nearest neighbors, random forest, ridge, and more...".
- Clustering card (Applications: Customer segmentation, Grouping experiment outcomes): the algorithm list changes from "k-Means, spectral clustering, mean-shift, and more..." to "k-Means, HDBSCAN, hierarchical clustering, and more...".
- News list: keeps "On-going development: What's new (Changelog)"; drops the entries from December 2019 (scikit-learn 0.22) through October 2022 (scikit-learn 1.1.3) as well as the note "Scikit-learn from 0.23 requires Python 3.6 or newer"; adds October 2023 (scikit-learn 1.3.2), September 2023 (1.3.1) and June 2023 (1.3.0), keeping March 2023 (1.2.2), January 2023 (1.2.1) and December 2022 (1.2.0).
- "Who uses scikit-learn?" funding block ("scikit-learn development and maintenance are financially supported by"): the set of sponsor logo links is updated.
    diff --git a/doc/themes/scikit-learn-modern/javascript.html b/doc/themes/scikit-learn-modern/javascript.html index 635dfbd779b2a..be4cf26073441 100644 --- a/doc/themes/scikit-learn-modern/javascript.html +++ b/doc/themes/scikit-learn-modern/javascript.html @@ -13,6 +13,9 @@ {% endif %} + + + {%- block extrahead %} {% endblock %} diff --git a/doc/themes/scikit-learn-modern/static/css/theme.css b/doc/themes/scikit-learn-modern/static/css/theme.css index 90cfeb9300490..21e1a2336a553 100644 --- a/doc/themes/scikit-learn-modern/static/css/theme.css +++ b/doc/themes/scikit-learn-modern/static/css/theme.css @@ -147,6 +147,61 @@ div.clearer { clear: both; } +/* details / summary */ + +div.sk-page-content details { + margin: 4ex 0pt; +} + +div.sk-page-content summary.btn { + display: list-item; + padding: 6px 20px; + border: 1pt solid #999; +} + +div.sk-page-content details div.card { + padding: 0pt .5ex; + margin: 1ex 0pt; + border: 1px solid #e9ecef; + border-left-width: .25rem; + border-radius: .25rem; + background: rgb(250, 252, 253) +} + +div.sk-page-content summary { + position: relative; /* Needed for the tooltips */ +} + +div.sk-page-content summary .tooltiptext { + visibility: hidden; + width: 120px; + background-color: black; + color: #fff; + text-align: center; + border-radius: 6px; + padding: 5px 0; + position: absolute; + z-index: 1; + bottom: 150%; + left: 50%; + margin-left: -60px; +} + +div.sk-page-content summary .tooltiptext::after { + content: ""; + position: absolute; + top: 100%; + left: 50%; + margin-left: -5px; + border-width: 5px; + border-style: solid; + border-color: black transparent transparent transparent; +} + +div.sk-page-content summary:hover .tooltiptext { + visibility: visible; +} + /* Button */ .sk-btn-primary { @@ -606,17 +661,25 @@ div.sk-sidebar-global-toc ul ul { div.sk-page-content h1 { background-color: #cde8ef; padding: 0.5rem; + margin-top: calc(max(1rem, 1vh)); border-radius: 0 1rem; text-align: center; font-size: 2rem; word-wrap: break-word; } +/* General sibling selector: does not apply to first h1, to avoid gap in + * top of page */ +div.sk-page-content ~ h1 { + margin-top: calc(max(2.5rem, 1vh)); +} + div.sk-page-content h2 { padding: 0.5rem; background-color: #BED4EB; border-radius: 0.3rem; font-size: 1.5rem; + margin-top: calc(max(2rem, .7vh)); margin-bottom: 1rem; word-wrap: break-word; } @@ -627,6 +690,7 @@ div.sk-page-content h3 { border-radius: 0.3rem; font-size: 1.2rem; word-wrap: break-word; + margin-top: 1.5rem; } div.sk-page-content h4 { @@ -865,14 +929,8 @@ dt.label { padding-right: 0.5rem; } -/* copy button */ -div.highlight:hover span.copybutton { - background-color: #3F556B; - color: white; -} - -div.highlight:hover span.copybutton:hover { - background-color: #20252B; +button.copybtn { + border: 0; } div.body img { @@ -900,34 +958,6 @@ img.align-right, figure.align-right, margin-left: 1em; } -/* copybutton */ - -.copybutton { - cursor: pointer; - position: absolute; - top: 0px; - right: 0px; - border: 1px solid rgb(221, 221, 221); - color: rgb(221, 221, 221); - font-family: monospace; - padding-left: 0.2rem; - padding-right: 0.2rem; -} - -div.highlight:hover span.copybutton::after { - background: #3F556B; - border-radius: 0.25rem; - color: white; - content: attr(title); - padding: 0.25rem; - position: absolute; - z-index: 98; - width: 100px; - font-size: 0.7rem; - top: 0; - right: 0; -} - /* world */ img.avatar { @@ -995,13 +1025,12 @@ div.sphx-glr-thumbcontainer { padding: 0; } - @media screen and (min-width: 1540px) { - 
.sphx-glr-download-link-note { - position: absolute; + div.sphx-glr-download-link-note.admonition.note { position: absolute; left: 98%; width: 20ex; + margin-top: calc(max(5.75rem, 1vh)); } } @@ -1175,8 +1204,11 @@ div.install > input:checked + label { .sk-expandable { display: none; } +.sk-expandable + .copybtn { + display: none; +} -div.highlight span.sk-expandable:before { +pre.sk-expandable > span:before { content: "$ "; } @@ -1185,15 +1217,24 @@ div.highlight span.sk-expandable:before { #quickstart-conda:checked ~* [data-packager="conda"] { display: block; } +#quickstart-conda:checked ~* [data-packager="conda"] + .copybtn { + display: block; +} #quickstart-conda:checked ~ #quickstart-venv ~ label[for="quickstart-venv"] { display: none; } +#quickstart-conda:checked ~ #quickstart-venv ~ label[for="quickstart-venv"] + .copybtn { + display: none; +} /* for pip */ #quickstart-pip:checked ~* [data-packager="pip"] { display: block; } +#quickstart-pip:checked ~* [data-packager="pip"] + .copybtn { + display: block; +} #quickstart-pip:checked ~ label[for="quickstart-venv"]:before { content: "Use pip virtualenv"; @@ -1202,20 +1243,37 @@ div.highlight span.sk-expandable:before { #quickstart-win:not(:checked) ~* [data-os="windows"] { display: none; } +#quickstart-win:not(:checked) ~* [data-os="windows"] + .copybtn { + display: none; +} + #quickstart-lin:not(:checked) ~* [data-os="linux"] { display: none; } +#quickstart-lin:not(:checked) ~* [data-os="linux"] + .copybtn { + display: none; +} + #quickstart-mac:not(:checked) ~* [data-os="mac"] { display: none; } +#quickstart-mac:not(:checked) ~* [data-os="mac"] + .copybtn { + display: none; +} #quickstart-venv:not(:checked) ~* [data-venv=""] { display: none; } +#quickstart-venv:not(:checked) ~* [data-venv=""] + .copybtn { + display: none; +} #quickstart-venv:checked ~* [data-venv="no"] { display: none; } +#quickstart-venv:checked ~* [data-venv="no"] + .copybtn { + display: none; +} /* Algorithm cheet-sheet */ diff --git a/doc/tutorial/machine_learning_map/pyparsing.py b/doc/tutorial/machine_learning_map/pyparsing.py index 0418cf2b51528..88d00e138d02c 100644 --- a/doc/tutorial/machine_learning_map/pyparsing.py +++ b/doc/tutorial/machine_learning_map/pyparsing.py @@ -21,7 +21,7 @@ # TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. # -# flake8: noqa +# ruff: noqa __doc__ = \ """ diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index 629d163be4370..d7477b279662d 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -465,7 +465,7 @@ Linear kernel >>> svc = svm.SVC(kernel='linear') -.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_001.png +.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png :target: ../../auto_examples/svm/plot_svm_kernels.html Polynomial kernel @@ -477,7 +477,7 @@ Polynomial kernel ... degree=3) >>> # degree: polynomial degree -.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_002.png +.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png :target: ../../auto_examples/svm/plot_svm_kernels.html RBF kernel (Radial Basis Function) @@ -489,7 +489,17 @@ RBF kernel (Radial Basis Function) >>> # gamma: inverse of size of >>> # radial kernel -.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_003.png +.. 
image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_004.png + :target: ../../auto_examples/svm/plot_svm_kernels.html + +Sigmoid kernel +^^^^^^^^^^^^^^ + +:: + + >>> svc = svm.SVC(kernel='sigmoid') + +.. image:: /auto_examples/svm/images/sphx_glr_plot_svm_kernels_005.png :target: ../../auto_examples/svm/plot_svm_kernels.html diff --git a/doc/tutorial/text_analytics/working_with_text_data.rst b/doc/tutorial/text_analytics/working_with_text_data.rst index a878b766bd4fa..61fd02da05548 100644 --- a/doc/tutorial/text_analytics/working_with_text_data.rst +++ b/doc/tutorial/text_analytics/working_with_text_data.rst @@ -520,7 +520,7 @@ Exercise 1: Language identification ----------------------------------- - Write a text classification pipeline using a custom preprocessor and - ``CharNGramAnalyzer`` using data from Wikipedia articles as training set. + ``TfidfVectorizer`` set up to use character based n-grams, using data from Wikipedia articles as the training set. - Evaluate the performance on some held out test set. diff --git a/doc/whats_new/older_versions.rst b/doc/whats_new/older_versions.rst index 221de4cdb7e4c..5a1d6a1c7c13f 100644 --- a/doc/whats_new/older_versions.rst +++ b/doc/whats_new/older_versions.rst @@ -101,7 +101,7 @@ Changelog - Add MultiTaskLasso and MultiTaskElasticNet for joint feature selection, by `Alexandre Gramfort`_. -- Added :func:`metrics.auc_score` and +- Added `metrics.auc_score` and :func:`metrics.average_precision_score` convenience functions by `Andreas Müller`_. @@ -121,7 +121,7 @@ Changelog and OrthogonalMatchingPursuit) by `Vlad Niculae`_ and `Alexandre Gramfort`_. -- Fixes in :class:`~decomposition.ProbabilisticPCA` score function by Wei Li. +- Fixes in `decomposition.ProbabilisticPCA` score function by Wei Li. - Fixed feature importance computation in :ref:`gradient_boosting`. @@ -136,8 +136,8 @@ API changes summary with it's order reversed, in order to keep it consistent with the order of the returned ``fpr`` and ``tpr``. -- In :class:`hmm` objects, like :class:`~hmm.GaussianHMM`, - :class:`~hmm.MultinomialHMM`, etc., all parameters must be passed to the +- In `hmm` objects, like `hmm.GaussianHMM`, + `hmm.MultinomialHMM`, etc., all parameters must be passed to the object when initialising it and not through ``fit``. Now ``fit`` will only accept the data as an input parameter. @@ -180,7 +180,7 @@ API changes summary :meth:`~ensemble.GradientBoostingClassifier.staged_predict_proba`, and :meth:`~ensemble.GradientBoostingClassifier.staged_predict`. -- :class:`~svm.sparse.SVC` and other sparse SVM classes are now deprecated. +- `svm.sparse.SVC` and other sparse SVM classes are now deprecated. The all classes in the :ref:`svm` module now automatically select the sparse or dense representation base on the input. @@ -282,8 +282,8 @@ Highlights - Added BIC/AIC model selection to classical :ref:`gmm` and unified the API with the remainder of scikit-learn, by `Bertrand Thirion`_ -- Added :class:`~sklearn.cross_validation.StratifiedShuffleSplit`, which is - a :class:`~sklearn.cross_validation.ShuffleSplit` with balanced splits, +- Added `sklearn.cross_validation.StratifiedShuffleSplit`, which is + a `sklearn.cross_validation.ShuffleSplit` with balanced splits, by Yannick Schwartz. - :class:`~sklearn.neighbors.NearestCentroid` classifier added, along with a @@ -307,15 +307,15 @@ Other changes - Regressors can now be used as base estimator in the :ref:`multiclass` module by `Mathieu Blondel`_. 
-- Added n_jobs option to :func:`metrics.pairwise.pairwise_distances` +- Added n_jobs option to :func:`metrics.pairwise_distances` and :func:`metrics.pairwise.pairwise_kernels` for parallel computation, by `Mathieu Blondel`_. - :ref:`k_means` can now be run in parallel, using the ``n_jobs`` argument - to either :ref:`k_means` or :class:`KMeans`, by `Robert Layton`_. + to either :ref:`k_means` or :class:`cluster.KMeans`, by `Robert Layton`_. - Improved :ref:`cross_validation` and :ref:`grid_search` documentation - and introduced the new :func:`cross_validation.train_test_split` + and introduced the new `cross_validation.train_test_split` helper function by `Olivier Grisel`_ - :class:`~svm.SVC` members ``coef_`` and ``intercept_`` changed sign for @@ -330,7 +330,7 @@ Other changes API and fixed a bug that caused possible negative IDF, by `Olivier Grisel`_. -- Beam pruning option in :class:`_BaseHMM` module has been removed since it +- Beam pruning option in `_BaseHMM` module has been removed since it is difficult to Cythonize. If you are interested in contributing a Cython version, you can use the python version in the git history as a reference. @@ -340,31 +340,31 @@ Other changes API changes summary ------------------- -- :class:`~covariance.EllipticEnvelop` is now deprecated - Please use :class:`~covariance.EllipticEnvelope` - instead. +- `covariance.EllipticEnvelop` is now deprecated. + Please use :class:`~covariance.EllipticEnvelope` instead. - ``NeighborsClassifier`` and ``NeighborsRegressor`` are gone in the module - :ref:`neighbors`. Use the classes :class:`KNeighborsClassifier`, - :class:`RadiusNeighborsClassifier`, :class:`KNeighborsRegressor` - and/or :class:`RadiusNeighborsRegressor` instead. + :ref:`neighbors`. Use the classes :class:`~neighbors.KNeighborsClassifier`, + :class:`~neighbors.RadiusNeighborsClassifier`, :class:`~neighbors.KNeighborsRegressor` + and/or :class:`~neighbors.RadiusNeighborsRegressor` instead. - Sparse classes in the :ref:`sgd` module are now deprecated. -- In :class:`~mixture.GMM`, :class:`~mixture.DPGMM` and :class:`~mixture.VBGMM`, +- In `mixture.GMM`, `mixture.DPGMM` and `mixture.VBGMM`, parameters must be passed to an object when initialising it and not through ``fit``. Now ``fit`` will only accept the data as an input parameter. -- methods ``rvs`` and ``decode`` in :class:`GMM` module are now deprecated. +- methods ``rvs`` and ``decode`` in `GMM` module are now deprecated. ``sample`` and ``score`` or ``predict`` should be used instead. - attribute ``_scores`` and ``_pvalues`` in univariate feature selection objects are now deprecated. ``scores_`` or ``pvalues_`` should be used instead. -- In :class:`LogisticRegression`, :class:`LinearSVC`, :class:`SVC` and - :class:`NuSVC`, the ``class_weight`` parameter is now an initialization - parameter, not a parameter to fit. This makes grid searches - over this parameter possible. +- In :class:`~linear_model.LogisticRegression`, :class:`~svm.LinearSVC`, + :class:`~svm.SVC` and :class:`~svm.NuSVC`, the ``class_weight`` parameter is + now an initialization parameter, not a parameter to fit. This makes grid + searches over this parameter possible. - LFW ``data`` is now always shape ``(n_samples, n_features)`` to be consistent with the Olivetti faces dataset. Use ``images`` and @@ -375,14 +375,14 @@ API changes summary ``'ovr'`` being the default. This does not change the default behavior but hopefully is less confusing. 
-- Class :class:`~feature_selection.text.Vectorizer` is deprecated and - replaced by :class:`~feature_selection.text.TfidfVectorizer`. +- Class `feature_selection.text.Vectorizer` is deprecated and + replaced by `feature_selection.text.TfidfVectorizer`. - The preprocessor / analyzer nested structure for text feature extraction has been removed. All those features are now directly passed as flat constructor arguments - to :class:`~feature_selection.text.TfidfVectorizer` and - :class:`~feature_selection.text.CountVectorizer`, in particular the + to `feature_selection.text.TfidfVectorizer` and + `feature_selection.text.CountVectorizer`, in particular the following parameters are now used: - ``analyzer`` can be ``'word'`` or ``'char'`` to switch the default @@ -401,27 +401,27 @@ API changes summary ``vocabulary_`` attribute to be consistent with the project conventions. -- Class :class:`~feature_selection.text.TfidfVectorizer` now derives directly - from :class:`~feature_selection.text.CountVectorizer` to make grid +- Class `feature_selection.text.TfidfVectorizer` now derives directly + from `feature_selection.text.CountVectorizer` to make grid search trivial. -- methods ``rvs`` in :class:`_BaseHMM` module are now deprecated. +- methods ``rvs`` in `_BaseHMM` module are now deprecated. ``sample`` should be used instead. -- Beam pruning option in :class:`_BaseHMM` module is removed since it is +- Beam pruning option in `_BaseHMM` module is removed since it is difficult to be Cythonized. If you are interested, you can look in the history codes by git. - The SVMlight format loader now supports files with both zero-based and one-based column indices, since both occur "in the wild". -- Arguments in class :class:`ShuffleSplit` are now consistent with - :class:`StratifiedShuffleSplit`. Arguments ``test_fraction`` and +- Arguments in class :class:`~model_selection.ShuffleSplit` are now consistent with + :class:`~model_selection.StratifiedShuffleSplit`. Arguments ``test_fraction`` and ``train_fraction`` are deprecated and renamed to ``test_size`` and ``train_size`` and can accept both ``float`` and ``int``. -- Arguments in class :class:`Bootstrap` are now consistent with - :class:`StratifiedShuffleSplit`. Arguments ``n_test`` and +- Arguments in class `Bootstrap` are now consistent with + :class:`~model_selection.StratifiedShuffleSplit`. Arguments ``n_test`` and ``n_train`` are deprecated and renamed to ``test_size`` and ``train_size`` and can accept both ``float`` and ``int``. @@ -557,7 +557,7 @@ Changelog by `Mathieu Blondel`_. - Make :func:`~sklearn.preprocessing.scale` and - :class:`~sklearn.preprocessing.Scaler` work on sparse matrices by + `sklearn.preprocessing.Scaler` work on sparse matrices by `Olivier Grisel`_ - Feature importances using decision trees and/or forest of trees, @@ -566,7 +566,7 @@ Changelog - Parallel implementation of forests of randomized trees by `Gilles Louppe`_. -- :class:`~sklearn.cross_validation.ShuffleSplit` can subsample the train +- `sklearn.cross_validation.ShuffleSplit` can subsample the train sets as well as the test sets by `Olivier Grisel`_. - Errors in the build of the documentation fixed by `Andreas Müller`_. @@ -582,7 +582,7 @@ version 0.9: had ``overwrite_`` parameters; these have been replaced with ``copy_`` parameters with exactly the opposite meaning. - This particularly affects some of the estimators in :mod:`linear_model`. + This particularly affects some of the estimators in :mod:`~sklearn.linear_model`. 
The default behavior is still to copy everything passed in. - The SVMlight dataset loader :func:`~sklearn.datasets.load_svmlight_file` no @@ -596,10 +596,10 @@ version 0.9: - The :ref:`covariance` module now has a robust estimator of covariance, the Minimum Covariance Determinant estimator. -- Cluster evaluation metrics in :mod:`metrics.cluster` have been refactored +- Cluster evaluation metrics in :mod:`~sklearn.metrics.cluster` have been refactored but the changes are backwards compatible. They have been moved to the - :mod:`metrics.cluster.supervised`, along with - :mod:`metrics.cluster.unsupervised` which contains the Silhouette + `metrics.cluster.supervised`, along with + `metrics.cluster.unsupervised` which contains the Silhouette Coefficient. - The ``permutation_test_score`` function now behaves the same way as @@ -622,7 +622,7 @@ version 0.9: - ``BaseDictionaryLearning`` class replaced by ``SparseCodingMixin``. -- :func:`~sklearn.utils.extmath.fast_svd` has been renamed +- `sklearn.utils.extmath.fast_svd` has been renamed :func:`~sklearn.utils.extmath.randomized_svd` and the default oversampling is now fixed to 10 additional random vectors instead of doubling the number of components to extract. The new behavior @@ -744,7 +744,7 @@ Changelog - Text feature extraction optimizations by Lars Buitinck - Chi-Square feature selection - (:func:`feature_selection.univariate_selection.chi2`) by `Lars Buitinck`_. + (:func:`feature_selection.chi2`) by `Lars Buitinck`_. - :ref:`sample_generators` module refactoring by `Gilles Louppe`_ @@ -778,7 +778,7 @@ Changelog - Scalability improvements to :func:`metrics.roc_curve` by Olivier Hervieu -- Distance helper functions :func:`metrics.pairwise.pairwise_distances` +- Distance helper functions :func:`metrics.pairwise_distances` and :func:`metrics.pairwise.pairwise_kernels` by Robert Layton - :class:`Mini-Batch K-Means ` by Nelle Varoquaux and Peter Prettenhofer. @@ -1047,7 +1047,7 @@ Changelog - Sanity checks for SVM-based classes [`Mathieu Blondel`_]. -- Refactoring of :class:`~neighbors.NeighborsClassifier` and +- Refactoring of `neighbors.NeighborsClassifier` and :func:`neighbors.kneighbors_graph`: added different algorithms for the k-Nearest Neighbor Search and implemented a more stable algorithm for finding barycenter weights. Also added some @@ -1055,7 +1055,7 @@ Changelog `notes_neighbors `_ for more information [`Fabian Pedregosa`_]. -- Documentation improvements: Added :class:`~pca.RandomizedPCA` and +- Documentation improvements: Added `pca.RandomizedPCA` and :class:`~linear_model.LogisticRegression` to the class reference. Also added references of matrices used for clustering and other fixes [`Gael Varoquaux`_, `Fabian Pedregosa`_, `Mathieu @@ -1067,12 +1067,12 @@ Changelog :class:`~linear_model.LogisticRegression` [`Fabian Pedregosa`_]. - Performance and API improvements to - :func:`metrics.euclidean_distances` and to - :class:`~pca.RandomizedPCA` [`James Bergstra`_]. + :func:`metrics.pairwise.euclidean_distances` and to + `pca.RandomizedPCA` [`James Bergstra`_]. - Fix compilation issues under NetBSD [Kamel Ibn Hassen Derouiche] -- Allow input sequences of different lengths in :class:`~hmm.GaussianHMM` +- Allow input sequences of different lengths in `hmm.GaussianHMM` [`Ron Weiss`_]. - Fix bug in affinity propagation caused by incorrect indexing [Xinfan Meng] @@ -1141,7 +1141,7 @@ Changelog extraction. 
- Improved sparse matrix support, both in main classes - (:class:`~grid_search.GridSearchCV`) as in modules + (:class:`~model_selection.GridSearchCV`) as in modules sklearn.svm.sparse and sklearn.linear_model.sparse. - Lots of cool new examples and a new section that uses real-world @@ -1218,9 +1218,9 @@ New classes ----------- - Support for sparse matrices in some classifiers of modules - ``svm`` and ``linear_model`` (see :class:`~svm.sparse.SVC`, - :class:`~svm.sparse.SVR`, :class:`~svm.sparse.LinearSVC`, - :class:`~linear_model.sparse.Lasso`, :class:`~linear_model.sparse.ElasticNet`) + ``svm`` and ``linear_model`` (see `svm.sparse.SVC`, + `svm.sparse.SVR`, `svm.sparse.LinearSVC`, + `linear_model.sparse.Lasso`, `linear_model.sparse.ElasticNet`) - New :class:`~pipeline.Pipeline` object to compose different estimators. @@ -1237,8 +1237,7 @@ New classes :class:`~linear_model.LassoLars`. - New Hidden Markov Models module (see classes - :class:`~hmm.GaussianHMM`, :class:`~hmm.MultinomialHMM`, - :class:`~hmm.GMMHMM`) + `hmm.GaussianHMM`, `hmm.MultinomialHMM`, `hmm.GMMHMM`) - New module feature_extraction (see :ref:`class reference `) @@ -1383,4 +1382,3 @@ Earlier versions Earlier versions included contributions by Fred Mailhot, David Cooke, David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson. - diff --git a/doc/whats_new/v0.13.rst b/doc/whats_new/v0.13.rst index 10b4d3b5b783f..00be322bf38fc 100644 --- a/doc/whats_new/v0.13.rst +++ b/doc/whats_new/v0.13.rst @@ -14,7 +14,7 @@ The 0.13.1 release only fixes some bugs and does not add any new functionality. Changelog --------- -- Fixed a testing error caused by the function :func:`cross_validation.train_test_split` being +- Fixed a testing error caused by the function `cross_validation.train_test_split` being interpreted as a test by `Yaroslav Halchenko`_. - Fixed a bug in the reassignment of small clusters in the :class:`cluster.MiniBatchKMeans` @@ -128,7 +128,7 @@ Changelog trees, by `Peter Prettenhofer`_ and `Gilles Louppe`_. - Partial dependence plots for :ref:`gradient_boosting` in - :func:`ensemble.partial_dependence.partial_dependence` by `Peter + `ensemble.partial_dependence.partial_dependence` by `Peter Prettenhofer`_. See :ref:`sphx_glr_auto_examples_inspection_plot_partial_dependence.py` for an example. @@ -161,7 +161,7 @@ Changelog - Faster and more robust :func:`metrics.confusion_matrix` and :ref:`clustering_evaluation` by Wei Li. -- :func:`cross_validation.cross_val_score` now works with precomputed kernels +- `cross_validation.cross_val_score` now works with precomputed kernels and affinity matrices, by `Andreas Müller`_. - LARS algorithm made more numerically stable with heuristics to drop @@ -171,7 +171,7 @@ Changelog - Faster implementation of :func:`metrics.precision_recall_curve` by Conrad Lee. -- New kernel :class:`metrics.chi2_kernel` by `Andreas Müller`_, often used +- New kernel `metrics.chi2_kernel` by `Andreas Müller`_, often used in computer vision applications. - Fix of longstanding bug in :class:`naive_bayes.BernoulliNB` fixed by @@ -184,7 +184,7 @@ Changelog :class:`ensemble.GradientBoostingRegressor` and :class:`ensemble.GradientBoostingClassifier` use the estimator :class:`tree.DecisionTreeRegressor` instead of the - :class:`tree._tree.Tree` data structure by `Arnaud Joly`_. + `tree._tree.Tree` data structure by `Arnaud Joly`_. - Fixed a floating point exception in the :ref:`decision trees ` module, by Seberg. 
@@ -209,7 +209,7 @@ Changelog - Fixed a bug in :class:`sklearn.svm.SVC` when using csr-matrices with unsorted indices by Xinfan Meng and `Andreas Müller`_. -- :class:`MiniBatchKMeans`: Add random reassignment of cluster centers +- :class:`cluster.MiniBatchKMeans`: Add random reassignment of cluster centers with little observations attached to them, by `Gael Varoquaux`_. @@ -221,18 +221,18 @@ API changes summary :func:`decomposition.dict_learning`, :func:`decomposition.dict_learning_online`. - Renamed all occurrences of ``max_iters`` to ``max_iter`` for consistency. - This applies to :class:`semi_supervised.LabelPropagation` and - :class:`semi_supervised.label_propagation.LabelSpreading`. + This applies to `semi_supervised.LabelPropagation` and + `semi_supervised.label_propagation.LabelSpreading`. - Renamed all occurrences of ``learn_rate`` to ``learning_rate`` for - consistency in :class:`ensemble.BaseGradientBoosting` and + consistency in `ensemble.BaseGradientBoosting` and :class:`ensemble.GradientBoostingRegressor`. - The module ``sklearn.linear_model.sparse`` is gone. Sparse matrix support was already integrated into the "regular" linear models. -- :func:`sklearn.metrics.mean_square_error`, which incorrectly returned the - accumulated error, was removed. Use ``mean_squared_error`` instead. +- `sklearn.metrics.mean_square_error`, which incorrectly returned the + accumulated error, was removed. Use :func:`metrics.mean_squared_error` instead. - Passing ``class_weight`` parameters to ``fit`` methods is no longer supported. Pass them to estimator constructors instead. @@ -244,17 +244,18 @@ API changes summary deprecated and will be removed in v0.14. Use the constructor option instead. -- :class:`feature_extraction.text.DictVectorizer` now returns sparse +- `feature_extraction.text.DictVectorizer` now returns sparse matrices in the CSR format, instead of COO. -- Renamed ``k`` in :class:`cross_validation.KFold` and - :class:`cross_validation.StratifiedKFold` to ``n_folds``, renamed +- Renamed ``k`` in `cross_validation.KFold` and + `cross_validation.StratifiedKFold` to ``n_folds``, renamed ``n_bootstraps`` to ``n_iter`` in ``cross_validation.Bootstrap``. - Renamed all occurrences of ``n_iterations`` to ``n_iter`` for consistency. - This applies to :class:`cross_validation.ShuffleSplit`, - :class:`cross_validation.StratifiedShuffleSplit`, - :func:`utils.randomized_range_finder` and :func:`utils.randomized_svd`. + This applies to `cross_validation.ShuffleSplit`, + `cross_validation.StratifiedShuffleSplit`, + :func:`utils.extmath.randomized_range_finder` and + :func:`utils.extmath.randomized_svd`. - Replaced ``rho`` in :class:`linear_model.ElasticNet` and :class:`linear_model.SGDClassifier` by ``l1_ratio``. The ``rho`` parameter @@ -267,10 +268,10 @@ API changes summary store a list of paths in the case of multiple targets, rather than an array of paths. -- The attribute ``gmm`` of :class:`hmm.GMMHMM` was renamed to ``gmm_`` +- The attribute ``gmm`` of `hmm.GMMHMM` was renamed to ``gmm_`` to adhere more strictly with the API. -- :func:`cluster.spectral_embedding` was moved to +- `cluster.spectral_embedding` was moved to :func:`manifold.spectral_embedding`. - Renamed ``eig_tol`` in :func:`manifold.spectral_embedding`, @@ -286,9 +287,9 @@ API changes summary multi-output problems. 
- The ``estimators_`` attribute of - :class:`ensemble.gradient_boosting.GradientBoostingRegressor` and - :class:`ensemble.gradient_boosting.GradientBoostingClassifier` is now an - array of :class:'tree.DecisionTreeRegressor'. + :class:`ensemble.GradientBoostingRegressor` and + :class:`ensemble.GradientBoostingClassifier` is now an + array of :class:`tree.DecisionTreeRegressor`. - Renamed ``chunk_size`` to ``batch_size`` in :class:`decomposition.MiniBatchDictionaryLearning` and @@ -299,18 +300,18 @@ API changes summary Also, the dtype returned by ``predict`` now reflects the dtype of ``y`` during ``fit`` (used to be ``np.float``). -- Changed default test_size in :func:`cross_validation.train_test_split` +- Changed default test_size in `cross_validation.train_test_split` to None, added possibility to infer ``test_size`` from ``train_size`` in - :class:`cross_validation.ShuffleSplit` and - :class:`cross_validation.StratifiedShuffleSplit`. + `cross_validation.ShuffleSplit` and + `cross_validation.StratifiedShuffleSplit`. -- Renamed function :func:`sklearn.metrics.zero_one` to - :func:`sklearn.metrics.zero_one_loss`. Be aware that the default behavior - in :func:`sklearn.metrics.zero_one_loss` is different from - :func:`sklearn.metrics.zero_one`: ``normalize=False`` is changed to +- Renamed function `sklearn.metrics.zero_one` to + `sklearn.metrics.zero_one_loss`. Be aware that the default behavior + in `sklearn.metrics.zero_one_loss` is different from + `sklearn.metrics.zero_one`: ``normalize=False`` is changed to ``normalize=True``. -- Renamed function :func:`metrics.zero_one_score` to +- Renamed function `metrics.zero_one_score` to :func:`metrics.accuracy_score`. - :func:`datasets.make_circles` now has the same number of inner and outer points. @@ -388,4 +389,3 @@ List of contributors for release 0.13 by number of commits. * 1 dengemann * 1 emanuele * 1 x006 - diff --git a/doc/whats_new/v0.14.rst b/doc/whats_new/v0.14.rst index 5abe7d12d2051..4bd04ad180c4e 100644 --- a/doc/whats_new/v0.14.rst +++ b/doc/whats_new/v0.14.rst @@ -13,7 +13,7 @@ Changelog --------- - Missing values with sparse and dense matrices can be imputed with the - transformer :class:`preprocessing.Imputer` by `Nicolas Trésegnie`_. + transformer `preprocessing.Imputer` by `Nicolas Trésegnie`_. - The core implementation of decisions trees has been rewritten from scratch, allowing for faster tree induction and lower memory @@ -24,13 +24,13 @@ Changelog `Gilles Louppe`_. See the :ref:`AdaBoost ` section of the user guide for details and examples. -- Added :class:`grid_search.RandomizedSearchCV` and - :class:`grid_search.ParameterSampler` for randomized hyperparameter +- Added `grid_search.RandomizedSearchCV` and + `grid_search.ParameterSampler` for randomized hyperparameter optimization. By `Andreas Müller`_. - Added :ref:`biclustering ` algorithms - (:class:`sklearn.cluster.bicluster.SpectralCoclustering` and - :class:`sklearn.cluster.bicluster.SpectralBiclustering`), data + (`sklearn.cluster.bicluster.SpectralCoclustering` and + `sklearn.cluster.bicluster.SpectralBiclustering`), data generation methods (:func:`sklearn.datasets.make_biclusters` and :func:`sklearn.datasets.make_checkerboard`), and scoring metrics (:func:`sklearn.metrics.consensus_score`). By `Kemal Eren`_. @@ -45,7 +45,7 @@ Changelog - Ability to pass one penalty (alpha value) per target in :class:`linear_model.Ridge`, by @eickenberg and `Mathieu Blondel`_. 
-- Fixed :mod:`sklearn.linear_model.stochastic_gradient.py` L2 regularization +- Fixed `sklearn.linear_model.stochastic_gradient.py` L2 regularization issue (minor practical significance). By :user:`Norbert Crombach ` and `Mathieu Blondel`_ . @@ -55,8 +55,8 @@ Changelog to the documentation. See :ref:`Choosing the right estimator `. By `Jaques Grobler`_. -- :class:`grid_search.GridSearchCV` and - :func:`cross_validation.cross_val_score` now support the use of advanced +- `grid_search.GridSearchCV` and + `cross_validation.cross_val_score` now support the use of advanced scoring function such as area under the ROC curve and f-beta scores. See :ref:`scoring_parameter` for details. By `Andreas Müller`_ and `Lars Buitinck`_. @@ -71,7 +71,7 @@ Changelog by `Arnaud Joly`_. - Two new metrics :func:`metrics.hamming_loss` and - :func:`metrics.jaccard_similarity_score` + `metrics.jaccard_similarity_score` are added with multi-label support by `Arnaud Joly`_. - Speed and memory usage improvements in @@ -121,8 +121,8 @@ Changelog - Feature selectors now share a mixin providing consistent ``transform``, ``inverse_transform`` and ``get_support`` methods. By `Joel Nothman`_. -- A fitted :class:`grid_search.GridSearchCV` or - :class:`grid_search.RandomizedSearchCV` can now generally be pickled. +- A fitted `grid_search.GridSearchCV` or + `grid_search.RandomizedSearchCV` can now generally be pickled. By `Joel Nothman`_. - Refactored and vectorized implementation of :func:`metrics.roc_curve` @@ -138,7 +138,7 @@ Changelog By :user:`Eustache Diemert `. - The default number of components for - :class:`sklearn.decomposition.RandomizedPCA` is now correctly documented + `sklearn.decomposition.RandomizedPCA` is now correctly documented to be ``n_features``. This was the default behavior, so programs using it will continue to work as they did. @@ -149,12 +149,12 @@ Changelog - Reduce memory footprint of FastICA by `Denis Engemann`_ and `Alexandre Gramfort`_. -- Verbose output in :mod:`sklearn.ensemble.gradient_boosting` now uses +- Verbose output in `sklearn.ensemble.gradient_boosting` now uses a column format and prints progress in decreasing frequency. It also shows the remaining time. By `Peter Prettenhofer`_. -- :mod:`sklearn.ensemble.gradient_boosting` provides out-of-bag improvement - :attr:`~sklearn.ensemble.GradientBoostingRegressor.oob_improvement_` +- `sklearn.ensemble.gradient_boosting` provides out-of-bag improvement + `oob_improvement_` rather than the OOB score for model selection. An example that shows how to use OOB estimates to select the number of trees was added. By `Peter Prettenhofer`_. @@ -165,17 +165,17 @@ Changelog - New OrthogonalMatchingPursuitCV class by `Alexandre Gramfort`_ and `Vlad Niculae`_. -- Fixed a bug in :class:`sklearn.covariance.GraphLassoCV`: the +- Fixed a bug in `sklearn.covariance.GraphLassoCV`: the 'alphas' parameter now works as expected when given a list of values. By Philippe Gervais. -- Fixed an important bug in :class:`sklearn.covariance.GraphLassoCV` +- Fixed an important bug in `sklearn.covariance.GraphLassoCV` that prevented all folds provided by a CV object to be used (only the first 3 were used). When providing a CV object, execution time may thus increase significantly compared to the previous version (bug results are correct now). By Philippe Gervais. -- :class:`cross_validation.cross_val_score` and the :mod:`grid_search` +- `cross_validation.cross_val_score` and the `grid_search` module is now tested with multi-output data by `Arnaud Joly`_. 
- :func:`datasets.make_multilabel_classification` can now return @@ -187,8 +187,8 @@ Changelog :class:`neighbors.RadiusNeighborsClassifier` support multioutput data by `Arnaud Joly`_. -- Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`NuSVC`, - :class:`OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be +- Random state in LibSVM-based estimators (:class:`svm.SVC`, :class:`svm.NuSVC`, + :class:`svm.OneClassSVM`, :class:`svm.SVR`, :class:`svm.NuSVR`) can now be controlled. This is useful to ensure consistency in the probability estimates for the classifiers trained with ``probability=True``. By `Vlad Niculae`_. @@ -204,10 +204,10 @@ Changelog - Improved documentation on :ref:`multi-class, multi-label and multi-output classification ` by `Yannick Schwartz`_ and `Arnaud Joly`_. -- Better input and error handling in the :mod:`metrics` module by +- Better input and error handling in the :mod:`sklearn.metrics` module by `Arnaud Joly`_ and `Joel Nothman`_. -- Speed optimization of the :mod:`hmm` module by :user:`Mikhail Korobov ` +- Speed optimization of the `hmm` module by :user:`Mikhail Korobov ` - Significant speed improvements for :class:`sklearn.cluster.DBSCAN` by `cleverless `_ @@ -216,7 +216,7 @@ Changelog API changes summary ------------------- -- The :func:`auc_score` was renamed :func:`roc_auc_score`. +- The `auc_score` was renamed :func:`metrics.roc_auc_score`. - Testing scikit-learn with ``sklearn.test()`` is deprecated. Use ``nosetests sklearn`` from the command line. @@ -233,10 +233,9 @@ API changes summary setting the ``return_models`` parameter to ``False``. By `Jaques Grobler`_ and `Alexandre Gramfort`_ -- :class:`grid_search.IterGrid` was renamed to - :class:`grid_search.ParameterGrid`. +- `grid_search.IterGrid` was renamed to `grid_search.ParameterGrid`. -- Fixed bug in :class:`KFold` causing imperfect class balance in some +- Fixed bug in `KFold` causing imperfect class balance in some cases. By `Alexandre Gramfort`_ and Tadej Janež. - :class:`sklearn.neighbors.BallTree` has been refactored, and a @@ -249,8 +248,8 @@ API changes summary By `Jake Vanderplas`_ - Support for scipy.spatial.cKDTree within neighbors queries has been - removed, and the functionality replaced with the new :class:`KDTree` - class. + removed, and the functionality replaced with the new + :class:`sklearn.neighbors.KDTree` class. - :class:`sklearn.neighbors.KernelDensity` has been added, which performs efficient kernel density estimation with a variety of kernels. @@ -264,11 +263,11 @@ API changes summary - ``gcv_mode="auto"`` no longer tries to perform SVD on a densified sparse matrix in :class:`sklearn.linear_model.RidgeCV`. -- Sparse matrix support in :class:`sklearn.decomposition.RandomizedPCA` +- Sparse matrix support in `sklearn.decomposition.RandomizedPCA` is now deprecated in favor of the new ``TruncatedSVD``. -- :class:`cross_validation.KFold` and - :class:`cross_validation.StratifiedKFold` now enforce `n_folds >= 2` +- `cross_validation.KFold` and + `cross_validation.StratifiedKFold` now enforce `n_folds >= 2` otherwise a ``ValueError`` is raised. By `Olivier Grisel`_. - :func:`datasets.load_files`'s ``charset`` and ``charset_errors`` @@ -386,4 +385,3 @@ List of contributors for release 0.14 by number of commits. 
* 1 Sturla Molden * 1 Thomas Jarosch * 1 Yaroslav Halchenko - diff --git a/doc/whats_new/v0.15.rst b/doc/whats_new/v0.15.rst index a2eafc63b0617..50412425dd7a5 100644 --- a/doc/whats_new/v0.15.rst +++ b/doc/whats_new/v0.15.rst @@ -58,9 +58,9 @@ Version 0.15.1 Bug fixes --------- -- Made :func:`cross_validation.cross_val_score` use - :class:`cross_validation.KFold` instead of - :class:`cross_validation.StratifiedKFold` on multi-output classification +- Made `cross_validation.cross_val_score` use + `cross_validation.KFold` instead of + `cross_validation.StratifiedKFold` on multi-output classification problems. By :user:`Nikolay Mayorov `. - Support unseen labels :class:`preprocessing.LabelBinarizer` to restore @@ -75,7 +75,7 @@ Bug fixes per-class sum of prediction scores. By `Andreas Müller`_. - Made :func:`cross_validation.cross_val_score` and - :class:`grid_search.GridSearchCV` accept Python lists as input data. + `grid_search.GridSearchCV` accept Python lists as input data. This is especially useful for cross-validation and model selection of text processing pipelines. By `Andreas Müller`_. @@ -141,7 +141,7 @@ New features - Shorthand constructors :func:`pipeline.make_pipeline` and :func:`pipeline.make_union` were added by `Lars Buitinck`_. -- Shuffle option for :class:`cross_validation.StratifiedKFold`. +- Shuffle option for `cross_validation.StratifiedKFold`. By :user:`Jeffrey Blackburne `. - Incremental learning (``partial_fit``) for Gaussian Naive Bayes by @@ -151,7 +151,7 @@ New features ` By :user:`Danny Sullivan `. -- Added :func:`learning_curve ` utility to +- Added `learning_curve` utility to chart performance with respect to training size. See :ref:`sphx_glr_auto_examples_model_selection_plot_learning_curve.py`. By Alexander Fabisch. @@ -203,16 +203,16 @@ Enhancements threading backend of joblib 0.8 and releasing the GIL in the tree fitting Cython code. By `Olivier Grisel`_ and `Gilles Louppe`_. -- Speed improvement of the :mod:`sklearn.ensemble.gradient_boosting` module. +- Speed improvement of the `sklearn.ensemble.gradient_boosting` module. By `Gilles Louppe`_ and `Peter Prettenhofer`_. -- Various enhancements to the :mod:`sklearn.ensemble.gradient_boosting` +- Various enhancements to the `sklearn.ensemble.gradient_boosting` module: a ``warm_start`` argument to fit additional trees, a ``max_leaf_nodes`` argument to fit GBM style trees, a ``monitor`` fit argument to inspect the estimator during training, and refactoring of the verbose code. By `Peter Prettenhofer`_. -- Faster :class:`sklearn.ensemble.ExtraTrees` by caching feature values. +- Faster `sklearn.ensemble.ExtraTrees` by caching feature values. By `Arnaud Joly`_. - Faster depth-based tree building algorithm such as decision tree, @@ -246,7 +246,7 @@ Enhancements significantly speedup computation by `Denis Engemann`_, and `Alexandre Gramfort`_. -- Changed :class:`cross_validation.StratifiedKFold` to try and +- Changed `cross_validation.StratifiedKFold` to try and preserve as much of the original ordering of samples as possible so as not to hide overfitting on datasets with a non-negligible level of samples dependency. @@ -282,9 +282,8 @@ Enhancements By `Lars Buitinck`_. - Grid search and cross validation allow NaNs in the input arrays so that - preprocessors such as :class:`preprocessing.Imputer - ` can be trained within the cross validation loop, - avoiding potentially skewed results. 
+ preprocessors such as `preprocessing.Imputer` can be trained within the cross + validation loop, avoiding potentially skewed results. - Ridge regression can now deal with sample weights in feature space (only sample space until then). By :user:`Michael Eickenberg `. @@ -333,7 +332,7 @@ Bug fixes - Fixed bug in :class:`decomposition.MiniBatchDictionaryLearning` : ``partial_fit`` was not working properly. -- Fixed bug in :class:`linear_model.stochastic_gradient` : +- Fixed bug in `linear_model.stochastic_gradient` : ``l1_ratio`` was used as ``(1.0 - l1_ratio)`` . - Fixed bug in :class:`multiclass.OneVsOneClassifier` with string @@ -353,10 +352,10 @@ Bug fixes By `Olivier Grisel`_. - Raise error in :class:`cluster.FeatureAgglomeration` and - :class:`cluster.WardAgglomeration` when no samples are given, + `cluster.WardAgglomeration` when no samples are given, rather than returning meaningless clustering. -- Fixed bug in :class:`gradient_boosting.GradientBoostingRegressor` with +- Fixed bug in `gradient_boosting.GradientBoostingRegressor` with ``loss='huber'``: ``gamma`` might have not been initialized. - Fixed feature importances as computed with a forest of randomized trees @@ -366,36 +365,36 @@ Bug fixes API changes summary ------------------- -- :mod:`sklearn.hmm` is deprecated. Its removal is planned +- `sklearn.hmm` is deprecated. Its removal is planned for the 0.17 release. -- Use of :class:`covariance.EllipticEnvelop` has now been removed after +- Use of `covariance.EllipticEnvelop` has now been removed after deprecation. Please use :class:`covariance.EllipticEnvelope` instead. -- :class:`cluster.Ward` is deprecated. Use +- `cluster.Ward` is deprecated. Use :class:`cluster.AgglomerativeClustering` instead. -- :class:`cluster.WardClustering` is deprecated. Use +- `cluster.WardClustering` is deprecated. Use - :class:`cluster.AgglomerativeClustering` instead. -- :class:`cross_validation.Bootstrap` is deprecated. - :class:`cross_validation.KFold` or - :class:`cross_validation.ShuffleSplit` are recommended instead. +- `cross_validation.Bootstrap` is deprecated. + `cross_validation.KFold` or + `cross_validation.ShuffleSplit` are recommended instead. - Direct support for the sequence of sequences (or list of lists) multilabel format is deprecated. To convert to and from the supported binary indicator matrix format, use - :class:`MultiLabelBinarizer `. + :class:`preprocessing.MultiLabelBinarizer`. By `Joel Nothman`_. -- Add score method to :class:`PCA ` following the model of +- Add score method to :class:`decomposition.PCA` following the model of probabilistic PCA and deprecate - :class:`ProbabilisticPCA ` model whose + `ProbabilisticPCA` model whose score implementation is not correct. The computation now also exploits the matrix inversion lemma for faster computation. By `Alexandre Gramfort`_. -- The score method of :class:`FactorAnalysis ` +- The score method of :class:`decomposition.FactorAnalysis` now returns the average log-likelihood of the samples. Use score_samples to get log-likelihood of each sample. By `Alexandre Gramfort`_. @@ -410,7 +409,7 @@ API changes summary from version 0.13 in some classifiers. By `Joel Nothman`_. - Fix wrong ``explained_variance_ratio_`` attribute in - :class:`RandomizedPCA `. + `RandomizedPCA`. By `Alexandre Gramfort`_. - Fit alphas for each ``l1_ratio`` instead of ``mean_l1_ratio`` in @@ -445,11 +444,11 @@ API changes summary performance, you should modify the value of ``max_features``. By `Arnaud Joly`_. 
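A minimal sketch of the sequence-of-sequences to indicator-matrix conversion mentioned a few entries above, using toy label values and :class:`preprocessing.MultiLabelBinarizer` (not part of the original changelog)::

    from sklearn.preprocessing import MultiLabelBinarizer

    # a sequence-of-sequences multilabel target (made-up labels)
    y = [["news", "sports"], ["politics"], ["news"]]

    mlb = MultiLabelBinarizer()
    Y = mlb.fit_transform(y)           # binary indicator matrix of shape (3, 3)
    print(mlb.classes_)                # ['news' 'politics' 'sports']
    print(mlb.inverse_transform(Y))    # back to tuples of labels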
-- Fix :func:`utils.compute_class_weight` when ``class_weight=="auto"``. +- Fix :func:`utils.class_weight.compute_class_weight` when ``class_weight=="auto"``. Previously it was broken for input of non-integer ``dtype`` and the weighted array that was returned was wrong. By `Manoj Kumar`_. -- Fix :class:`cross_validation.Bootstrap` to return ``ValueError`` +- Fix `cross_validation.Bootstrap` to return ``ValueError`` when ``n_train + n_test > n``. By :user:`Ronald Phlypo `. @@ -620,4 +619,3 @@ List of contributors for release 0.15 by number of commits. * 1 Andrew Ash * 1 Pietro Zambelli * 1 staubda - diff --git a/doc/whats_new/v0.16.rst b/doc/whats_new/v0.16.rst index a9c9f0b2614fd..ba96d03683bd0 100644 --- a/doc/whats_new/v0.16.rst +++ b/doc/whats_new/v0.16.rst @@ -60,7 +60,7 @@ Highlights - :class:`cluster.Birch` clustering method for large-scale datasets. - Scalable approximate nearest neighbors search with Locality-sensitive - hashing forests in :class:`neighbors.LSHForest`. + hashing forests in `neighbors.LSHForest`. - Improved error messages and better validation when using malformed input data. @@ -72,7 +72,7 @@ Changelog New features ............ -- The new :class:`neighbors.LSHForest` implements locality-sensitive hashing +- The new `neighbors.LSHForest` implements locality-sensitive hashing for approximate nearest neighbors search. By :user:`Maheshakya Wijewardena`. - Added :class:`svm.LinearSVR`. This class uses the liblinear implementation @@ -109,7 +109,7 @@ New features and :class:`SGDRegressor ` By :user:`Danny Sullivan `. -- Added :func:`cross_val_predict ` +- Added `cross_val_predict` function which computes cross-validated estimates. By `Luis Pedro Coelho`_ - Added :class:`linear_model.TheilSenRegressor`, a robust @@ -131,7 +131,7 @@ New features - All solvers in :class:`linear_model.Ridge` now support `sample_weight`. By `Mathieu Blondel`_. -- Added :class:`cross_validation.PredefinedSplit` cross-validation +- Added `cross_validation.PredefinedSplit` cross-validation for fixed user-provided cross-validation folds. By :user:`Thomas Unterthiner `. @@ -144,10 +144,10 @@ New features Enhancements ............ -- Add option ``return_distance`` in :func:`hierarchical.ward_tree` +- Add option ``return_distance`` in `hierarchical.ward_tree` to return distances between nodes for both structured and unstructured versions of the algorithm. By `Matteo Visconti di Oleggio Castello`_. - The same option was added in :func:`hierarchical.linkage_tree`. + The same option was added in `hierarchical.linkage_tree`. By `Manoj Kumar`_ - Add support for sample weights in scorer objects. Metrics with sample @@ -162,7 +162,7 @@ Enhancements and related. By `Manoj Kumar`_. - Add ``sample_weight`` parameter to - :func:`metrics.jaccard_similarity_score` and :func:`metrics.log_loss`. + `metrics.jaccard_similarity_score` and :func:`metrics.log_loss`. By :user:`Jatin Shah `. - Support sparse multilabel indicator representation in @@ -191,11 +191,11 @@ Enhancements single pass, when giving the option ``sort=False``. By :user:`Dan Blanchard `. -- :class:`GridSearchCV` and :class:`RandomizedSearchCV` can now be - configured to work with estimators that may fail and raise errors on - individual folds. This option is controlled by the `error_score` - parameter. This does not affect errors raised on re-fit. By - :user:`Michal Romaniuk `. 
+- :class:`model_selection.GridSearchCV` and + :class:`model_selection.RandomizedSearchCV` can now be configured to work + with estimators that may fail and raise errors on individual folds. This + option is controlled by the `error_score` parameter. This does not affect + errors raised on re-fit. By :user:`Michal Romaniuk `. - Add ``digits`` parameter to `metrics.classification_report` to allow report to show different precision of floating point numbers. By @@ -223,14 +223,14 @@ Enhancements - Added decision function for :class:`multiclass.OneVsOneClassifier` By `Raghav RV`_ and :user:`Kyle Beauchamp `. -- :func:`neighbors.kneighbors_graph` and :func:`radius_neighbors_graph` +- `neighbors.kneighbors_graph` and `radius_neighbors_graph` support non-Euclidean metrics. By `Manoj Kumar`_ - Parameter ``connectivity`` in :class:`cluster.AgglomerativeClustering` and family now accept callables that return a connectivity matrix. By `Manoj Kumar`_. -- Sparse support for :func:`paired_distances`. By `Joel Nothman`_. +- Sparse support for :func:`metrics.pairwise.paired_distances`. By `Joel Nothman`_. - :class:`cluster.DBSCAN` now supports sparse input and sample weights and has been optimized: the inner loop has been rewritten in Cython and @@ -242,10 +242,10 @@ Enhancements :class:`tree.DecisionTreeClassifier`, :class:`ensemble.ExtraTreesClassifier` and :class:`tree.ExtraTreeClassifier`. By `Trevor Stephens`_. -- :class:`grid_search.RandomizedSearchCV` now does sampling without +- `grid_search.RandomizedSearchCV` now does sampling without replacement if all parameters are given as lists. By `Andreas Müller`_. -- Parallelized calculation of :func:`pairwise_distances` is now supported +- Parallelized calculation of :func:`metrics.pairwise_distances` is now supported for scipy metrics and custom callables. By `Joel Nothman`_. - Allow the fitting and scoring of all clustering algorithms in @@ -254,8 +254,8 @@ Enhancements - More robust seeding and improved error messages in :class:`cluster.MeanShift` by `Andreas Müller`_. -- Make the stopping criterion for :class:`mixture.GMM`, - :class:`mixture.DPGMM` and :class:`mixture.VBGMM` less dependent on the +- Make the stopping criterion for `mixture.GMM`, - `mixture.DPGMM` and `mixture.VBGMM` less dependent on the number of samples by thresholding the average log-likelihood change instead of its sum over all samples. By `Hervé Bredin`_. @@ -271,14 +271,14 @@ Enhancements - :class:`svm.SVC` fitted on sparse input now implements ``decision_function``. By `Rob Zinkov`_ and `Andreas Müller`_. -- :func:`cross_validation.train_test_split` now preserves the input type, +- `cross_validation.train_test_split` now preserves the input type, instead of converting to numpy arrays. Documentation improvements .......................... -- Added example of using :class:`FeatureUnion` for heterogeneous input. +- Added example of using :class:`pipeline.FeatureUnion` for heterogeneous input. By :user:`Matt Terry ` - Documentation on scorers was improved, to highlight the handling of loss @@ -306,16 +306,16 @@ Bug fixes ......... - Metaestimators now support ducktyping for the presence of ``decision_function``, ``predict_proba`` and other methods. This fixes behavior of - :class:`grid_search.GridSearchCV`, - :class:`grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`, + `grid_search.GridSearchCV`, + `grid_search.RandomizedSearchCV`, :class:`pipeline.Pipeline`, :class:`feature_selection.RFE`, :class:`feature_selection.RFECV` when nested.
By `Joel Nothman`_ - The ``scoring`` attribute of grid-search and cross-validation methods is no longer - ignored when a :class:`grid_search.GridSearchCV` is given as a base estimator or + ignored when a `grid_search.GridSearchCV` is given as a base estimator or the base estimator doesn't have predict. -- The function :func:`hierarchical.ward_tree` now returns the children in +- The function `hierarchical.ward_tree` now returns the children in the same order for both the structured and unstructured versions. By `Matteo Visconti di Oleggio Castello`_. @@ -327,7 +327,7 @@ Bug fixes length. By :user:`Michael Eickenberg `. - Fix incomplete download of the dataset when - :func:`datasets.download_20newsgroups` is called. By `Manoj Kumar`_. + `datasets.download_20newsgroups` is called. By `Manoj Kumar`_. - Various fixes to the Gaussian processes subpackage by Vincent Dubourg and Jan Hendrik Metzen. @@ -384,7 +384,7 @@ Bug fixes :class:`sklearn.neighbors.NearestNeighbors` and family, when the query data is not the same as fit data. By `Manoj Kumar`_. -- Fix log-density calculation in the :class:`mixture.GMM` with +- Fix log-density calculation in the `mixture.GMM` with tied covariance. By `Will Dawson`_ - Fixed a scaling error in :class:`feature_selection.SelectFdr` @@ -415,15 +415,15 @@ Bug fixes API changes summary ------------------- -- :class:`GridSearchCV ` and - :func:`cross_val_score ` and other +- `GridSearchCV` and + `cross_val_score` and other meta-estimators don't convert pandas DataFrames into arrays any more, allowing DataFrame specific operations in custom estimators. -- :func:`multiclass.fit_ovr`, :func:`multiclass.predict_ovr`, - :func:`predict_proba_ovr`, - :func:`multiclass.fit_ovo`, :func:`multiclass.predict_ovo`, - :func:`multiclass.fit_ecoc` and :func:`multiclass.predict_ecoc` +- `multiclass.fit_ovr`, `multiclass.predict_ovr`, + `predict_proba_ovr`, + `multiclass.fit_ovo`, `multiclass.predict_ovo`, + `multiclass.fit_ecoc` and `multiclass.predict_ecoc` are deprecated. Use the underlying estimators instead. - Nearest neighbors estimators used to take arbitrary keyword arguments @@ -439,11 +439,11 @@ API changes summary but previous versions accidentally returned only the positive probability. Fixed by Will Lamond and `Lars Buitinck`_. -- Change default value of precompute in :class:`ElasticNet` and :class:`Lasso` - to False. Setting precompute to "auto" was found to be slower when - n_samples > n_features since the computation of the Gram matrix is - computationally expensive and outweighs the benefit of fitting the Gram - for just one alpha. +- Change default value of precompute in :class:`linear_model.ElasticNet` and + :class:`linear_model.Lasso` to False. Setting precompute to "auto" was found + to be slower when n_samples > n_features since the computation of the Gram + matrix is computationally expensive and outweighs the benefit of fitting the + Gram for just one alpha. ``precompute="auto"`` is now deprecated and will be removed in 0.18 By `Manoj Kumar`_. @@ -467,8 +467,8 @@ API changes summary been removed. They were deprecated since 0.14 - From now onwards, all estimators will uniformly raise ``NotFittedError`` - (:class:`utils.validation.NotFittedError`), when any of the ``predict`` - like methods are called before the model is fit. By `Raghav RV`_. + when any of the ``predict`` like methods are called before the model is fit. + By `Raghav RV`_. - Input data validation was refactored for more consistent input validation. 
The ``check_arrays`` function was replaced by ``check_array`` @@ -486,7 +486,7 @@ API changes summary as the first nearest neighbor. - `thresh` parameter is deprecated in favor of new `tol` parameter in - :class:`GMM`, :class:`DPGMM` and :class:`VBGMM`. See `Enhancements` + `GMM`, `DPGMM` and `VBGMM`. See `Enhancements` section for details. By `Hervé Bredin`_. - Estimators will treat input with dtype object as numeric when possible. @@ -538,4 +538,3 @@ terrycojones, Thomas Delteil, Thomas Unterthiner, Tomas Kazmar, trevorstephens, tttthomasssss, Tzu-Ming Kuo, ugurcaliskan, ugurthemaster, Vinayak Mehta, Vincent Dubourg, Vjacheslav Murashkin, Vlad Niculae, wadawson, Wei Xue, Will Lamond, Wu Jiang, x0l, Xinfan Meng, Yan Yi, Yu-Chin - diff --git a/doc/whats_new/v0.17.rst b/doc/whats_new/v0.17.rst index 7657d07712ab5..4e0cf0280ee4d 100644 --- a/doc/whats_new/v0.17.rst +++ b/doc/whats_new/v0.17.rst @@ -75,10 +75,10 @@ New features function into a ``Pipeline``-compatible transformer object. By Joe Jevnik. -- The new classes :class:`cross_validation.LabelKFold` and - :class:`cross_validation.LabelShuffleSplit` generate train-test folds, - respectively similar to :class:`cross_validation.KFold` and - :class:`cross_validation.ShuffleSplit`, except that the folds are +- The new classes `cross_validation.LabelKFold` and + `cross_validation.LabelShuffleSplit` generate train-test folds, + respectively similar to `cross_validation.KFold` and + `cross_validation.ShuffleSplit`, except that the folds are conditioned on a label array. By `Brian McFee`_, :user:`Jean Kossaifi ` and `Gilles Louppe`_. @@ -97,7 +97,7 @@ New features :class:`decomposition.NMF`. Previous solver based on Projected Gradient is still available setting new parameter ``solver`` to ``pg``, but is deprecated and will be removed in 0.19, along with - :class:`decomposition.ProjectedGradientNMF` and parameters ``sparseness``, + `decomposition.ProjectedGradientNMF` and parameters ``sparseness``, ``eta``, ``beta`` and ``nls_max_iter``. New parameters ``alpha`` and ``l1_ratio`` control L1 and L2 regularization, and ``shuffle`` adds a shuffling step in the ``cd`` solver. @@ -109,7 +109,7 @@ Enhancements Barnes-Hut method, leading to much faster fitting. By Christopher Erick Moody. (:issue:`4025`) -- :class:`cluster.mean_shift_.MeanShift` now supports parallel execution, +- :class:`cluster.MeanShift` now supports parallel execution, as implemented in the ``mean_shift`` function. By :user:`Martino Sorbaro `. @@ -119,7 +119,7 @@ Enhancements - :class:`dummy.DummyClassifier` now supports a prior fitting strategy. By `Arnaud Joly`_. -- Added a ``fit_predict`` method for :class:`mixture.GMM` and subclasses. +- Added a ``fit_predict`` method for `mixture.GMM` and subclasses. By :user:`Cory Lorenz `. - Added the :func:`metrics.label_ranking_loss` metric. @@ -133,7 +133,7 @@ Enhancements - Added option to use multi-output regression metrics without averaging. By Konstantin Shmelkov and :user:`Michael Eickenberg`. -- Added ``stratify`` option to :func:`cross_validation.train_test_split` +- Added ``stratify`` option to `cross_validation.train_test_split` for stratified splitting. By Miroslav Batchkarov. - The :func:`tree.export_graphviz` function now supports aesthetic @@ -172,8 +172,8 @@ Enhancements :func:`sklearn.metrics.pairwise.cosine_similarity`. By :user:`Jaidev Deshpande `. -- Add :func:`minmax_scale` to provide a function interface for - :class:`MinMaxScaler`. By :user:`Thomas Unterthiner `. 
+- Add :func:`preprocessing.minmax_scale` to provide a function interface for + :class:`preprocessing.MinMaxScaler`. By :user:`Thomas Unterthiner `. - ``dump_svmlight_file`` now handles multi-label datasets. By Chih-Wei Chang. @@ -183,12 +183,12 @@ Enhancements - The "Wisconsin Breast Cancer" classical two-class classification dataset is now included in scikit-learn, available with - :func:`sklearn.dataset.load_breast_cancer`. + :func:`datasets.load_breast_cancer`. - Upgraded to joblib 0.9.3 to benefit from the new automatic batching of short tasks. This makes it possible for scikit-learn to benefit from parallelism when many very short tasks are executed in parallel, for - instance by the :class:`grid_search.GridSearchCV` meta-estimator + instance by the `grid_search.GridSearchCV` meta-estimator with ``n_jobs > 1`` used with a large grid of parameters on a small dataset. By `Vlad Niculae`_, `Olivier Grisel`_ and `Loic Esteve`_. @@ -196,7 +196,7 @@ Enhancements https://github.com/joblib/joblib/blob/master/CHANGES.rst#release-093 - Improved speed (3 times per iteration) of - :class:`decomposition.DictLearning` with coordinate descent method + `decomposition.DictLearning` with coordinate descent method from :class:`linear_model.Lasso`. By :user:`Arthur Mensch `. - Parallel processing (threaded) for queries of nearest neighbors @@ -264,7 +264,7 @@ Enhancements - Added :func:`metrics.pairwise.laplacian_kernel`. By `Clyde Fare `_. -- :class:`covariance.GraphLasso` allows separate control of the convergence criterion +- `covariance.GraphLasso` allows separate control of the convergence criterion for the Elastic-Net subproblem via the ``enet_tol`` parameter. - Improved verbosity in :class:`decomposition.DictionaryLearning`. @@ -283,7 +283,7 @@ Enhancements - Added the ``fit_predict`` method to :class:`pipeline.Pipeline`. -- Added the :func:`preprocessing.min_max_scale` function. +- Added the :func:`preprocessing.minmax_scale` function. Bug fixes ......... @@ -294,16 +294,16 @@ Bug fixes - Fixed the output shape of :class:`linear_model.RANSACRegressor` to ``(n_samples, )``. By `Andreas Müller`_. -- Fixed bug in :class:`decomposition.DictLearning` when ``n_jobs < 0``. By +- Fixed bug in `decomposition.DictLearning` when ``n_jobs < 0``. By `Andreas Müller`_. -- Fixed bug where :class:`grid_search.RandomizedSearchCV` could consume a +- Fixed bug where `grid_search.RandomizedSearchCV` could consume a lot of memory for large discrete grids. By `Joel Nothman`_. - Fixed bug in :class:`linear_model.LogisticRegressionCV` where `penalty` was ignored in the final fit. By `Manoj Kumar`_. -- Fixed bug in :class:`ensemble.forest.ForestClassifier` while computing +- Fixed bug in `ensemble.forest.ForestClassifier` while computing oob_score and X is a sparse.csc_matrix. By :user:`Ankur Ankan `. - All regressors now consistently handle and warn when given ``y`` that is of @@ -313,17 +313,17 @@ Bug fixes - Fix in :class:`cluster.KMeans` cluster reassignment for sparse input by `Lars Buitinck`_. -- Fixed a bug in :class:`lda.LDA` that could cause asymmetric covariance +- Fixed a bug in :class:`decomposition.LDA` that could cause asymmetric covariance matrices when using shrinkage. By `Martin Billinger`_. -- Fixed :func:`cross_validation.cross_val_predict` for estimators with +- Fixed `cross_validation.cross_val_predict` for estimators with sparse predictions. By Buddha Prakash. - Fixed the ``predict_proba`` method of :class:`linear_model.LogisticRegression` to use soft-max instead of one-vs-rest normalization. 
By `Manoj Kumar`_. (:issue:`5182`) -- Fixed the :func:`partial_fit` method of :class:`linear_model.SGDClassifier` +- Fixed the `partial_fit` method of :class:`linear_model.SGDClassifier` when called with ``average=True``. By :user:`Andrew Lamb `. (:issue:`5282`) @@ -339,17 +339,17 @@ Bug fixes automatically changes the solver to 'sag' in this case. :issue:`5360` by `Tom Dupre la Tour`_. -- Fixed a performance bug in :class:`decomposition.RandomizedPCA` on data +- Fixed a performance bug in `decomposition.RandomizedPCA` on data with a large number of features and fewer samples. (:issue:`4478`) By `Andreas Müller`_, `Loic Esteve`_ and :user:`Giorgio Patrini `. -- Fixed bug in :class:`cross_decomposition.PLS` that yielded unstable and +- Fixed bug in `cross_decomposition.PLS` that yielded unstable and platform dependent output, and failed on `fit_transform`. By :user:`Arthur Mensch `. - Fixes to the ``Bunch`` class used to store datasets. -- Fixed :func:`ensemble.plot_partial_dependence` ignoring the +- Fixed `ensemble.plot_partial_dependence` ignoring the ``percentiles`` parameter. - Providing a ``set`` as vocabulary in ``CountVectorizer`` no longer @@ -361,8 +361,8 @@ Bug fixes :class:`linear_model.Lasso` and :class:`linear_model.ElasticNet`. - Fixed inconsistent memory layout in the coordinate descent solver - that affected :class:`linear_model.DictionaryLearning` and - :class:`covariance.GraphLasso`. (:issue:`5337`) + that affected `linear_model.DictionaryLearning` and + `covariance.GraphLasso`. (:issue:`5337`) By `Olivier Grisel`_. - :class:`manifold.LocallyLinearEmbedding` no longer ignores the ``reg`` @@ -396,7 +396,7 @@ API changes summary in :class:`preprocessing.StandardScaler` is deprecated and superseded by `scale_`; it won't be available in 0.19. By :user:`Giorgio Patrini `. -- :class:`svm.SVC`` and :class:`svm.NuSVC` now have an ``decision_function_shape`` +- :class:`svm.SVC` and :class:`svm.NuSVC` now have an ``decision_function_shape`` parameter to make their decision function of shape ``(n_samples, n_classes)`` by setting ``decision_function_shape='ovr'``. This will be the default behavior starting in 0.19. By `Andreas Müller`_. @@ -407,7 +407,7 @@ API changes summary to be explicitly shaped ``(n_samples, n_features)``. By :user:`Vighnesh Birodkar `. -- :class:`lda.LDA` and :class:`qda.QDA` have been moved to +- `lda.LDA` and `qda.QDA` have been moved to :class:`discriminant_analysis.LinearDiscriminantAnalysis` and :class:`discriminant_analysis.QuadraticDiscriminantAnalysis`. @@ -438,7 +438,7 @@ API changes summary - The ``decision_function`` on all regressors was deprecated and will be removed in 0.19. Use ``predict`` instead. -- :func:`datasets.load_lfw_pairs` is deprecated and will be removed in 0.19. +- `datasets.load_lfw_pairs` is deprecated and will be removed in 0.19. Use :func:`datasets.fetch_lfw_pairs` instead. - The deprecated ``hmm`` module was removed. @@ -446,9 +446,9 @@ API changes summary - The deprecated ``Bootstrap`` cross-validation iterator was removed. - The deprecated ``Ward`` and ``WardAgglomerative`` classes have been removed. - Use :class:`clustering.AgglomerativeClustering` instead. + Use :class:`cluster.AgglomerativeClustering` instead. -- :func:`cross_validation.check_cv` is now a public function. +- `cross_validation.check_cv` is now a public function. - The property ``residues_`` of :class:`linear_model.LinearRegression` is deprecated and will be removed in 0.19. 
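A minimal sketch of the ``decision_function_shape='ovr'`` option mentioned in the API changes above, on the iris data with any recent scikit-learn (illustration only, not part of the original changelog)::

    from sklearn.datasets import load_iris
    from sklearn.svm import SVC

    iris = load_iris()
    X, y = iris.data, iris.target

    # with 'ovr' the decision function is (n_samples, n_classes) instead of
    # the one-vs-one shape (n_samples, n_classes * (n_classes - 1) / 2)
    clf = SVC(decision_function_shape="ovr").fit(X, y)
    print(clf.decision_function(X).shape)   # (150, 3)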
diff --git a/doc/whats_new/v0.18.rst b/doc/whats_new/v0.18.rst index ea3548c0b9a0c..d4cd1025d69ef 100644 --- a/doc/whats_new/v0.18.rst +++ b/doc/whats_new/v0.18.rst @@ -189,8 +189,8 @@ Model Selection Enhancements and API Changes - **The model_selection module** The new module :mod:`sklearn.model_selection`, which groups together the - functionalities of formerly :mod:`sklearn.cross_validation`, - :mod:`sklearn.grid_search` and :mod:`sklearn.learning_curve`, introduces new + functionalities of formerly `sklearn.cross_validation`, + `sklearn.grid_search` and `sklearn.learning_curve`, introduces new possibilities such as nested cross-validation and better manipulation of parameter searches with Pandas. @@ -202,7 +202,7 @@ Model Selection Enhancements and API Changes The new cross-validation splitters, defined in the :mod:`sklearn.model_selection`, are no longer initialized with any data-dependent parameters such as ``y``. Instead they expose a - :func:`split` method that takes in the data and yields a generator for the + `split` method that takes in the data and yields a generator for the different splits. This change makes it possible to use the cross-validation splitters to @@ -258,7 +258,7 @@ Model Selection Enhancements and API Changes - **Fit parameter labels renamed to groups** - The ``labels`` parameter in the :func:`split` method of the newly renamed + The ``labels`` parameter in the `split` method of the newly renamed splitters :class:`model_selection.GroupKFold`, :class:`model_selection.LeaveOneGroupOut`, :class:`model_selection.LeavePGroupsOut`, @@ -314,7 +314,7 @@ Other estimators for sounder results. :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `. -- Class :class:`decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA` +- Class `decomposition.RandomizedPCA` is now factored into :class:`decomposition.PCA` and it is available calling with parameter ``svd_solver='randomized'``. The default number of ``n_iter`` for ``'randomized'`` has changed to 4. The old behavior of PCA is recovered by ``svd_solver='full'``. An additional solver @@ -337,11 +337,11 @@ Other estimators Model selection and evaluation -- Added :func:`metrics.cluster.fowlkes_mallows_score`, the Fowlkes Mallows +- Added :func:`metrics.fowlkes_mallows_score`, the Fowlkes Mallows Index which measures the similarity of two clusterings of a set of points By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `. -- Added :func:`metrics.calinski_harabaz_score`, which computes the Calinski +- Added `metrics.calinski_harabaz_score`, which computes the Calinski and Harabaz score to evaluate the resulting clustering of a set of points. By :user:`Arnaud Fouchet ` and :user:`Thierry Guillemot `. @@ -384,7 +384,7 @@ Trees and ensembles :issue:`6667` by :user:`Nelson Liu `. - The memory footprint is reduced (sometimes greatly) for - :class:`ensemble.bagging.BaseBagging` and classes that inherit from it, + `ensemble.bagging.BaseBagging` and classes that inherit from it, i.e, :class:`ensemble.BaggingClassifier`, :class:`ensemble.BaggingRegressor`, and :class:`ensemble.IsolationForest`, by dynamically generating attribute ``estimators_samples_`` only when it is @@ -462,7 +462,7 @@ Model evaluation and meta-estimators - Added support for substituting or disabling :class:`pipeline.Pipeline` and :class:`pipeline.FeatureUnion` components using the ``set_params`` - interface that powers :mod:`sklearn.grid_search`. + interface that powers `sklearn.grid_search`. 
See :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py` By `Joel Nothman`_ and :user:`Robert McGibbon `. @@ -489,7 +489,7 @@ Metrics :user:`Mads Jensen ` and :user:`Nelson Liu `. - Support sparse contingency matrices in cluster evaluation - (:mod:`metrics.cluster.supervised`) to scale to a large number of + (`metrics.cluster.supervised`) to scale to a large number of clusters. :issue:`7419` by :user:`Gregory Stupp ` and `Joel Nothman`_. @@ -512,22 +512,22 @@ Miscellaneous C/C++ files. By :user:`Arthur Mensch `. - Reduce the memory usage for 32-bit float input arrays of - :func:`utils.sparse_func.mean_variance_axis` and - :func:`utils.sparse_func.incr_mean_variance_axis` by supporting cython + `utils.sparse_func.mean_variance_axis` and + `utils.sparse_func.incr_mean_variance_axis` by supporting cython fused types. By :user:`YenChen Lin `. -- The :func:`ignore_warnings` now accept a category argument to ignore only +- The `ignore_warnings` now accept a category argument to ignore only the warnings of a specified type. By :user:`Thierry Guillemot `. - Added parameter ``return_X_y`` and return type ``(data, target) : tuple`` option to - :func:`load_iris` dataset + :func:`datasets.load_iris` dataset :issue:`7049`, - :func:`load_breast_cancer` dataset + :func:`datasets.load_breast_cancer` dataset :issue:`7152`, - :func:`load_digits` dataset, - :func:`load_diabetes` dataset, - :func:`load_linnerud` dataset, - :func:`load_boston` dataset + :func:`datasets.load_digits` dataset, + :func:`datasets.load_diabetes` dataset, + :func:`datasets.load_linnerud` dataset, + `datasets.load_boston` dataset :issue:`7154` by :user:`Manvendra Singh`. @@ -584,7 +584,7 @@ Linear, kernelized and related models Decomposition, manifold learning and clustering -- :class:`decomposition.RandomizedPCA` default number of `iterated_power` is 4 instead of 3. +- `decomposition.RandomizedPCA` default number of `iterated_power` is 4 instead of 3. :issue:`5141` by :user:`Giorgio Patrini `. - :func:`utils.extmath.randomized_svd` performs 4 power iterations by default, instead or 0. @@ -595,15 +595,15 @@ Decomposition, manifold learning and clustering :issue:`5299` by :user:`Giorgio Patrini`. - Whiten/non-whiten inconsistency between components of :class:`decomposition.PCA` - and :class:`decomposition.RandomizedPCA` (now factored into PCA, see the + and `decomposition.RandomizedPCA` (now factored into PCA, see the New features) is fixed. `components_` are stored with no whitening. :issue:`5299` by :user:`Giorgio Patrini `. - Fixed bug in :func:`manifold.spectral_embedding` where diagonal of unnormalized Laplacian matrix was incorrectly set to 1. :issue:`4995` by :user:`Peter Fischer `. -- Fixed incorrect initialization of :func:`utils.arpack.eigsh` on all - occurrences. Affects :class:`cluster.bicluster.SpectralBiclustering`, +- Fixed incorrect initialization of `utils.arpack.eigsh` on all + occurrences. Affects `cluster.bicluster.SpectralBiclustering`, :class:`decomposition.KernelPCA`, :class:`manifold.LocallyLinearEmbedding`, and :class:`manifold.SpectralEmbedding` (:issue:`5012`). By :user:`Peter Fischer `. @@ -614,7 +614,7 @@ Decomposition, manifold learning and clustering Preprocessing and feature selection -- :func:`preprocessing.data._transform_selected` now always passes a copy +- `preprocessing.data._transform_selected` now always passes a copy of ``X`` to transform function when ``copy=True`` (:issue:`7194`). By `Caio Oliveira `_. 
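A minimal sketch of the ``return_X_y`` loader option listed above, assuming a scikit-learn release that includes it (illustration only)::

    from sklearn.datasets import load_iris

    # classic interface: a Bunch with ``data`` and ``target`` attributes
    bunch = load_iris()
    X, y = bunch.data, bunch.target

    # with return_X_y=True the loader returns the (data, target) tuple directly
    X, y = load_iris(return_X_y=True)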
@@ -633,8 +633,8 @@ Model evaluation and meta-estimators return splits of size ``train_size`` and ``test_size`` in all cases (:issue:`6472`). By `Andreas Müller`_. -- Cross-validation of :class:`OneVsOneClassifier` and - :class:`OneVsRestClassifier` now works with precomputed kernels. +- Cross-validation of :class:`multiclass.OneVsOneClassifier` and + :class:`multiclass.OneVsRestClassifier` now works with precomputed kernels. :issue:`7350` by :user:`Russell Smith `. - Fix incomplete ``predict_proba`` method delegation from @@ -654,7 +654,7 @@ Metrics - Fix bug where expected and adjusted mutual information were incorrect if cluster contingency cells exceeded ``2**16``. By `Joel Nothman`_. -- :func:`metrics.pairwise.pairwise_distances` now converts arrays to +- :func:`metrics.pairwise_distances` now converts arrays to boolean arrays when required in ``scipy.spatial.distance``. :issue:`5460` by `Tom Dupre la Tour`_. @@ -667,7 +667,7 @@ Metrics Miscellaneous -- :func:`model_selection.tests._search._check_param_grid` now works correctly with all types +- `model_selection.tests._search._check_param_grid` now works correctly with all types that extends/implements `Sequence` (except string), including range (Python 3.x) and xrange (Python 2.x). :issue:`7323` by Viacheslav Kovalevskyi. @@ -698,7 +698,7 @@ Linear, kernelized and related models Decomposition, manifold learning and clustering -- The old :class:`mixture.DPGMM` is deprecated in favor of the new +- The old `mixture.DPGMM` is deprecated in favor of the new :class:`mixture.BayesianGaussianMixture` (with the parameter ``weight_concentration_prior_type='dirichlet_process'``). The new class solves the computational @@ -706,7 +706,7 @@ Decomposition, manifold learning and clustering Dirichlet process prior faster than before. :issue:`7295` by :user:`Wei Xue ` and :user:`Thierry Guillemot `. -- The old :class:`mixture.VBGMM` is deprecated in favor of the new +- The old `mixture.VBGMM` is deprecated in favor of the new :class:`mixture.BayesianGaussianMixture` (with the parameter ``weight_concentration_prior_type='dirichlet_distribution'``). The new class solves the computational @@ -714,15 +714,15 @@ Decomposition, manifold learning and clustering mixture faster than before. :issue:`6651` by :user:`Wei Xue ` and :user:`Thierry Guillemot `. -- The old :class:`mixture.GMM` is deprecated in favor of the new +- The old `mixture.GMM` is deprecated in favor of the new :class:`mixture.GaussianMixture`. The new class computes the Gaussian mixture faster than before and some of computational problems have been solved. :issue:`6666` by :user:`Wei Xue ` and :user:`Thierry Guillemot `. Model evaluation and meta-estimators -- The :mod:`sklearn.cross_validation`, :mod:`sklearn.grid_search` and - :mod:`sklearn.learning_curve` have been deprecated and the classes and +- The `sklearn.cross_validation`, `sklearn.grid_search` and + `sklearn.learning_curve` have been deprecated and the classes and functions have been reorganized into the :mod:`sklearn.model_selection` module. Ref :ref:`model_selection_changes` for more information. :issue:`4294` by `Raghav RV`_. @@ -747,7 +747,7 @@ Model evaluation and meta-estimators :class:`model_selection.GroupShuffleSplit`, :class:`model_selection.LeaveOneGroupOut` and :class:`model_selection.LeavePGroupsOut` respectively. 
- Also the parameter ``labels`` in the :func:`split` method of the newly + Also the parameter ``labels`` in the `split` method of the newly renamed splitters :class:`model_selection.LeaveOneGroupOut` and :class:`model_selection.LeavePGroupsOut` is renamed to ``groups``. Additionally in :class:`model_selection.LeavePGroupsOut`, @@ -813,4 +813,3 @@ Hauck, trevorstephens, Tue Vo, Varun, Varun Jewalikar, Viacheslav, Vighnesh Birodkar, Vikram, Villu Ruusmann, Vinayak Mehta, walter, waterponey, Wenhua Yang, Wenjian Huang, Will Welch, wyseguy7, xyguo, yanlend, Yaroslav Halchenko, yelite, Yen, YenChenLin, Yichuan Liu, Yoav Ram, Yoshiki, Zheng RuiFeng, zivori, Óscar Nájera - diff --git a/doc/whats_new/v0.19.rst b/doc/whats_new/v0.19.rst index c1a91af9f1ed4..b06e0b36b96a0 100644 --- a/doc/whats_new/v0.19.rst +++ b/doc/whats_new/v0.19.rst @@ -94,9 +94,9 @@ Regressions in 0.19.0 fixed in 0.19.1: longer accepted ``X`` as a list. :issue:`9600` by :user:`Rasul Kerimov `. -- Fixed handling of :func:`cross_val_predict` for binary classification with - ``method='decision_function'``. :issue:`9593` by :user:`Reiichiro Nakano - ` and core devs. +- Fixed handling of :func:`model_selection.cross_val_predict` for binary + classification with ``method='decision_function'``. :issue:`9593` by + :user:`Reiichiro Nakano ` and core devs. - Fix regression in :class:`pipeline.Pipeline` where it no longer accepted ``steps`` as a tuple. :issue:`9604` by :user:`Joris Van den Bossche @@ -119,7 +119,7 @@ Regressions in 0.19.0 fixed in 0.19.1: Enhancements ............ -- Our test suite and :func:`utils.estimator_checks.check_estimators` can now be +- Our test suite and :func:`utils.estimator_checks.check_estimator` can now be run without Nose installed. :issue:`9697` by :user:`Joan Massich `. - To improve usability of version 0.19's :class:`pipeline.Pipeline` @@ -362,11 +362,11 @@ Linear, kernelized and related models Other predictors -- Custom metrics for the :mod:`neighbors` binary trees now have +- Custom metrics for the :mod:`sklearn.neighbors` binary trees now have fewer constraints: they must take two 1d-arrays and return a float. :issue:`6288` by `Jake Vanderplas`_. -- ``algorithm='auto`` in :mod:`neighbors` estimators now chooses the most +- ``algorithm='auto`` in :mod:`sklearn.neighbors` estimators now chooses the most appropriate algorithm for all input types and metrics. :issue:`9145` by :user:`Herilalaina Rakotoarison ` and :user:`Reddy Chinthala `. @@ -396,7 +396,7 @@ Decomposition, manifold learning and clustering - Memory usage enhancements: Prevent cast from float32 to float64 in :class:`decomposition.PCA` and - :func:`decomposition.randomized_svd_low_rank`. + `decomposition.randomized_svd_low_rank`. :issue:`9067` by `Raghav RV`_. Preprocessing and feature selection @@ -409,7 +409,7 @@ Preprocessing and feature selection with ``center=True``. :issue:`8065` by :user:`Daniel LeJeune `. - Small performance improvement to n-gram creation in - :mod:`feature_extraction.text` by binding methods for loops and + :mod:`sklearn.feature_extraction.text` by binding methods for loops and special-casing unigrams. :issue:`7567` by :user:`Jaye Doepke ` - Relax assumption on the data for the @@ -486,12 +486,12 @@ Metrics Miscellaneous -- :func:`utils.check_estimator` now attempts to ensure that methods +- :func:`utils.estimator_checks.check_estimator` now attempts to ensure that methods transform, predict, etc. do not set attributes on the estimator. :issue:`7533` by :user:`Ekaterina Krivich `. 
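A minimal sketch of running the estimator checks mentioned above; note that recent scikit-learn releases expect an estimator instance rather than a class (illustration only)::

    from sklearn.linear_model import LogisticRegression
    from sklearn.utils.estimator_checks import check_estimator

    # runs the common API checks (fit/predict contracts, input validation,
    # no attributes set outside ``fit``, ...) and raises on the first failure
    check_estimator(LogisticRegression())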
- Added type checking to the ``accept_sparse`` parameter in - :mod:`utils.validation` methods. This parameter now accepts only boolean, + :mod:`sklearn.utils.validation` methods. This parameter now accepts only boolean, string, or list/tuple of strings. ``accept_sparse=None`` is deprecated and should be replaced by ``accept_sparse=False``. :issue:`7880` by :user:`Josh Karnofsky `. @@ -570,7 +570,7 @@ Linear, kernelized and related models the same result as the LassoLars implementation available in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez `. -- Fixed a bug in :class:`linear_model.RandomizedLasso`, +- Fixed a bug in `linear_model.RandomizedLasso`, :class:`linear_model.Lars`, :class:`linear_model.LassoLars`, :class:`linear_model.LarsCV` and :class:`linear_model.LassoLarsCV`, where the parameter ``precompute`` was not used consistently across @@ -611,7 +611,7 @@ Linear, kernelized and related models Other predictors -- Fix :class:`semi_supervised.BaseLabelPropagation` to correctly implement +- Fix `semi_supervised.BaseLabelPropagation` to correctly implement ``LabelPropagation`` and ``LabelSpreading`` as done in the referenced papers. :issue:`9239` by :user:`Andre Ambrosio Boechat `, :user:`Utkarsh Upadhyay @@ -642,7 +642,7 @@ Decomposition, manifold learning and clustering - Fixed the implementation of ``explained_variance_`` in :class:`decomposition.PCA`, - :class:`decomposition.RandomizedPCA` and + `decomposition.RandomizedPCA` and :class:`decomposition.IncrementalPCA`. :issue:`9105` by `Hanmin Qin `_. @@ -674,13 +674,13 @@ Decomposition, manifold learning and clustering - Fixed improper scaling in :class:`cross_decomposition.PLSRegression` with ``scale=True``. :issue:`7819` by :user:`jayzed82 `. -- :class:`cluster.bicluster.SpectralCoclustering` and - :class:`cluster.bicluster.SpectralBiclustering` ``fit`` method conforms +- :class:`cluster.SpectralCoclustering` and + :class:`cluster.SpectralBiclustering` ``fit`` method conforms with API by accepting ``y`` and returning the object. :issue:`6126`, :issue:`7814` by :user:`Laurent Direr ` and :user:`Maniteja Nandana `. -- Fix bug where :mod:`mixture` ``sample`` methods did not return as many +- Fix bug where :mod:`sklearn.mixture` ``sample`` methods did not return as many samples as requested. :issue:`7702` by :user:`Levi John Wolf `. - Fixed the shrinkage implementation in :class:`neighbors.NearestCentroid`. @@ -698,8 +698,8 @@ Preprocessing and feature selection selected fewer features than it should. :issue:`7490` by :user:`Peng Meng `. -- Fixed a bug where :class:`linear_model.RandomizedLasso` and - :class:`linear_model.RandomizedLogisticRegression` breaks for +- Fixed a bug where `linear_model.RandomizedLasso` and + `linear_model.RandomizedLogisticRegression` breaks for sparse input. :issue:`8259` by :user:`Aman Dalmia `. - Fix a bug where :class:`feature_extraction.FeatureHasher` @@ -715,14 +715,14 @@ Preprocessing and feature selection Model evaluation and meta-estimators -- Fixed a bug where :func:`model_selection.BaseSearchCV.inverse_transform` +- Fixed a bug where `model_selection.BaseSearchCV.inverse_transform` returns ``self.best_estimator_.transform()`` instead of ``self.best_estimator_.inverse_transform()``. :issue:`8344` by :user:`Akshay Gupta ` and :user:`Rasmus Eriksson `. 
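A minimal sketch of the ``accept_sparse`` values described above for :func:`utils.check_array` (boolean, string, or list/tuple of strings; illustration only)::

    import scipy.sparse as sp
    from sklearn.utils import check_array

    X = sp.csr_matrix([[0.0, 1.0], [2.0, 0.0]])

    check_array(X, accept_sparse="csr")             # accept CSR input as-is
    check_array(X, accept_sparse=["csr", "csc"])    # accept any listed sparse format
    check_array(X.toarray(), accept_sparse=False)   # dense only; use False, not None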
- Added ``classes_`` attribute to :class:`model_selection.GridSearchCV`, - :class:`model_selection.RandomizedSearchCV`, :class:`grid_search.GridSearchCV`, - and :class:`grid_search.RandomizedSearchCV` that matches the ``classes_`` + :class:`model_selection.RandomizedSearchCV`, `grid_search.GridSearchCV`, + and `grid_search.RandomizedSearchCV` that matches the ``classes_`` attribute of ``best_estimator_``. :issue:`7661` and :issue:`8295` by :user:`Alyssa Batula `, :user:`Dylan Werner-Meier `, and :user:`Stephen Hoover `. @@ -760,7 +760,7 @@ Metrics (`#7356 `_). By :user:`Nick Dingwall ` and `Gael Varoquaux`_. -- Fix a bug in :func:`metrics.classification._check_targets` +- Fix a bug in `metrics.classification._check_targets` which would return ``'binary'`` if ``y_true`` and ``y_pred`` were both ``'binary'`` but the union of ``y_true`` and ``y_pred`` was ``'multiclass'``. :issue:`8377` by `Loic Esteve`_. @@ -784,7 +784,7 @@ Miscellaneous incorrect result when ``n_samples`` is odd. :issue:`8198` by :user:`Josh Levy `. -- Some ``fetch_`` functions in :mod:`datasets` were ignoring the +- Some ``fetch_`` functions in :mod:`sklearn.datasets` were ignoring the ``download_if_missing`` keyword. :issue:`7944` by :user:`Ralf Gommers `. - Fix estimators to accept a ``sample_weight`` parameter of type @@ -795,7 +795,7 @@ Miscellaneous raising an exception if instability is identified. :issue:`7376` and :issue:`7331` by `Joel Nothman`_ and :user:`yangarbiter`. -- Fix a bug where :meth:`base.BaseEstimator.__getstate__` +- Fix a bug where `base.BaseEstimator.__getstate__` obstructed pickling customizations of child-classes, when used in a multiple inheritance context. :issue:`8316` by :user:`Holger Peters `. @@ -837,7 +837,7 @@ Linear, kernelized and related models Other predictors -- :class:`neighbors.LSHForest` has been deprecated and will be +- `neighbors.LSHForest` has been deprecated and will be removed in 0.21 due to poor performance. :issue:`9078` by :user:`Laurent Direr `. @@ -884,8 +884,8 @@ Preprocessing and feature selection ``alternate_sign``. :issue:`7565` by :user:`Roman Yurchak `. -- :class:`linear_model.RandomizedLogisticRegression`, - and :class:`linear_model.RandomizedLasso` have been deprecated and will +- `linear_model.RandomizedLogisticRegression`, + and `linear_model.RandomizedLasso` have been deprecated and will be removed in version 0.21. :issue:`8995` by :user:`Ramana.S `. @@ -944,7 +944,7 @@ Miscellaneous - SciPy >= 0.13.3 and NumPy >= 1.8.2 are now the minimum supported versions for scikit-learn. The following backported functions in - :mod:`utils` have been removed or deprecated accordingly. + :mod:`sklearn.utils` have been removed or deprecated accordingly. :issue:`8854` and :issue:`8874` by :user:`Naoya Kanai ` - The ``store_covariances`` and ``covariances_`` parameters of @@ -994,7 +994,7 @@ Miscellaneous - Ensure that estimators' attributes ending with ``_`` are not set in the constructor but only in the ``fit`` method. Most notably, - ensemble estimators (deriving from :class:`ensemble.BaseEnsemble`) + ensemble estimators (deriving from `ensemble.BaseEnsemble`) now only have ``self.estimators_`` available after ``fit``. :issue:`7464` by `Lars Buitinck`_ and `Loic Esteve`_. diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 1f899bfccc838..1c62006907231 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -34,7 +34,7 @@ The bundled version of joblib was upgraded from 0.13.0 to 0.13.2. :mod:`sklearn.decomposition` ............................ 
-- |Fix| Fixed a bug in :class:`cross_decomposition.CCA` improving numerical +- |Fix| Fixed a bug in :class:`cross_decomposition.CCA` improving numerical stability when `Y` is close to zero. :pr:`13903` by `Thomas Fan`_. @@ -104,7 +104,7 @@ Changelog :mod:`sklearn.feature_extraction` ................................. -- |Fix| Fixed a bug in :class:`feature_extraction.text.CountVectorizer` which +- |Fix| Fixed a bug in :class:`feature_extraction.text.CountVectorizer` which would result in the sparse feature matrix having conflicting `indptr` and `indices` precisions under very large vocabularies. :issue:`11295` by :user:`Gabriel Vacaliuc `. @@ -209,7 +209,7 @@ Changelog :mod:`sklearn.neighbors` ........................ -- |Fix| Fixed :class:`sklearn.neighbors.DistanceMetric` jaccard distance +- |Fix| Fixed `sklearn.neighbors.DistanceMetric` jaccard distance function to return 0 when two all-zero vectors are compared. :issue:`12685` by :user:`Thomas Fan `. @@ -342,7 +342,7 @@ Changelog those estimators as part of parallel parameter search or cross-validation. :issue:`12122` by :user:`Olivier Grisel `. -- |Fix| Fixed a bug affecting :class:`SGDClassifier` in the multiclass +- |Fix| Fixed a bug affecting :class:`linear_model.SGDClassifier` in the multiclass case. Each one-versus-all step is run in a :class:`joblib.Parallel` call and mutating a common parameter, causing a segmentation fault if called within a backend using processes and not threads. We now use ``require=sharedmem`` @@ -352,16 +352,16 @@ Changelog :mod:`sklearn.metrics` ...................... -- |Fix| Fixed a bug in :func:`metrics.pairwise.pairwise_distances_argmin_min` +- |Fix| Fixed a bug in `metrics.pairwise.pairwise_distances_argmin_min` which returned the square root of the distance when the metric parameter was set to "euclidean". :issue:`12481` by :user:`Jérémie du Boisberranger `. -- |Fix| Fixed a bug in :func:`metrics.pairwise.pairwise_distances_chunked` +- |Fix| Fixed a bug in `metrics.pairwise.pairwise_distances_chunked` which didn't ensure the diagonal is zero for euclidean distances. :issue:`12612` by :user:`Andreas Müller `. -- |API| The :func:`metrics.calinski_harabaz_score` has been renamed to +- |API| The `metrics.calinski_harabaz_score` has been renamed to :func:`metrics.calinski_harabasz_score` and will be removed in version 0.23. :issue:`12211` by :user:`Lisa Thomas `, :user:`Mark Hannel ` and :user:`Melissa Ferrari `. @@ -399,7 +399,7 @@ Changelog :issue:`12522` by :user:`Nicolas Hug`. - |Fix| Fixed a bug in :class:`preprocessing.OneHotEncoder` where transform - failed when set to ignore unknown numpy strings of different lengths + failed when set to ignore unknown numpy strings of different lengths :issue:`12471` by :user:`Gabriel Marzinotto`. - |API| The default value of the :code:`method` argument in @@ -419,7 +419,7 @@ Changelog - |Fix| Calling :func:`utils.check_array` on `pandas.Series`, which raised an error in 0.20.0, now returns the expected output again. :issue:`12625` by `Andreas Müller`_ - + Miscellaneous ............. @@ -493,7 +493,7 @@ including missing values, categorical variables, heterogeneous data, and features/targets with unusual distributions. Missing values in features, represented by NaNs, are now accepted in column-wise preprocessing such as scalers. Each feature is fitted disregarding -NaNs, and data containing NaNs can be transformed. The new :mod:`impute` +NaNs, and data containing NaNs can be transformed. 
The new :mod:`sklearn.impute` module provides estimators for learning despite missing data. :class:`~compose.ColumnTransformer` handles the case where different features @@ -545,7 +545,7 @@ random sampling procedures. - :class:`linear_model.SGDRegressor` (bug fix) - :class:`metrics.roc_auc_score` (bug fix) - :class:`metrics.roc_curve` (bug fix) -- :class:`neural_network.BaseMultilayerPerceptron` (bug fix) +- `neural_network.BaseMultilayerPerceptron` (bug fix) - :class:`neural_network.MLPClassifier` (bug fix) - :class:`neural_network.MLPRegressor` (bug fix) - The v0.19.0 release notes failed to mention a backwards incompatibility with @@ -616,7 +616,7 @@ Support for Python 3.3 has been officially dropped. by :user:`Jan Margeta `, :user:`Guillaume Lemaitre `, and :user:`Devansh D. `. -- |Fix| Fixed a bug in :func:`cluster.k_means_elkan` where the returned +- |Fix| Fixed a bug in `cluster.k_means_elkan` where the returned ``iteration`` was 1 less than the correct value. Also added the missing ``n_iter_`` attribute in the docstring of :class:`cluster.KMeans`. :issue:`11353` by :user:`Jeremie du Boisberranger `. @@ -654,8 +654,8 @@ Support for Python 3.3 has been officially dropped. - |Efficiency| Runtime improvements to :class:`covariance.GraphicalLasso`. :issue:`9858` by :user:`Steven Brown `. -- |API| The :func:`covariance.graph_lasso`, - :class:`covariance.GraphLasso` and :class:`covariance.GraphLassoCV` have been +- |API| The `covariance.graph_lasso`, + `covariance.GraphLasso` and `covariance.GraphLassoCV` have been renamed to :func:`covariance.graphical_lasso`, :class:`covariance.GraphicalLasso` and :class:`covariance.GraphicalLassoCV` respectively and will be removed in version 0.22. @@ -675,14 +675,14 @@ Support for Python 3.3 has been officially dropped. cluster. :issue:`8617` by :user:`Maskani Filali Mohamed ` and :user:`Konstantinos Katrioplas `. -- |Feature| Add ``filename`` attribute to :mod:`datasets` that have a CSV file. +- |Feature| Add ``filename`` attribute to :mod:`sklearn.datasets` that have a CSV file. :issue:`9101` by :user:`alex-33 ` and :user:`Maskani Filali Mohamed `. - |Feature| ``return_X_y`` parameter has been added to several dataset loaders. :issue:`10774` by :user:`Chris Catalfo `. -- |Fix| Fixed a bug in :func:`datasets.load_boston` which had a wrong data +- |Fix| Fixed a bug in `datasets.load_boston` which had a wrong data point. :issue:`10795` by :user:`Takeshi Yoshizawa `. - |Fix| Fixed a bug in :func:`datasets.load_iris` which had two wrong data points. @@ -696,7 +696,7 @@ Support for Python 3.3 has been officially dropped. data points could be generated. :issue:`10045` by :user:`Christian Braune `. -- |API| Deprecated :func:`sklearn.datasets.fetch_mldata` to be removed in +- |API| Deprecated `sklearn.datasets.fetch_mldata` to be removed in version 0.22. mldata.org is no longer operational. Until removal it will remain possible to load cached datasets. :issue:`11466` by `Joel Nothman`_. @@ -751,8 +751,8 @@ Support for Python 3.3 has been officially dropped. :mod:`sklearn.discriminant_analysis` .................................... -- |Efficiency| Memory usage improvement for :func:`_class_means` and - :func:`_class_cov` in :mod:`discriminant_analysis`. :issue:`10898` by +- |Efficiency| Memory usage improvement for `_class_means` and + `_class_cov` in :mod:`sklearn.discriminant_analysis`. :issue:`10898` by :user:`Nanxin Chen `. @@ -809,14 +809,14 @@ Support for Python 3.3 has been officially dropped. to 100 in 0.22. 
A FutureWarning is raised when the default value is used. :issue:`11542` by :user:`Anna Ayzenshtat `. -- |API| Classes derived from :class:`ensemble.BaseBagging`. The attribute +- |API| Classes derived from `ensemble.BaseBagging`. The attribute ``estimators_samples_`` will return a list of arrays containing the indices selected for each bootstrap instead of a list of arrays containing the mask of the samples selected for each bootstrap. Indices allows to repeat samples while mask does not allow this functionality. :issue:`9524` by :user:`Guillaume Lemaitre `. -- |Fix| :class:`ensemble.BaseBagging` where one could not deterministically +- |Fix| `ensemble.BaseBagging` where one could not deterministically reproduce ``fit`` result using the object attributes when ``random_state`` is set. :issue:`9723` by :user:`Guillaume Lemaitre `. @@ -925,7 +925,7 @@ Support for Python 3.3 has been officially dropped. :class:`linear_model.BayesianRidge` for weighted linear regression. :issue:`10112` by :user:`Peter St. John `. -- |Fix| Fixed a bug in :func:`logistic.logistic_regression_path` to ensure +- |Fix| Fixed a bug in `logistic.logistic_regression_path` to ensure that the returned coefficients are correct when ``multiclass='multinomial'``. Previously, some of the coefficients would override each other, leading to incorrect results in :class:`linear_model.LogisticRegressionCV`. @@ -1027,7 +1027,7 @@ Support for Python 3.3 has been officially dropped. - |Feature| Support sparse input in :meth:`manifold.Isomap.fit`. :issue:`8554` by :user:`Leland McInnes `. -- |Feature| :func:`manifold.t_sne.trustworthiness` accepts metrics other than +- |Feature| `manifold.t_sne.trustworthiness` accepts metrics other than Euclidean. :issue:`9775` by :user:`William de Vazelhes `. - |Fix| Fixed a bug in :func:`manifold.spectral_embedding` where the @@ -1037,14 +1037,14 @@ Support for Python 3.3 has been officially dropped. `. - |API| |Feature| Deprecate ``precomputed`` parameter in function - :func:`manifold.t_sne.trustworthiness`. Instead, the new parameter ``metric`` + `manifold.t_sne.trustworthiness`. Instead, the new parameter ``metric`` should be used with any compatible metric including 'precomputed', in which case the input matrix ``X`` should be a matrix of pairwise distances or squared distances. :issue:`9775` by :user:`William de Vazelhes `. - |API| Deprecate ``precomputed`` parameter in function - :func:`manifold.t_sne.trustworthiness`. Instead, the new parameter + `manifold.t_sne.trustworthiness`. Instead, the new parameter ``metric`` should be used with any compatible metric including 'precomputed', in which case the input matrix ``X`` should be a matrix of pairwise distances or squared distances. :issue:`9775` by @@ -1161,12 +1161,12 @@ Support for Python 3.3 has been officially dropped. calling :term:`fit` and :term:`predict`. :issue:`10336` by :user:`Shu Haoran ` and :user:`Andrew Peng `. -- |Fix| Fixed a bug in :class:`mixture.BaseMixture` where the reported `n_iter_` was +- |Fix| Fixed a bug in `mixture.BaseMixture` where the reported `n_iter_` was missing an iteration. It affected :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture`. :issue:`10740` by :user:`Erich Schubert ` and :user:`Guillaume Lemaitre `. 
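A minimal sketch, on made-up data, of the :class:`mixture.GaussianMixture` attributes touched by the ``n_iter_`` fix above (illustration only)::

    import numpy as np
    from sklearn.mixture import GaussianMixture

    rng = np.random.RandomState(0)
    X = np.vstack([rng.normal(0.0, 1.0, size=(100, 2)),
                   rng.normal(5.0, 1.0, size=(100, 2))])

    gm = GaussianMixture(n_components=2, n_init=3, random_state=0).fit(X)
    print(gm.n_iter_)        # EM iterations of the best run
    print(gm.lower_bound_)   # best lower bound across the n_init initializations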
-- |Fix| Fixed a bug in :class:`mixture.BaseMixture` and its subclasses +- |Fix| Fixed a bug in `mixture.BaseMixture` and its subclasses :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture` where the ``lower_bound_`` was not the max lower bound across all initializations (when ``n_init > 1``), but just the lower bound of the last @@ -1192,7 +1192,7 @@ Support for Python 3.3 has been officially dropped. :func:`model_selection.cross_val_score`, :func:`model_selection.learning_curve` and :func:`model_selection.validation_curve` to control the behavior triggered - when an error occurs in :func:`model_selection._fit_and_score`. + when an error occurs in `model_selection._fit_and_score`. :issue:`11576` by :user:`Samuel O. Ronsin `. - |Feature| `BaseSearchCV` now has an experimental, private interface to @@ -1271,7 +1271,7 @@ Support for Python 3.3 has been officially dropped. parallelized according to ``n_jobs`` regardless of ``algorithm``. :issue:`10887` by :user:`Joël Billaud `. -- |Efficiency| :mod:`Nearest neighbors ` query methods are now more +- |Efficiency| :mod:`sklearn.neighbors` query methods are now more memory efficient when ``algorithm='brute'``. :issue:`11136` by `Joel Nothman`_ and :user:`Aman Dalmia `. @@ -1305,7 +1305,7 @@ Support for Python 3.3 has been officially dropped. :issue:`11556` by :user:`Jake VanderPlas ` - |Fix| Fixed a bug in :class:`neighbors.KDTree` and :class:`neighbors.BallTree` where - pickled tree objects would change their type to the super class :class:`BinaryTree`. + pickled tree objects would change their type to the super class `BinaryTree`. :issue:`11774` by :user:`Nicolas Hug `. @@ -1313,13 +1313,13 @@ Support for Python 3.3 has been officially dropped. ............................. - |Feature| Add `n_iter_no_change` parameter in - :class:`neural_network.BaseMultilayerPerceptron`, + `neural_network.BaseMultilayerPerceptron`, :class:`neural_network.MLPRegressor`, and :class:`neural_network.MLPClassifier` to give control over maximum number of epochs to not meet ``tol`` improvement. :issue:`9456` by :user:`Nicholas Nadeau `. -- |Fix| Fixed a bug in :class:`neural_network.BaseMultilayerPerceptron`, +- |Fix| Fixed a bug in `neural_network.BaseMultilayerPerceptron`, :class:`neural_network.MLPRegressor`, and :class:`neural_network.MLPClassifier` with new ``n_iter_no_change`` parameter now at 10 from previously hardcoded 2. @@ -1441,13 +1441,13 @@ Support for Python 3.3 has been officially dropped. :class:`compose.ColumnTransformer`. :issue:`10521` by `Joris Van den Bossche`_. -- |API| Deprecate :class:`preprocessing.Imputer` and move +- |API| Deprecate `preprocessing.Imputer` and move the corresponding module to :class:`impute.SimpleImputer`. :issue:`9726` by :user:`Kumar Ashutosh `. - |API| The ``axis`` parameter that was in - :class:`preprocessing.Imputer` is no longer present in + `preprocessing.Imputer` is no longer present in :class:`impute.SimpleImputer`. The behavior is equivalent to ``axis=0`` (impute along columns). Row-wise imputation can be performed with FunctionTransformer @@ -1458,7 +1458,7 @@ Support for Python 3.3 has been officially dropped. - |API| The NaN marker for the missing values has been changed between the :class:`preprocessing.Imputer` and the - :class:`impute.SimpleImputer`. + `impute.SimpleImputer`. ``missing_values='NaN'`` should now be ``missing_values=np.nan``. :issue:`11211` by :user:`Jeremie du Boisberranger `. @@ -1491,15 +1491,15 @@ Support for Python 3.3 has been officially dropped. 
................... - |Enhancement| Although private (and hence not assured API stability), - :class:`tree._criterion.ClassificationCriterion` and - :class:`tree._criterion.RegressionCriterion` may now be cimported and + `tree._criterion.ClassificationCriterion` and + `tree._criterion.RegressionCriterion` may now be cimported and extended. :issue:`10325` by :user:`Camil Staps `. -- |Fix| Fixed a bug in :class:`tree.BaseDecisionTree` with `splitter="best"` +- |Fix| Fixed a bug in `tree.BaseDecisionTree` with `splitter="best"` where split threshold could become infinite when values in X were near infinite. :issue:`10536` by :user:`Jonathan Ohayon `. -- |Fix| Fixed a bug in :class:`tree.MAE` to ensure sample weights are being +- |Fix| Fixed a bug in `tree.MAE` to ensure sample weights are being used during the calculation of tree MAE impurity. Previous behaviour could cause suboptimal splits to be chosen since the impurity calculation considered all samples to be of equal weight importance. @@ -1559,7 +1559,7 @@ Multiple modules - |API| Changed warning type from :class:`UserWarning` to :class:`exceptions.ConvergenceWarning` for failing convergence in - :func:`linear_model.logistic_regression_path`, + `linear_model.logistic_regression_path`, :class:`linear_model.RANSACRegressor`, :func:`linear_model.ridge_regression`, :class:`gaussian_process.GaussianProcessRegressor`, :class:`gaussian_process.GaussianProcessClassifier`, diff --git a/doc/whats_new/v0.21.rst b/doc/whats_new/v0.21.rst index 6f6e7eed19bc2..40bdf7540bfc4 100644 --- a/doc/whats_new/v0.21.rst +++ b/doc/whats_new/v0.21.rst @@ -67,8 +67,8 @@ Changelog :mod:`sklearn.ensemble` ....................... -- |Fix| Fix zero division error in :func:`HistGradientBoostingClassifier` and - :func:`HistGradientBoostingRegressor`. +- |Fix| Fix zero division error in :class:`ensemble.HistGradientBoostingClassifier` and + :class:`ensemble.HistGradientBoostingRegressor`. :pr:`14024` by `Nicolas Hug `. :mod:`sklearn.impute` @@ -81,7 +81,7 @@ Changelog :mod:`sklearn.inspection` ......................... -- |Fix| Fixed a bug in :func:`inspection.plot_partial_dependence` where +- |Fix| Fixed a bug in :func:`inspection.plot_partial_dependence` where ``target`` parameter was not being taken into account for multiclass problems. :pr:`14393` by :user:`Guillem G. Subies `. @@ -109,7 +109,7 @@ Changelog :mod:`sklearn.tree` ................... -- |Fix| Fixed bug in :func:`tree.export_text` when the tree has one feature and +- |Fix| Fixed bug in :func:`tree.export_text` when the tree has one feature and a single feature name is passed in. :pr:`14053` by `Thomas Fan`. - |Fix| Fixed an issue with :func:`plot_tree` where it displayed @@ -129,7 +129,7 @@ Changelog :mod:`sklearn.decomposition` ............................ -- |Fix| Fixed a bug in :class:`cross_decomposition.CCA` improving numerical +- |Fix| Fixed a bug in :class:`cross_decomposition.CCA` improving numerical stability when `Y` is close to zero. :pr:`13903` by `Thomas Fan`_. :mod:`sklearn.metrics` @@ -148,11 +148,11 @@ Changelog by :user:`James Myatt `. -:mod:`sklearn.utils.sparsefuncs` -................................ +`sklearn.utils.sparsefuncs` +........................... -- |Fix| Fixed a bug where :func:`min_max_axis` would fail on 32-bit systems - for certain large inputs. This affects :class:`preprocessing.MaxAbsScaler`, +- |Fix| Fixed a bug where `min_max_axis` would fail on 32-bit systems + for certain large inputs. 
This affects :class:`preprocessing.MaxAbsScaler`, :func:`preprocessing.normalize` and :class:`preprocessing.LabelBinarizer`. :pr:`13741` by :user:`Roddy MacSween `. @@ -230,7 +230,7 @@ random sampling procedures. - :func:`svm.SVC.decision_function` and :func:`multiclass.OneVsOneClassifier.decision_function`. |Fix| - :class:`linear_model.SGDClassifier` and any derived classifiers. |Fix| -- Any model using the :func:`linear_model._sag.sag_solver` function with a `0` +- Any model using the `linear_model._sag.sag_solver` function with a `0` seed, including :class:`linear_model.LogisticRegression`, :class:`linear_model.LogisticRegressionCV`, :class:`linear_model.Ridge`, and :class:`linear_model.RidgeCV` with 'sag' solver. |Fix| @@ -420,7 +420,7 @@ Support for Python 3.4 and below has been officially dropped. >>> from sklearn.experimental import enable_hist_gradient_boosting # noqa >>> # now you can import normally from sklearn.ensemble >>> from sklearn.ensemble import HistGradientBoostingClassifier - + .. note:: Update: since version 1.0, these estimators are not experimental anymore and you don't need to use `from sklearn.experimental import @@ -508,24 +508,24 @@ Support for Python 3.4 and below has been officially dropped. if any targets were strings. :pr:`12834` by :user:`Elizabeth Sander `. -- |Fix| Fixed a bug in :class:`ensemble.gradient_boosting.LossFunction` and - :class:`ensemble.gradient_boosting.LeastSquaresError` where the default +- |Fix| Fixed a bug in `ensemble.gradient_boosting.LossFunction` and + `ensemble.gradient_boosting.LeastSquaresError` where the default value of ``learning_rate`` in ``update_terminal_regions`` is not consistent with the document and the caller functions. Note however that directly using these loss functions is deprecated. :pr:`6463` by :user:`movelikeriver `. -- |Fix| :func:`ensemble.partial_dependence` (and consequently the new +- |Fix| `ensemble.partial_dependence` (and consequently the new version :func:`sklearn.inspection.partial_dependence`) now takes sample weights into account for the partial dependence computation when the gradient boosting model has been trained with sample weights. :pr:`13193` by :user:`Samuel O. Ronsin `. -- |API| :func:`ensemble.partial_dependence` and - :func:`ensemble.plot_partial_dependence` are now deprecated in favor of +- |API| `ensemble.partial_dependence` and + `ensemble.plot_partial_dependence` are now deprecated in favor of :func:`inspection.partial_dependence` and - :func:`inspection.plot_partial_dependence`. + `inspection.plot_partial_dependence`. :pr:`12599` by :user:`Trevor Stephens` and :user:`Nicolas Hug`. @@ -540,10 +540,10 @@ Support for Python 3.4 and below has been officially dropped. :class:`pipeline.FeatureUnion` and :class:`compose.ColumnTransformer`). :pr:`13780` by :user:`Guillaume Lemaitre `. -:mod:`sklearn.externals` -........................ +`sklearn.externals` +................... -- |API| Deprecated :mod:`externals.six` since we have dropped support for +- |API| Deprecated `externals.six` since we have dropped support for Python 2.7. :pr:`12916` by :user:`Hanmin Qin `. :mod:`sklearn.feature_extraction` @@ -599,7 +599,7 @@ Support for Python 3.4 and below has been officially dropped. (new subpackage) - |Feature| Partial dependence plots - (:func:`inspection.plot_partial_dependence`) are now supported for + (`inspection.plot_partial_dependence`) are now supported for any regressor or classifier (provided that they have a `predict_proba` method). 
:pr:`12599` by :user:`Trevor Stephens ` and :user:`Nicolas Hug `. @@ -627,7 +627,7 @@ Support for Python 3.4 and below has been officially dropped. users to compute :class:`linear_model.lars_path` without providing ``X`` and ``y``. :pr:`11699` by :user:`Kuai Yu `. -- |Efficiency| :func:`linear_model.make_dataset` now preserves +- |Efficiency| `linear_model.make_dataset` now preserves ``float32`` and ``float64`` dtypes, reducing memory consumption in stochastic gradient, SAG and SAGA solvers. :pr:`8769` and :pr:`11000` by @@ -683,7 +683,7 @@ Support for Python 3.4 and below has been officially dropped. case. :pr:`13389` by :user:`Pierre Glaser `. - |Fix| Fixed a bug in - :class:`linear_model.stochastic_gradient.BaseSGDClassifier` that was not + `linear_model.stochastic_gradient.BaseSGDClassifier` that was not deterministic when trained in a multi-class setting on several threads. :pr:`13422` by :user:`Clément Doumouro `. @@ -708,7 +708,7 @@ Support for Python 3.4 and below has been officially dropped. in version 0.23. Use :class:`linear_model.lars_path_gram` instead. :pr:`11699` by :user:`Kuai Yu `. -- |API| :func:`linear_model.logistic_regression_path` is deprecated +- |API| `linear_model.logistic_regression_path` is deprecated in version 0.21 and will be removed in version 0.23. :pr:`12821` by :user:`Nicolas Hug `. @@ -719,7 +719,7 @@ Support for Python 3.4 and below has been officially dropped. :mod:`sklearn.manifold` ....................... -- |Efficiency| Make :func:`manifold.tsne.trustworthiness` use an inverted index +- |Efficiency| Make :func:`manifold.trustworthiness` use an inverted index instead of an `np.where` lookup to find the rank of neighbors in the input space. This improves efficiency in particular when computed with lots of neighbors and/or small datasets. @@ -789,13 +789,13 @@ Support for Python 3.4 and below has been officially dropped. in version 0.21 and will be removed in version 0.23. :pr:`10580` by :user:`Reshama Shaikh ` and :user:`Sandra Mitrovic `. -- |Fix| The function :func:`metrics.pairwise.euclidean_distances`, and - therefore several estimators with ``metric='euclidean'``, suffered from - numerical precision issues with ``float32`` features. Precision has been - increased at the cost of a small drop of performance. :pr:`13554` by +- |Fix| The function :func:`metrics.pairwise.euclidean_distances`, and + therefore several estimators with ``metric='euclidean'``, suffered from + numerical precision issues with ``float32`` features. Precision has been + increased at the cost of a small drop of performance. :pr:`13554` by :user:`Celelibi` and :user:`Jérémie du Boisberranger `. -- |API| :func:`metrics.jaccard_similarity_score` is deprecated in favour of +- |API| `metrics.jaccard_similarity_score` is deprecated in favour of the more consistent :func:`metrics.jaccard_score`. The former behavior for binary and multiclass targets is broken. :pr:`13151` by `Joel Nothman`_. @@ -803,7 +803,7 @@ Support for Python 3.4 and below has been officially dropped. :mod:`sklearn.mixture` ...................... -- |Fix| Fixed a bug in :class:`mixture.BaseMixture` and therefore on estimators +- |Fix| Fixed a bug in `mixture.BaseMixture` and therefore on estimators based on it, i.e. :class:`mixture.GaussianMixture` and :class:`mixture.BayesianGaussianMixture`, where ``fit_predict`` and ``fit.predict`` were not equivalent. :pr:`13142` by @@ -865,7 +865,7 @@ Support for Python 3.4 and below has been officially dropped. 
`predict_proba` method incorrectly checked for `predict_proba` attribute in the estimator object. :pr:`12222` by :user:`Rebekah Kim ` - + :mod:`sklearn.neighbors` ........................ @@ -958,7 +958,7 @@ Support for Python 3.4 and below has been officially dropped. - |API| The default value of `copy` in :func:`preprocessing.quantile_transform` will change from False to True in 0.23 in order to make it more consistent with the default `copy` values of other functions in - :mod:`preprocessing` and prevent unexpected side effects by modifying + :mod:`sklearn.preprocessing` and prevent unexpected side effects by modifying the value of `X` inplace. :pr:`13459` by :user:`Hunter McGushion `. @@ -976,7 +976,7 @@ Support for Python 3.4 and below has been officially dropped. ................... - |Feature| Decision Trees can now be plotted with matplotlib using - :func:`tree.plot_tree` without relying on the ``dot`` library, + `tree.plot_tree` without relying on the ``dot`` library, removing a hard-to-install dependency. :pr:`8508` by `Andreas Müller`_. - |Feature| Decision Trees can now be exported in a human readable @@ -984,7 +984,7 @@ Support for Python 3.4 and below has been officially dropped. :pr:`6261` by `Giuseppe Vettigli `. - |Feature| ``get_n_leaves()`` and ``get_depth()`` have been added to - :class:`tree.BaseDecisionTree` and consequently all estimators based + `tree.BaseDecisionTree` and consequently all estimators based on it, including :class:`tree.DecisionTreeClassifier`, :class:`tree.DecisionTreeRegressor`, :class:`tree.ExtraTreeClassifier`, and :class:`tree.ExtraTreeRegressor`. @@ -994,7 +994,7 @@ Support for Python 3.4 and below has been officially dropped. classification targets with string labels, despite accepting them in `fit`. :pr:`11458` by :user:`Mitar Milutinovic `. -- |Fix| Fixed an issue with :class:`tree.BaseDecisionTree` +- |Fix| Fixed an issue with `tree.BaseDecisionTree` and consequently all estimators based on it, including :class:`tree.DecisionTreeClassifier`, :class:`tree.DecisionTreeRegressor`, :class:`tree.ExtraTreeClassifier`, @@ -1013,7 +1013,7 @@ Support for Python 3.4 and below has been officially dropped. - |API| Deprecated ``warn_on_dtype`` parameter from :func:`utils.check_array` and :func:`utils.check_X_y`. Added explicit warning for dtype conversion - in :func:`check_pairwise_arrays` if the ``metric`` being passed is a + in `check_pairwise_arrays` if the ``metric`` being passed is a pairwise boolean metric. :pr:`13382` by :user:`Prathmesh Savale `. @@ -1038,7 +1038,7 @@ Multiple modules dtype in multiple estimators. :pr:`11973` by :user:`Roman Yurchak `. -- |Fix| Fixed a bug in the implementation of the :func:`our_rand_r` +- |Fix| Fixed a bug in the implementation of the `our_rand_r` helper function that was not behaving consistently across platforms. :pr:`13422` by :user:`Madhura Parikh ` and :user:`Clément Doumouro `. @@ -1083,7 +1083,7 @@ Baibak, daten-kieker, Denis Kataev, Didi Bar-Zev, Dillon Gardner, Dmitry Mottl, Dmitry Vukolov, Dougal J. 
Sutherland, Dowon, drewmjohnston, Dror Atariah, Edward J Brown, Ekaterina Krivich, Elizabeth Sander, Emmanuel Arias, Eric Chang, Eric Larson, Erich Schubert, esvhd, Falak, Feda Curic, Federico Caselli, -Frank Hoang, Fibinse Xavier`, Finn O'Shea, Gabriel Marzinotto, Gabriel Vacaliuc, +Frank Hoang, Fibinse Xavier`, Finn O'Shea, Gabriel Marzinotto, Gabriel Vacaliuc, Gabriele Calvo, Gael Varoquaux, GauravAhlawat, Giuseppe Vettigli, Greg Gandenberger, Guillaume Fournier, Guillaume Lemaitre, Gustavo De Mari Pereira, Hanmin Qin, haroldfox, hhu-luqi, Hunter McGushion, Ian Sanders, JackLangerman, Jacopo diff --git a/doc/whats_new/v0.22.rst b/doc/whats_new/v0.22.rst index 0aae7626e61e6..da2f5e8796db8 100644 --- a/doc/whats_new/v0.22.rst +++ b/doc/whats_new/v0.22.rst @@ -27,13 +27,13 @@ Changelog :mod:`sklearn.metrics` ...................... -- |Fix| Fixed a bug in :func:`metrics.plot_roc_curve` where +- |Fix| Fixed a bug in `metrics.plot_roc_curve` where the name of the estimator was passed in the :class:`metrics.RocCurveDisplay` instead of the parameter `name`. It results in a different plot when calling :meth:`metrics.RocCurveDisplay.plot` for the subsequent times. :pr:`16500` by :user:`Guillaume Lemaitre `. -- |Fix| Fixed a bug in :func:`metrics.plot_precision_recall_curve` where the +- |Fix| Fixed a bug in `metrics.plot_precision_recall_curve` where the name of the estimator was passed in the :class:`metrics.PrecisionRecallDisplay` instead of the parameter `name`. It results in a different plot when calling @@ -41,12 +41,12 @@ Changelog :pr:`16505` by :user:`Guillaume Lemaitre `. :mod:`sklearn.neighbors` -.............................. +........................ -- |Fix| Fix a bug which converted a list of arrays into a 2-D object +- |Fix| Fix a bug which converted a list of arrays into a 2-D object array instead of a 1-D array containing NumPy arrays. This bug was affecting :meth:`neighbors.NearestNeighbors.radius_neighbors`. - :pr:`16076` by :user:`Guillaume Lemaitre ` and + :pr:`16076` by :user:`Guillaume Lemaitre ` and :user:`Alex Shacked `. .. _changes_0_22_1: @@ -82,18 +82,18 @@ Changelog Follow-up of :pr:`15898` by :user:`Shivam Gargsya `. :pr:`15933` by :user:`Guillaume Lemaitre ` and `Olivier Grisel`_. -- |Fix| :func:`inspection.plot_partial_dependence` and +- |Fix| `inspection.plot_partial_dependence` and :meth:`inspection.PartialDependenceDisplay.plot` now consistently checks the number of axes passed in. :pr:`15760` by `Thomas Fan`_. :mod:`sklearn.metrics` ...................... -- |Fix| :func:`metrics.plot_confusion_matrix` now raises error when `normalize` +- |Fix| `metrics.plot_confusion_matrix` now raises error when `normalize` is invalid. Previously, it runs fine with no normalization. :pr:`15888` by `Hanmin Qin`_. -- |Fix| :func:`metrics.plot_confusion_matrix` now colors the label color +- |Fix| `metrics.plot_confusion_matrix` now colors the label color correctly to maximize contrast with its background. :pr:`15936` by `Thomas Fan`_ and :user:`DizietAsahi`. @@ -101,8 +101,8 @@ Changelog value of the ``zero_division`` keyword argument. :pr:`15879` by :user:`Bibhash Chandra Mitra `. -- |Fix| Fixed a bug in :func:`metrics.plot_confusion_matrix` to correctly - pass the `values_format` parameter to the :class:`ConfusionMatrixDisplay` +- |Fix| Fixed a bug in `metrics.plot_confusion_matrix` to correctly + pass the `values_format` parameter to the :class:`metrics.ConfusionMatrixDisplay` plot() call. :pr:`15937` by :user:`Stephen Blystone `. 
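As a quick illustration of the `values_format` behaviour fixed above, a minimal sketch
using the current :meth:`metrics.ConfusionMatrixDisplay.from_estimator` API (the
deprecated `plot_confusion_matrix` helper has since been removed; the dataset and
estimator are illustrative and matplotlib is required)::

    >>> from sklearn.datasets import load_iris
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.metrics import ConfusionMatrixDisplay
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = LogisticRegression(max_iter=1000).fit(X, y)
    >>> # values_format controls how the cell counts are rendered
    >>> disp = ConfusionMatrixDisplay.from_estimator(clf, X, y, values_format="d")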
:mod:`sklearn.model_selection` @@ -118,7 +118,7 @@ Changelog .......................... - |Fix| Removed `abstractmethod` decorator for the method `_check_X` in - :class:`naive_bayes.BaseNB` that could break downstream projects inheriting + `naive_bayes.BaseNB` that could break downstream projects inheriting from this deprecated public base class. :pr:`15996` by :user:`Brigitta Sipőcz `. @@ -143,7 +143,7 @@ Changelog - |Fix| :func:`utils.check_array` now correctly converts pandas DataFrame with boolean columns to floats. :pr:`15797` by `Thomas Fan`_. -- |Fix| :func:`utils.check_is_fitted` accepts back an explicit ``attributes`` +- |Fix| :func:`utils.validation.check_is_fitted` accepts back an explicit ``attributes`` argument to check for specific attributes as explicit markers of a fitted estimator. When no explicit ``attributes`` are provided, only the attributes that end with a underscore and do not start with double underscore are used @@ -390,12 +390,12 @@ Changelog :mod:`sklearn.decomposition` ............................ -- |Efficiency| :class:`decomposition.NMF(solver='mu')` fitted on sparse input +- |Efficiency| :class:`decomposition.NMF` with `solver="mu"` fitted on sparse input matrices now uses batching to avoid briefly allocating an array with size - (#non-zero elements, n_components). :pr:`15257` by `Mart Willocx `_. + (#non-zero elements, n_components). :pr:`15257` by :user:`Mart Willocx `. -- |Enhancement| :func:`decomposition.dict_learning()` and - :func:`decomposition.dict_learning_online()` now accept `method_max_iter` and +- |Enhancement| :func:`decomposition.dict_learning` and + :func:`decomposition.dict_learning_online` now accept `method_max_iter` and pass it to :meth:`decomposition.sparse_encode`. :issue:`12650` by `Adrin Jalali`_. @@ -451,7 +451,7 @@ Changelog - |Feature| Estimators now have an additional `warm_start` parameter that enables warm starting. :pr:`14012` by :user:`Johann Faouzi `. - |Feature| :func:`inspection.partial_dependence` and - :func:`inspection.plot_partial_dependence` now support the fast 'recursion' + `inspection.plot_partial_dependence` now support the fast 'recursion' method for both estimators. :pr:`13769` by `Nicolas Hug`_. - |Enhancement| for :class:`ensemble.HistGradientBoostingClassifier` the training loss or score is now monitored on a class-wise stratified @@ -503,7 +503,7 @@ Changelog - |Fix| Stacking and Voting estimators now ensure that their underlying estimators are either all classifiers or all regressors. :class:`ensemble.StackingClassifier`, :class:`ensemble.StackingRegressor`, - and :class:`ensemble.VotingClassifier` and :class:`VotingRegressor` + and :class:`ensemble.VotingClassifier` and :class:`ensemble.VotingRegressor` now raise consistent error messages. :pr:`15084` by `Guillaume Lemaitre`_. @@ -529,10 +529,10 @@ Changelog :pr:`14602` by :user:`Gaurav Chawla `. - |Fix| Functions created by ``build_preprocessor`` and ``build_analyzer`` of - :class:`feature_extraction.text.VectorizerMixin` can now be pickled. + `feature_extraction.text.VectorizerMixin` can now be pickled. :pr:`14430` by :user:`Dillon Niederhut `. -- |Fix| :func:`feature_extraction.text.strip_accents_unicode` now correctly +- |Fix| `feature_extraction.text.strip_accents_unicode` now correctly removes accents from strings that are in NFKD normalized form. :pr:`15100` by :user:`Daniel Grady `. @@ -548,8 +548,8 @@ Changelog :mod:`sklearn.feature_selection` ................................ 
-- |Enhancement| Updated the following :mod:`feature_selection` estimators to allow - NaN/Inf values in ``transform`` and ``fit``: +- |Enhancement| Updated the following :mod:`sklearn.feature_selection` + estimators to allow NaN/Inf values in ``transform`` and ``fit``: :class:`feature_selection.RFE`, :class:`feature_selection.RFECV`, :class:`feature_selection.SelectFromModel`, and :class:`feature_selection.VarianceThreshold`. Note that if the underlying @@ -570,7 +570,7 @@ Changelog of generic objects (e.g. strings, trees, graphs, etc.) as the ``X`` argument to their training/prediction methods. A user-defined kernel should be provided for computing the kernel matrix among - the generic objects, and should inherit from :class:`gaussian_process.kernels.GenericKernelMixin` + the generic objects, and should inherit from `gaussian_process.kernels.GenericKernelMixin` to notify the GPR/GPC model that it handles non-vectorial samples. :pr:`15557` by :user:`Yu-Hang Tang `. @@ -616,18 +616,18 @@ Changelog respect to a given scoring function. :issue:`13146` by `Thomas Fan`_. - |Feature| :func:`inspection.partial_dependence` and - :func:`inspection.plot_partial_dependence` now support the fast 'recursion' + `inspection.plot_partial_dependence` now support the fast 'recursion' method for :class:`ensemble.HistGradientBoostingClassifier` and :class:`ensemble.HistGradientBoostingRegressor`. :pr:`13769` by `Nicolas Hug`_. -- |Enhancement| :func:`inspection.plot_partial_dependence` has been extended to +- |Enhancement| `inspection.plot_partial_dependence` has been extended to now support the new visualization API described in the :ref:`User Guide `. :pr:`14646` by `Thomas Fan`_. - |Enhancement| :func:`inspection.partial_dependence` accepts pandas DataFrame and :class:`pipeline.Pipeline` containing :class:`compose.ColumnTransformer`. - In addition :func:`inspection.plot_partial_dependence` will use the column + In addition `inspection.plot_partial_dependence` will use the column names by default when a dataframe is passed. :pr:`14028` and :pr:`15429` by `Guillaume Lemaitre`_. @@ -712,14 +712,15 @@ Changelog :mod:`sklearn.metrics` ...................... -- |MajorFeature| :func:`metrics.plot_roc_curve` has been added to plot roc +- |MajorFeature| `metrics.plot_roc_curve` has been added to plot roc curves. This function introduces the visualization API described in the :ref:`User Guide `. :pr:`14357` by `Thomas Fan`_. - |Feature| Added a new parameter ``zero_division`` to multiple classification - metrics: :func:`precision_score`, :func:`recall_score`, :func:`f1_score`, - :func:`fbeta_score`, :func:`precision_recall_fscore_support`, - :func:`classification_report`. This allows to set returned value for + metrics: :func:`metrics.precision_score`, :func:`metrics.recall_score`, + :func:`metrics.f1_score`, :func:`metrics.fbeta_score`, + :func:`metrics.precision_recall_fscore_support`, + :func:`metrics.classification_report`. This allows to set returned value for ill-defined metrics. :pr:`14900` by :user:`Marc Torrellas Socastro `. @@ -732,16 +733,16 @@ Changelog Gain and Normalized Discounted Cumulative Gain. :pr:`9951` by :user:`Jérôme Dockès `. -- |Feature| :func:`metrics.plot_precision_recall_curve` has been added to plot +- |Feature| `metrics.plot_precision_recall_curve` has been added to plot precision recall curves. :pr:`14936` by `Thomas Fan`_. -- |Feature| :func:`metrics.plot_confusion_matrix` has been added to plot +- |Feature| `metrics.plot_confusion_matrix` has been added to plot confusion matrices. 
:pr:`15083` by `Thomas Fan`_. - |Feature| Added multiclass support to :func:`metrics.roc_auc_score` with corresponding scorers `'roc_auc_ovr'`, `'roc_auc_ovo'`, `'roc_auc_ovr_weighted'`, and `'roc_auc_ovo_weighted'`. - :pr:`12789` and :pr:`15274` by + :pr:`12789` and :pr:`15274` by :user:`Kathy Chen `, :user:`Mohamed Maskani `, and `Thomas Fan`_. @@ -877,7 +878,7 @@ Changelog ............................. - |Feature| Add `max_fun` parameter in - :class:`neural_network.BaseMultilayerPerceptron`, + `neural_network.BaseMultilayerPerceptron`, :class:`neural_network.MLPRegressor`, and :class:`neural_network.MLPClassifier` to give control over maximum number of function evaluation to not meet ``tol`` improvement. @@ -949,7 +950,7 @@ Changelog :class:`svm.OneClassSVM` was previously non-initialized, and had size 2. It has now size 1 with the correct value. :pr:`15099` by `Nicolas Hug`_. -- |Fix| fixed a bug in :class:`BaseLibSVM._sparse_fit` where n_SV=0 raised a +- |Fix| fixed a bug in `BaseLibSVM._sparse_fit` where n_SV=0 raised a ZeroDivisionError. :pr:`14894` by :user:`Danna Naser `. - |Fix| The liblinear solver now supports ``sample_weight``. @@ -993,14 +994,14 @@ Changelog :func:`~utils.estimator_checks.parametrize_with_checks`, to parametrize estimator checks for a list of estimators. :pr:`14381` by `Thomas Fan`_. -- |Feature| A new random variable, :class:`utils.fixes.loguniform` implements a +- |Feature| A new random variable, `utils.fixes.loguniform` implements a log-uniform random variable (e.g., for use in RandomizedSearchCV). For example, the outcomes ``1``, ``10`` and ``100`` are all equally likely for ``loguniform(1, 100)``. See :issue:`11232` by :user:`Scott Sievert ` and :user:`Nathaniel Saul `, and `SciPy PR 10815 `. -- |Enhancement| :func:`utils.safe_indexing` (now deprecated) accepts an +- |Enhancement| `utils.safe_indexing` (now deprecated) accepts an ``axis`` parameter to index array-like across rows and columns. The column indexing can be done on NumPy array, SciPy sparse matrix, and Pandas DataFrame. An additional refactoring was done. :pr:`14035` and :pr:`14475` @@ -1092,8 +1093,8 @@ These changes mostly affect library developers. :pr:`14336` by :user:`Gregory Dexter `. - Added two common multioutput estimator tests - :func:`~utils.estimator_checks.check_classifier_multioutput` and - :func:`~utils.estimator_checks.check_regressor_multioutput`. + `utils.estimator_checks.check_classifier_multioutput` and + `utils.estimator_checks.check_regressor_multioutput`. :pr:`13392` by :user:`Rok Mihevc `. - |Fix| Added ``check_transformer_data_not_an_array`` to checks where missing diff --git a/doc/whats_new/v0.23.rst b/doc/whats_new/v0.23.rst index 9603836496ca2..4c3fd66cbd599 100644 --- a/doc/whats_new/v0.23.rst +++ b/doc/whats_new/v0.23.rst @@ -65,7 +65,7 @@ Changelog :mod:`sklearn.ensemble` ....................... -- |Fix| Fixed bug in :class:`ensemble.MultinomialDeviance` where the +- |Fix| Fixed bug in `ensemble.MultinomialDeviance` where the average of logloss was incorrectly calculated as sum of logloss. :pr:`17694` by :user:`Markus Rempfler ` and :user:`Tsutomu Kusanagi `. @@ -210,7 +210,7 @@ random sampling procedures. - |Fix| :class:`preprocessing.StandardScaler` with `partial_fit` and sparse input. 
- |Fix| :class:`preprocessing.Normalizer` with norm='max' -- |Fix| Any model using the :func:`svm.libsvm` or the :func:`svm.liblinear` solver, +- |Fix| Any model using the `svm.libsvm` or the `svm.liblinear` solver, including :class:`svm.LinearSVC`, :class:`svm.LinearSVR`, :class:`svm.NuSVC`, :class:`svm.NuSVR`, :class:`svm.OneClassSVM`, :class:`svm.SVC`, :class:`svm.SVR`, :class:`linear_model.LogisticRegression`. @@ -269,7 +269,7 @@ Changelog could not have a `np.int64` type. :pr:`16484` by :user:`Jeremie du Boisberranger `. -- |Fix| :class:`cluster.AgglomerativeCluClustering` add specific error when +- |Fix| :class:`cluster.AgglomerativeClustering` add specific error when distance matrix is not square and `affinity=precomputed`. :pr:`16257` by :user:`Simona Maggio `. @@ -320,10 +320,11 @@ Changelog by :user:`Stephanie Andrews ` and :user:`Reshama Shaikh `. -- |Feature| embedded dataset loaders :func:`load_breast_cancer`, - :func:`load_diabetes`, :func:`load_digits`, :func:`load_iris`, - :func:`load_linnerud` and :func:`load_wine` now support loading as a pandas - ``DataFrame`` by setting `as_frame=True`. :pr:`15980` by :user:`wconnell` and +- |Feature| embedded dataset loaders :func:`datasets.load_breast_cancer`, + :func:`datasets.load_diabetes`, :func:`datasets.load_digits`, + :func:`datasets.load_iris`, :func:`datasets.load_linnerud` and + :func:`datasets.load_wine` now support loading as a pandas ``DataFrame`` by + setting `as_frame=True`. :pr:`15980` by :user:`wconnell` and :user:`Reshama Shaikh `. - |Enhancement| Added ``return_centers`` parameter in @@ -353,8 +354,8 @@ Changelog :func:`decomposition.non_negative_factorization` now preserves float32 dtype. :pr:`16280` by :user:`Jeremie du Boisberranger `. -- |Enhancement| :func:`TruncatedSVD.transform` is now faster on given sparse - ``csc`` matrices. :pr:`16837` by :user:`wornbb`. +- |Enhancement| :func:`decomposition.TruncatedSVD.transform` is now faster on + given sparse ``csc`` matrices. :pr:`16837` by :user:`wornbb`. - |Fix| :class:`decomposition.PCA` with a float `n_components` parameter, will exclusively choose the components that explain the variance greater than @@ -484,7 +485,7 @@ Changelog ......................... - |Feature| :func:`inspection.partial_dependence` and - :func:`inspection.plot_partial_dependence` now support the fast 'recursion' + `inspection.plot_partial_dependence` now support the fast 'recursion' method for :class:`ensemble.RandomForestRegressor` and :class:`tree.DecisionTreeRegressor`. :pr:`15864` by `Nicolas Hug`_. @@ -565,7 +566,7 @@ Changelog :mod:`sklearn.metrics` ...................... -- |Enhancement| :func:`metrics.pairwise.pairwise_distances_chunked` now allows +- |Enhancement| :func:`metrics.pairwise_distances_chunked` now allows its ``reduce_func`` to not have a return value, enabling in-place operations. :pr:`16397` by `Joel Nothman`_. @@ -584,7 +585,7 @@ Changelog - |API| Changed the formatting of values in :meth:`metrics.ConfusionMatrixDisplay.plot` and - :func:`metrics.plot_confusion_matrix` to pick the shorter format (either '2g' + `metrics.plot_confusion_matrix` to pick the shorter format (either '2g' or 'd'). :pr:`16159` by :user:`Rick Mackenbach ` and `Thomas Fan`_. @@ -607,7 +608,7 @@ Changelog `method="predict_proba"` when `y=None`. :pr:`15918` by :user:`Luca Kubin `. -- |Fix| :func:`model_selection.fit_grid_point` is deprecated in 0.23 and will +- |Fix| `model_selection.fit_grid_point` is deprecated in 0.23 and will be removed in 0.25. 
:pr:`16401` by :user:`Arie Pratama Sutiono ` @@ -703,7 +704,7 @@ Changelog crude "modulo" postprocessor used to get a random number in a bounded interval was replaced by the tweaked Lemire method as suggested by `this blog post `_. - Any model using the :func:`svm.libsvm` or the :func:`svm.liblinear` solver, + Any model using the `svm.libsvm` or the `svm.liblinear` solver, including :class:`svm.LinearSVC`, :class:`svm.LinearSVR`, :class:`svm.NuSVC`, :class:`svm.NuSVR`, :class:`svm.OneClassSVM`, :class:`svm.SVC`, :class:`svm.SVR`, :class:`linear_model.LogisticRegression`, @@ -756,7 +757,7 @@ Changelog matrix from a pandas DataFrame that contains only `SparseArray` columns. :pr:`16728` by `Thomas Fan`_. -- |Enhancement| :func:`utils.validation.check_array` supports pandas' +- |Enhancement| :func:`utils.check_array` supports pandas' nullable integer dtype with missing values when `force_all_finite` is set to `False` or `'allow-nan'` in which case the data is converted to floating point values where `pd.NA` values are replaced by `np.nan`. As a consequence, @@ -776,7 +777,7 @@ Changelog in the MRO for `_get_tags()` to work properly. :pr:`16950` by `Nicolas Hug`_. -- |FIX| :func:`utils.all_estimators` now only returns public estimators. +- |FIX| `utils.all_estimators` now only returns public estimators. :pr:`15380` by `Thomas Fan`_. Miscellaneous diff --git a/doc/whats_new/v0.24.rst b/doc/whats_new/v0.24.rst index 35a3c34d7861c..5f6eb4a62bfa4 100644 --- a/doc/whats_new/v0.24.rst +++ b/doc/whats_new/v0.24.rst @@ -42,8 +42,8 @@ Changelog with `sample_weight` parameter and `least_absolute_deviation` loss function. :pr:`19407` by :user:`Vadim Ushtanit `. -:mod:`feature_extraction` -......................... +:mod:`sklearn.feature_extraction` +................................. - |Fix| Fixed a bug to support multiple strings for a category when `sparse=False` in :class:`feature_extraction.DictVectorizer`. @@ -119,7 +119,7 @@ Changelog :class:`preprocessing.OrdinalEncoder`. :pr:`19727` by :user:`Andrew Delong `. -- |Fix| :meth:`preprocessing.OrdinalEncoder.transfrom` correctly handles +- |Fix| :meth:`preprocessing.OrdinalEncoder.transform` correctly handles unknown values for string dtypes. :pr:`19888` by `Thomas Fan`_. - |Fix| :meth:`preprocessing.OneHotEncoder.fit` no longer alters the `drop` @@ -135,7 +135,7 @@ Changelog :mod:`sklearn.tree` ................... -- |Fix| Fix a bug in `fit` of :class:`tree.BaseDecisionTree` that caused +- |Fix| Fix a bug in `fit` of `tree.BaseDecisionTree` that caused segmentation faults under certain conditions. `fit` now deep copies the `Criterion` object to prevent shared concurrent accesses. :pr:`19580` by :user:`Samuel Brice ` and @@ -320,12 +320,6 @@ Changelog - |Fix| Increases the stability of :class:`cross_decomposition.CCA` :pr:`18746` by `Thomas Fan`_. -- |API| For :class:`cross_decomposition.NMF`, - the `init` value, when 'init=None' and - n_components <= min(n_samples, n_features) will be changed from - `'nndsvd'` to `'nndsvda'` in 1.1 (renaming of 0.26). - :pr:`18525` by :user:`Chiara Marmo `. - - |API| The bounds of the `n_components` parameter is now restricted: - into `[1, min(n_samples, n_features, n_targets)]`, for @@ -395,6 +389,12 @@ Changelog :mod:`sklearn.decomposition` ............................ +- |API| For :class:`decomposition.NMF`, + the `init` value, when 'init=None' and + n_components <= min(n_samples, n_features) will be changed from + `'nndsvd'` to `'nndsvda'` in 1.1 (renaming of 0.26). + :pr:`18525` by :user:`Chiara Marmo `. 
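A minimal sketch of pinning `init` explicitly so that the default change described
above has no effect; the data below is synthetic and purely illustrative::

    >>> import numpy as np
    >>> from sklearn.decomposition import NMF
    >>> X = np.abs(np.random.RandomState(0).standard_normal((20, 5)))
    >>> # setting init explicitly avoids relying on the changing default
    >>> W = NMF(n_components=3, init="nndsvda", max_iter=500).fit_transform(X)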
+ - |Enhancement| :func:`decomposition.FactorAnalysis` now supports the optional argument `rotation`, which can take the value `None`, `'varimax'` or `'quartimax'`. :pr:`11064` by :user:`Jona Sassenhagen `. @@ -403,7 +403,7 @@ Changelog `regularization`, which can take the values `None`, 'components', 'transformation' or 'both', in accordance with :func:`decomposition.NMF.non_negative_factorization`. - :pr:`17414` by :user:`Bharat Raghunathan `. + :pr:`17414` by :user:`Bharat Raghunathan `. - |Fix| :class:`decomposition.KernelPCA` behaviour is now more consistent between 32-bits and 64-bits data input when the kernel has small positive @@ -418,8 +418,9 @@ Changelog parameter. :pr:`17679` by :user:`Xavier Dupré `. -- |Fix| :meth:`TruncatedSVD.fit_transform` consistently returns the same - as :meth:`TruncatedSVD.fit` followed by :meth:`TruncatedSVD.transform`. +- |Fix| :meth:`decomposition.TruncatedSVD.fit_transform` consistently returns + the same as :meth:`decomposition.TruncatedSVD.fit` followed by + :meth:`decomposition.TruncatedSVD.transform`. :pr:`18528` by :user:`Albert Villanova del Moral ` and :user:`Ruifeng Zheng `. @@ -474,8 +475,8 @@ Changelog :mod:`sklearn.exceptions` ......................... -- |API| :class:`exceptions.ChangedBehaviorWarning` and - :class:`exceptions.NonBLASDotWarning` are deprecated and will be removed in +- |API| `exceptions.ChangedBehaviorWarning` and + `exceptions.NonBLASDotWarning` are deprecated and will be removed in 1.1 (renaming of 0.26). :pr:`17804` by `Adrin Jalali`_. @@ -486,7 +487,7 @@ Changelog values for one categorical feature. :pr:`17367` by :user:`Peng Yu ` and :user:`Chiara Marmo `. -- |Fix| :class:`feature_extraction.CountVectorizer` raises an issue if a +- |Fix| :class:`feature_extraction.text.CountVectorizer` raises an issue if a custom token pattern which capture more than one group is provided. :pr:`15427` by :user:`Gangesh Gudmalwar ` and :user:`Erin R Hoffman `. @@ -520,7 +521,7 @@ Changelog ............................... - |Enhancement| A new method - :meth:`gaussian_process.Kernel._check_bounds_params` is called after + `gaussian_process.kernel._check_bounds_params` is called after fitting a Gaussian Process and raises a ``ConvergenceWarning`` if the bounds of the hyperparameters are too tight. :issue:`12638` by :user:`Sylvain Lannuzel `. @@ -555,7 +556,7 @@ Changelog ......................... - |Feature| :func:`inspection.partial_dependence` and - :func:`inspection.plot_partial_dependence` now support calculating and + `inspection.plot_partial_dependence` now support calculating and plotting Individual Conditional Expectation (ICE) curves controlled by the ``kind`` parameter. :pr:`16619` by :user:`Madhura Jayratne `. @@ -652,7 +653,7 @@ Changelog generalization of :func:`metrics.top_k_accuracy_score`, the difference is that a prediction is considered correct as long as the true label is associated with one of the `k` highest predicted scores. - :func:`accuracy_score` is the special case of `k = 1`. + :func:`metrics.accuracy_score` is the special case of `k = 1`. :pr:`16625` by :user:`Geoffrey Bolmier `. - |Feature| Added :func:`metrics.det_curve` to compute Detection Error Tradeoff @@ -660,7 +661,7 @@ Changelog :pr:`10591` by :user:`Jeremy Karnowski ` and :user:`Daniel Mohns `. -- |Feature| Added :func:`metrics.plot_det_curve` and +- |Feature| Added `metrics.plot_det_curve` and :class:`metrics.DetCurveDisplay` to ease the plot of DET curves. :pr:`18176` by :user:`Guillaume Lemaitre `. @@ -674,25 +675,21 @@ Changelog Rand index. 
:pr:`17412` by :user:`Uwe F Mayer `. -- |Feature| :func:`metrics.plot_confusion_matrix` now supports making colorbar +- |Feature| `metrics.plot_confusion_matrix` now supports making colorbar optional in the matplotlib plot by setting `colorbar=False`. :pr:`17192` by :user:`Avi Gupta ` -- |Feature| :func:`metrics.plot_confusion_matrix` now supports making colorbar - optional in the matplotlib plot by setting colorbar=False. :pr:`17192` by - :user:`Avi Gupta `. - - |Enhancement| Add `sample_weight` parameter to :func:`metrics.median_absolute_error`. :pr:`17225` by :user:`Lucy Liu `. - |Enhancement| Add `pos_label` parameter in - :func:`metrics.plot_precision_recall_curve` in order to specify the positive + `metrics.plot_precision_recall_curve` in order to specify the positive class to be used when computing the precision and recall statistics. :pr:`17569` by :user:`Guillaume Lemaitre `. - |Enhancement| Add `pos_label` parameter in - :func:`metrics.plot_roc_curve` in order to specify the positive + `metrics.plot_roc_curve` in order to specify the positive class to be used when computing the roc auc statistics. :pr:`17651` by :user:`Clara Matos `. @@ -724,7 +721,7 @@ Changelog classifiers directly with string labeled target classes. :pr:`18114` by :user:`Guillaume Lemaitre `. -- |Fix| Fixed bug in :func:`metrics.plot_confusion_matrix` where error occurs +- |Fix| Fixed bug in `metrics.plot_confusion_matrix` where error occurs when `y_true` contains labels that were not previously seen by the classifier while the `labels` and `display_labels` parameters are set to `None`. :pr:`18405` by :user:`Thomas J. Fan ` and @@ -834,7 +831,7 @@ Changelog ........................ - |Efficiency| Speed up ``seuclidean``, ``wminkowski``, ``mahalanobis`` and - ``haversine`` metrics in :class:`neighbors.DistanceMetric` by avoiding + ``haversine`` metrics in `neighbors.DistanceMetric` by avoiding unexpected GIL acquiring in Cython when setting ``n_jobs>1`` in :class:`neighbors.KNeighborsClassifier`, :class:`neighbors.KNeighborsRegressor`, @@ -844,13 +841,13 @@ Changelog and by validating data out of loops. :pr:`17038` by :user:`Wenbo Zhao `. -- |Efficiency| :class:`neighbors.NeighborsBase` benefits of an improved +- |Efficiency| `neighbors.NeighborsBase` benefits of an improved `algorithm = 'auto'` heuristic. In addition to the previous set of rules, now, when the number of features exceeds 15, `brute` is selected, assuming the data intrinsic dimensionality is too high for tree-based methods. :pr:`17148` by :user:`Geoffrey Bolmier `. -- |Fix| :class:`neighbors.BinaryTree` +- |Fix| `neighbors.BinaryTree` will raise a `ValueError` when fitting on data array having points with different dimensions. :pr:`18691` by :user:`Chiara Marmo `. @@ -883,7 +880,7 @@ Changelog :class:`neural_network.MLPRegressor`. :pr:`17759` by :user:`Srimukh Sripada `. -- |Fix| Fix method :func:`fit` of :class:`neural_network.MLPClassifier` +- |Fix| Fix method :meth:`neural_network.MLPClassifier.fit` not iterating to ``max_iter`` if warm started. :pr:`18269` by :user:`Norbert Preining ` and :user:`Guillaume Lemaitre `. @@ -961,7 +958,7 @@ Changelog - |Enhancement| invoke SciPy BLAS API for SVM kernel function in ``fit``, ``predict`` and related methods of :class:`svm.SVC`, :class:`svm.NuSVC`, - :class:`svm.SVR`, :class:`svm.NuSVR`, :class:`OneClassSVM`. + :class:`svm.SVR`, :class:`svm.NuSVR`, :class:`svm.OneClassSVM`. :pr:`16530` by :user:`Shuhua Fan `. 
:mod:`sklearn.tree` @@ -988,10 +985,10 @@ Changelog with different sample order :pr:`17598` by :user:`Jason Ngo `. - |Enhancement| Add support for weights in - :func:`utils.sparse_func.incr_mean_variance_axis`. + `utils.sparse_func.incr_mean_variance_axis`. By :user:`Maria Telenczuk ` and :user:`Alex Gramfort `. -- |Fix| Raise ValueError with clear error message in :func:`check_array` +- |Fix| Raise ValueError with clear error message in :func:`utils.check_array` for sparse DataFrames with mixed types. :pr:`17992` by :user:`Thomas J. Fan ` and :user:`Alex Shacked `. @@ -1001,7 +998,7 @@ Changelog :pr:`17644` by :user:`Qi Zhang `. - |Fix| Check that we raise proper error when axis=1 and the - dimensions do not match in :func:`utils.sparse_func.incr_mean_variance_axis`. + dimensions do not match in `utils.sparse_func.incr_mean_variance_axis`. By :user:`Alex Gramfort `. Miscellaneous diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 9d23e98838e98..0119b5bf26011 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -96,7 +96,7 @@ Changelog This fixes a regression introduced in 1.0.0 with respect to 0.24.2. :pr:`21694` by :user:`Julien Jerphanion `. -- |Fix| All :class:`sklearn.metrics.MinkowskiDistance` now accepts a weight +- |Fix| All `sklearn.metrics.MinkowskiDistance` now accepts a weight parameter that makes it possible to write code that behaves consistently both with scipy 1.8 and earlier versions. In turns this means that all neighbors-based estimators (except those that use `algorithm="kd_tree"`) now @@ -205,8 +205,8 @@ Fixed models longer checks for uppercase characters in the provided vocabulary. :pr:`21251` by :user:`Jérémie du Boisberranger `. -- |Fix| Fixed a bug in :class:`feature_extraction.CountVectorizer` and - :class:`feature_extraction.TfidfVectorizer` by raising an +- |Fix| Fixed a bug in :class:`feature_extraction.text.CountVectorizer` and + :class:`feature_extraction.text.TfidfVectorizer` by raising an error when 'min_idf' or 'max_idf' are floating-point numbers greater than 1. :pr:`20752` by :user:`Alek Lefebvre `. @@ -250,7 +250,7 @@ Fixed models :mod:`sklearn.utils` .................... -- |Enhancement| :func:`utils.validation._check_sample_weight` can perform a +- |Enhancement| `utils.validation._check_sample_weight` can perform a non-negativity check on the sample weights. It can be turned on using the only_non_negative bool parameter. Estimators that check for non-negative weights are updated: @@ -569,7 +569,7 @@ Changelog - |Fix| :func:`datasets.fetch_kddcup99` returns dataframes when `return_X_y=True` and `as_frame=True`. :pr:`19011` by `Thomas Fan`_. -- |API| Deprecates :func:`datasets.load_boston` in 1.0 and it will be removed +- |API| Deprecates `datasets.load_boston` in 1.0 and it will be removed in 1.2. Alternative code snippets to load similar datasets are provided. Please report to the docstring of the function for details. :pr:`20729` by `Guillaume Lemaitre`_. @@ -587,7 +587,7 @@ Changelog - |Fix| Fixes incorrect multiple data-conversion warnings when clustering boolean data. :pr:`19046` by :user:`Surya Prakash `. -- |Fix| Fixed :func:`dict_learning`, used by +- |Fix| Fixed :func:`decomposition.dict_learning`, used by :class:`decomposition.DictionaryLearning`, to ensure determinism of the output. Achieved by flipping signs of the SVD output which is used to initialize the code. :pr:`18433` by :user:`Bruno Charron `. 
@@ -613,7 +613,7 @@ Changelog to `alpha` instead of 1.0 by default starting from version 1.2 :pr:`19159` by :user:`Benoît Malézieux `. -- |API| Rename variable names in :class:`KernelPCA` to improve +- |API| Rename variable names in :class:`decomposition.KernelPCA` to improve readability. `lambdas_` and `alphas_` are renamed to `eigenvalues_` and `eigenvectors_`, respectively. `lambdas_` and `alphas_` are deprecated and will be removed in 1.2. @@ -744,7 +744,7 @@ Changelog :pr:`20431` by :user:`Oliver Pfaffel `. - |Enhancement| Add kwargs to format ICE and PD lines separately in partial - dependence plots :func:`inspection.plot_partial_dependence` and + dependence plots `inspection.plot_partial_dependence` and :meth:`inspection.PartialDependenceDisplay.plot`. :pr:`19428` by :user:`Mehdi Hamoumi `. @@ -754,7 +754,7 @@ Changelog - |API| :class:`inspection.PartialDependenceDisplay` exposes a class method: :func:`~inspection.PartialDependenceDisplay.from_estimator`. - :func:`inspection.plot_partial_dependence` is deprecated in favor of the + `inspection.plot_partial_dependence` is deprecated in favor of the class method and will be removed in 1.2. :pr:`20959` by `Thomas Fan`_. :mod:`sklearn.kernel_approximation` @@ -939,7 +939,7 @@ Changelog :pr:`18328` by :user:`Albert Villanova del Moral ` and :user:`Alonso Silva Allende `. -- |Fix| avoid overflow in :func:`metrics.cluster.adjusted_rand_score` with +- |Fix| avoid overflow in :func:`metrics.adjusted_rand_score` with large amount of data. :pr:`20312` by :user:`Divyanshu Deoli `. @@ -947,7 +947,7 @@ Changelog :func:`~metrics.ConfusionMatrixDisplay.from_estimator` and :func:`~metrics.ConfusionMatrixDisplay.from_predictions` allowing to create a confusion matrix plot using an estimator or the predictions. - :func:`metrics.plot_confusion_matrix` is deprecated in favor of these two + `metrics.plot_confusion_matrix` is deprecated in favor of these two class methods and will be removed in 1.2. :pr:`18543` by `Guillaume Lemaitre`_. @@ -955,7 +955,7 @@ Changelog :func:`~metrics.PrecisionRecallDisplay.from_estimator` and :func:`~metrics.PrecisionRecallDisplay.from_predictions` allowing to create a precision-recall curve using an estimator or the predictions. - :func:`metrics.plot_precision_recall_curve` is deprecated in favor of these + `metrics.plot_precision_recall_curve` is deprecated in favor of these two class methods and will be removed in 1.2. :pr:`20552` by `Guillaume Lemaitre`_. @@ -963,7 +963,7 @@ Changelog :func:`~metrics.DetCurveDisplay.from_estimator` and :func:`~metrics.DetCurveDisplay.from_predictions` allowing to create a confusion matrix plot using an estimator or the predictions. - :func:`metrics.plot_det_curve` is deprecated in favor of these two + `metrics.plot_det_curve` is deprecated in favor of these two class methods and will be removed in 1.2. :pr:`19278` by `Guillaume Lemaitre`_. @@ -990,7 +990,7 @@ Changelog - |Enhancement| warn only once in the main process for per-split fit failures in cross-validation. :pr:`20619` by :user:`Loïc Estève ` -- |Enhancement| The :class:`model_selection.BaseShuffleSplit` base class is +- |Enhancement| The `model_selection.BaseShuffleSplit` base class is now public. :pr:`20056` by :user:`pabloduque0`. - |Fix| Avoid premature overflow in :func:`model_selection.train_test_split`. @@ -1020,7 +1020,7 @@ Changelog :pr:`19473` by :user:`jiefangxuanyan ` and :user:`Julien Jerphanion `. 
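For the `from_estimator` / `from_predictions` display constructors mentioned above, a
minimal sketch with an illustrative binary problem (matplotlib required)::

    >>> from sklearn.datasets import make_classification
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.metrics import PrecisionRecallDisplay
    >>> X, y = make_classification(random_state=0)
    >>> clf = LogisticRegression(max_iter=1000).fit(X, y)
    >>> disp = PrecisionRecallDisplay.from_estimator(clf, X, y)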
-- |FIX| :class:`neighbors.DistanceMetric` subclasses now support readonly +- |FIX| `neighbors.DistanceMetric` subclasses now support readonly memory-mapped datasets. :pr:`19883` by :user:`Julien Jerphanion `. - |FIX| :class:`neighbors.NearestNeighbors`, :class:`neighbors.KNeighborsClassifier`, @@ -1178,11 +1178,11 @@ Changelog :func:`utils.deprecated` are now properly wrapped. :pr:`20385` by `Thomas Fan`_. -- |Fix| :func:`utils.stats._weighted_percentile` now correctly ignores +- |Fix| `utils.stats._weighted_percentile` now correctly ignores zero-weighted observations smaller than the smallest observation with positive weight for ``percentile=0``. Affected classes are :class:`dummy.DummyRegressor` for ``quantile=0`` and - :class:`ensemble.HuberLossFunction` and :class:`ensemble.HuberLossFunction` + `ensemble.HuberLossFunction` and `ensemble.HuberLossFunction` for ``alpha=0``. :pr:`20528` by :user:`Malte Londschien `. - |Fix| :func:`utils._safe_indexing` explicitly takes a dataframe copy when @@ -1194,7 +1194,7 @@ Changelog :func:`model_selection.cross_val_predict`). :pr:`20673` by :user:`Joris Van den Bossche `. -- |Fix| Fix a regression in :func:`utils.is_scalar_nan` where large Python +- |Fix| Fix a regression in `utils.is_scalar_nan` where large Python numbers would raise an error due to overflow in C types (`np.float64` or `np.int64`). :pr:`20727` by `Guillaume Lemaitre`_. @@ -1208,7 +1208,7 @@ Changelog manager instead. Note that these functions were not documented and part from the public API. :pr:`20521` by :user:`Olivier Grisel `. -- |API| Fixed several bugs in :func:`utils.graph.graph_shortest_path`, which is +- |API| Fixed several bugs in `utils.graph.graph_shortest_path`, which is now deprecated. Use `scipy.sparse.csgraph.shortest_path` instead. :pr:`20531` by `Tom Dupre la Tour`_. diff --git a/doc/whats_new/v1.1.rst b/doc/whats_new/v1.1.rst index e2ac0be0a08cc..c5e64bbd5882b 100644 --- a/doc/whats_new/v1.1.rst +++ b/doc/whats_new/v1.1.rst @@ -62,7 +62,7 @@ Changelog :mod:`sklearn.base` ................... -- |Fix| The `get_params` method of the :class:`BaseEstimator` class now supports +- |Fix| The `get_params` method of the :class:`base.BaseEstimator` class now supports estimators with `type`-type params that have the `get_params` method. :pr:`24017` by :user:`Henry Sorsky `. @@ -544,7 +544,7 @@ Changelog :pr:`22002` by :user:`Takeshi Oura `. - |Enhancement| :class:`decomposition.PCA` exposes a parameter `n_oversamples` to tune - :func:`utils.randomized_svd` and get accurate results when the number of + :func:`utils.extmath.randomized_svd` and get accurate results when the number of features is large. :pr:`21109` by :user:`Smile `. @@ -591,13 +591,14 @@ Changelog `Thomas Fan`_. - |Enhancement| :class:`decomposition.TruncatedSVD` exposes the parameter - `n_oversamples` and `power_iteration_normalizer` to tune :func:`utils.randomized_svd` - and get accurate results when the number of features is large, the rank of the matrix - is high, or other features of the matrix make low rank approximation difficult. + `n_oversamples` and `power_iteration_normalizer` to tune + :func:`utils.extmath.randomized_svd` and get accurate results when the number + of features is large, the rank of the matrix is high, or other features of + the matrix make low rank approximation difficult. :pr:`21705` by :user:`Jay S. Stanley III `. 
- |Enhancement| :class:`decomposition.PCA` exposes the parameter - `power_iteration_normalizer` to tune :func:`utils.randomized_svd` and + `power_iteration_normalizer` to tune :func:`utils.extmath.randomized_svd` and get more accurate results when low rank approximation is difficult. :pr:`21705` by :user:`Jay S. Stanley III `. @@ -661,7 +662,7 @@ Changelog The quantile level can be specified with the new parameter `quantile`. :pr:`21800` and :pr:`20567` by :user:`Christian Lorentzen `. -- |Efficiency| :meth:`fit` of :class:`ensemble.GradientBoostingClassifier` +- |Efficiency| `fit` of :class:`ensemble.GradientBoostingClassifier` and :class:`ensemble.GradientBoostingRegressor` now calls :func:`utils.check_array` with parameter `force_all_finite=False` for non initial warm-start runs as it has already been checked before. @@ -838,7 +839,7 @@ Changelog - |Enhancement| :meth:`inspection.PartialDependenceDisplay.from_estimator`, :meth:`inspection.PartialDependenceDisplay.plot`, and - :func:`inspection.plot_partial_dependence` now support plotting centered + `inspection.plot_partial_dependence` now support plotting centered Individual Conditional Expectation (cICE) and centered PDP curves controlled by setting the parameter `centered`. :pr:`18310` by :user:`Johannes Elfner ` and diff --git a/doc/whats_new/v1.2.rst b/doc/whats_new/v1.2.rst index f2b352a16a10a..f77af841608e3 100644 --- a/doc/whats_new/v1.2.rst +++ b/doc/whats_new/v1.2.rst @@ -76,7 +76,7 @@ Changelog :mod:`sklearn.preprocessing` ............................ -- |Fix| :attr:`preprocessing.OneHotEncoder.drop_idx_` now properly +- |Fix| `preprocessing.OneHotEncoder.drop_idx_` now properly references the dropped category in the `categories_` attribute when there are infrequent categories. :pr:`25589` by `Thomas Fan`_. @@ -118,9 +118,10 @@ parameters, may produce different models from the previous version. This often occurs due to changes in the modelling logic (bug fixes or enhancements), or in random sampling procedures. -- |Fix| The fitted components in :class:`MiniBatchDictionaryLearning` might differ. The - online updates of the sufficient statistics now properly take the sizes of the batches - into account. +- |Fix| The fitted components in + :class:`decomposition.MiniBatchDictionaryLearning` might differ. The online + updates of the sufficient statistics now properly take the sizes of the + batches into account. :pr:`25354` by :user:`Jérémie du Boisberranger `. - |Fix| The `categories_` attribute of :class:`preprocessing.OneHotEncoder` now @@ -227,7 +228,7 @@ Changelog - |Fix| Improves error message in :class:`neural_network.MLPClassifier` and :class:`neural_network.MLPRegressor`, when `early_stopping=True` and - :meth:`partial_fit` is called. :pr:`25694` by `Thomas Fan`_. + `partial_fit` is called. :pr:`25694` by `Thomas Fan`_. :mod:`sklearn.preprocessing` ............................ @@ -255,7 +256,7 @@ Changelog boolean. The type is maintained, instead of converting to `float64.` :pr:`25147` by :user:`Tim Head `. -- |API| :func:`utils.fixes.delayed` is deprecated in 1.2.1 and will be removed +- |API| `utils.fixes.delayed` is deprecated in 1.2.1 and will be removed in 1.5. Instead, import :func:`utils.parallel.delayed` and use it in conjunction with the newly introduced :func:`utils.parallel.Parallel` to ensure proper propagation of the scikit-learn configuration to @@ -294,7 +295,7 @@ random sampling procedures. to a tiny value. Moreover, `verbose` is now properly propagated to L-BFGS-B. 
:pr:`23619` by :user:`Christian Lorentzen `. -- |Enhancement| The default value for `eps` :func:`metrics.logloss` has changed +- |Enhancement| The default value for `eps` :func:`metrics.log_loss` has changed from `1e-15` to `"auto"`. `"auto"` sets `eps` to `np.finfo(y_pred.dtype).eps`. :pr:`24354` by :user:`Safiuddin Khaja ` and :user:`gsiisg `. @@ -306,7 +307,7 @@ random sampling procedures. :pr:`22527` by :user:`Meekail Zain ` and `Thomas Fan`_. - |Fix| The condition for early stopping has now been changed in - :func:`linear_model._sgd_fast._plain_sgd` which is used by + `linear_model._sgd_fast._plain_sgd` which is used by :class:`linear_model.SGDRegressor` and :class:`linear_model.SGDClassifier`. The old condition did not disambiguate between training and validation set and had an effect of overscaling the error tolerance. @@ -319,7 +320,7 @@ random sampling procedures. - |API| The default value of `tol` was changed from `1e-3` to `1e-4` for :func:`linear_model.ridge_regression`, :class:`linear_model.Ridge` and - :class:`linear_model.`RidgeClassifier`. + :class:`linear_model.RidgeClassifier`. :pr:`24465` by :user:`Christian Lorentzen `. Changes impacting all modules @@ -358,8 +359,8 @@ Changes impacting all modules - :class:`sklearn.semi_supervised.LabelPropagation` - :class:`sklearn.semi_supervised.LabelSpreading` - For instance :class:`sklearn.neighbors.NearestNeighbors.kneighbors` and - :class:`sklearn.neighbors.NearestNeighbors.radius_neighbors` + For instance :meth:`sklearn.neighbors.NearestNeighbors.kneighbors` and + :meth:`sklearn.neighbors.NearestNeighbors.radius_neighbors` can respectively be up to ×20 and ×5 faster than previously on a laptop. Moreover, implementations of those two algorithms are now suitable @@ -796,7 +797,7 @@ Changelog (`average="micro"`) for the One-vs-Rest multiclass case (`multi_class="ovr"`). :pr:`24338` by :user:`Arturo Amor `. -- |Enhancement| Adds an `"auto"` option to `eps` in :func:`metrics.logloss`. +- |Enhancement| Adds an `"auto"` option to `eps` in :func:`metrics.log_loss`. This option will automatically set the `eps` value depending on the data type of `y_pred`. In addition, the default value of `eps` is changed from `1e-15` to the new `"auto"` option. @@ -887,7 +888,7 @@ Changelog :pr:`10468` by :user:`Ruben ` and :pr:`22993` by :user:`Jovan Stojanovic `. -- |Enhancement| :class:`neighbors.NeighborsBase` now accepts +- |Enhancement| `neighbors.NeighborsBase` now accepts Minkowski semi-metric (i.e. when :math:`0 < p < 1` for `metric="minkowski"`) for `algorithm="auto"` or `algorithm="brute"`. :pr:`24750` by :user:`Rudresh Veerkhare ` @@ -970,7 +971,7 @@ Changelog - |Enhancement| :func:`utils.validation.column_or_1d` now accepts a `dtype` parameter to specific `y`'s dtype. :pr:`22629` by `Thomas Fan`_. -- |Enhancement| :func:`utils.extmath.cartesian` now accepts arrays with different +- |Enhancement| `utils.extmath.cartesian` now accepts arrays with different `dtype` and will cast the output to the most permissive `dtype`. :pr:`25067` by :user:`Guillaume Lemaitre `. 
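A minimal sketch of the Minkowski semi-metric support described above, assuming
scikit-learn >= 1.2 and purely synthetic data::

    >>> import numpy as np
    >>> from sklearn.neighbors import NearestNeighbors
    >>> X = np.random.RandomState(0).rand(30, 3)
    >>> nn = NearestNeighbors(n_neighbors=3, metric="minkowski", p=0.5,
    ...                       algorithm="brute").fit(X)
    >>> dist, ind = nn.kneighbors(X[:2])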
@@ -1062,4 +1063,4 @@ Pitters, Tom Dupré la Tour, tomiock, Tom Mathews, Tom McTiernan, tspeng, Tyler Egashira, Valentin Laurent, Varun Jain, Vera Komeyer, Vicente Reyes-Puerta, Vinayak Mehta, Vincent M, Vishal, Vyom Pathak, wattai, wchathura, WEN Hao, William M, x110, Xiao Yuan, Xunius, yanhong-zhao-ef, Yusuf Raji, Z Adil Khwaja, -zeeshan lone \ No newline at end of file +zeeshan lone diff --git a/doc/whats_new/v1.3.rst b/doc/whats_new/v1.3.rst index 41c03293cf067..70d14ab285eea 100644 --- a/doc/whats_new/v1.3.rst +++ b/doc/whats_new/v1.3.rst @@ -2,12 +2,184 @@ .. currentmodule:: sklearn +.. _changes_1_3_2: + +Version 1.3.2 +============= + +**October 2023** + +Changelog +--------- + +:mod:`sklearn.datasets` +....................... + +- |Fix| All dataset fetchers now accept `data_home` as any object that implements + the :class:`os.PathLike` interface, for instance, :class:`pathlib.Path`. + :pr:`27468` by :user:`Yao Xiao `. + +:mod:`sklearn.decomposition` +............................ + +- |Fix| Fixes a bug in :class:`decomposition.KernelPCA` by forcing the output of + the internal :class:`preprocessing.KernelCenterer` to be a default array. When the + arpack solver is used, it expects an array with a `dtype` attribute. + :pr:`27583` by :user:`Guillaume Lemaitre `. + +:mod:`sklearn.metrics` +...................... + +- |Fix| Fixes a bug for metrics using `zero_division=np.nan` + (e.g. :func:`~metrics.precision_score`) within a parallel loop + (e.g. :func:`~model_selection.cross_val_score`) where the singleton for `np.nan` + will be different in the sub-processes. + :pr:`27573` by :user:`Guillaume Lemaitre `. + +:mod:`sklearn.tree` +................... + +- |Fix| Do not leak data via non-initialized memory in decision tree pickle files and make + the generation of those files deterministic. :pr:`27580` by :user:`Loïc Estève `. + + +.. _changes_1_3_1: + +Version 1.3.1 +============= + +**September 2023** + +Changed models +-------------- + +The following estimators and functions, when fit with the same data and +parameters, may produce different models from the previous version. This often +occurs due to changes in the modelling logic (bug fixes or enhancements), or in +random sampling procedures. + +- |Fix| Ridge models with `solver='sparse_cg'` may have slightly different + results with scipy>=1.12, because of an underlying change in the scipy solver + (see `scipy#18488 `_ for more + details). + :pr:`26814` by :user:`Loïc Estève ` + +Changes impacting all modules +----------------------------- + +- |Fix| The `set_output` API correctly works with list input. :pr:`27044` by + `Thomas Fan`_. + +Changelog +--------- + +:mod:`sklearn.calibration` +.......................... + +- |Fix| :class:`calibration.CalibratedClassifierCV` can now handle models that + produce large prediction scores. Before it was numerically unstable. + :pr:`26913` by :user:`Omar Salman `. + +:mod:`sklearn.cluster` +...................... + +- |Fix| :class:`cluster.BisectingKMeans` could crash when predicting on data + with a different scale than the data used to fit the model. + :pr:`27167` by `Olivier Grisel`_. + +- |Fix| :class:`cluster.BisectingKMeans` now works with data that has a single feature. + :pr:`27243` by :user:`Jérémie du Boisberranger `. + +:mod:`sklearn.cross_decomposition` +.................................. + +- |Fix| :class:`cross_decomposition.PLSRegression` now automatically ravels the output + of `predict` if fitted with one dimensional `y`. + :pr:`26602` by :user:`Yao Xiao `.
+ +:mod:`sklearn.ensemble` +....................... + +- |Fix| Fix a bug in :class:`ensemble.AdaBoostClassifier` with `algorithm="SAMME"` + where the decision function of each weak learner should be symmetric (i.e. + the sum of the scores should sum to zero for a sample). + :pr:`26521` by :user:`Guillaume Lemaitre `. + +:mod:`sklearn.feature_selection` +................................ + +- |Fix| :func:`feature_selection.mutual_info_regression` now correctly computes the + result when `X` is of integer dtype. :pr:`26748` by :user:`Yao Xiao `. + +:mod:`sklearn.impute` +..................... + +- |Fix| :class:`impute.KNNImputer` now correctly adds a missing indicator column in + ``transform`` when ``add_indicator`` is set to ``True`` and missing values are observed + during ``fit``. :pr:`26600` by :user:`Shreesha Kumar Bhat `. + +:mod:`sklearn.metrics` +...................... + +- |Fix| Scorers used with :func:`metrics.get_scorer` handle properly + multilabel-indicator matrix. + :pr:`27002` by :user:`Guillaume Lemaitre `. + +:mod:`sklearn.mixture` +...................... + +- |Fix| The initialization of :class:`mixture.GaussianMixture` from user-provided + `precisions_init` for `covariance_type` of `full` or `tied` was not correct, + and has been fixed. + :pr:`26416` by :user:`Yang Tao `. + +:mod:`sklearn.neighbors` +........................ + +- |Fix| :meth:`neighbors.KNeighborsClassifier.predict` no longer raises an + exception for `pandas.DataFrames` input. + :pr:`26772` by :user:`Jérémie du Boisberranger `. + +- |Fix| Reintroduce :attr:`sklearn.neighbors.BallTree.valid_metrics` and + :attr:`sklearn.neighbors.KDTree.valid_metrics` as public class attributes. + :pr:`26754` by :user:`Julien Jerphanion `. + +- |Fix| :class:`sklearn.model_selection.HalvingRandomSearchCV` no longer raises + when the input to the `param_distributions` parameter is a list of dicts. + :pr:`26893` by :user:`Stefanie Senger `. + +- |Fix| Neighbors based estimators now correctly work when `metric="minkowski"` and the + metric parameter `p` is in the range `0 < p < 1`, regardless of the `dtype` of `X`. + :pr:`26760` by :user:`Shreesha Kumar Bhat `. + +:mod:`sklearn.preprocessing` +............................ + +- |Fix| :class:`preprocessing.LabelEncoder` correctly accepts `y` as a keyword + argument. :pr:`26940` by `Thomas Fan`_. + +- |Fix| :class:`preprocessing.OneHotEncoder` shows a more informative error message + when `sparse_output=True` and the output is configured to be pandas. + :pr:`26931` by `Thomas Fan`_. + +:mod:`sklearn.tree` +................... + +- |Fix| :func:`tree.plot_tree` now accepts `class_names=True` as documented. + :pr:`26903` by :user:`Thomas Roehr <2maz>` + +- |Fix| The `feature_names` parameter of :func:`tree.plot_tree` now accepts any kind of + array-like instead of just a list. :pr:`27292` by :user:`Rahil Parikh `. + .. _changes_1_3: Version 1.3.0 ============= -**In Development** +**June 2023** + +For a short description of the main highlights of the release, please refer to +:ref:`sphx_glr_auto_examples_release_highlights_plot_release_highlights_1_3_0.py`. .. include:: changelog_legend.inc @@ -59,6 +231,10 @@ Changed displays past behaviour. :pr:`25120` by :user:`Guillaume Lemaitre `. +- |Fix| :class:`model_selection.ValidationCurveDisplay` now accepts passing a + list to the `param_range` parameter. + :pr:`27311` by :user:`Arturo Amor `. + Changes impacting all modules ----------------------------- @@ -180,8 +356,8 @@ Changelog :pr:`123456` by :user:`Joe Bloggs `. 
where 123456 is the *pull request* number, not the issue number. -:mod:`sklearn` -.............. +`sklearn` +......... - |Feature| Added a new option `skip_parameter_validation`, to the function :func:`sklearn.set_config` and context manager :func:`sklearn.config_context`, that @@ -231,6 +407,11 @@ Changelog :user:`Jérémie du Boisberranger `, :user:`Guillaume Lemaitre `. +- |Fix| :class:`cluster.KMeans`, :class:`cluster.MiniBatchKMeans` and + :func:`cluster.k_means` now correctly handle the combination of `n_init="auto"` + and `init` being an array-like, running one initialization in that case. + :pr:`26657` by :user:`Binesh Bannerjee `. + - |API| The `sample_weight` parameter in `predict` for :meth:`cluster.KMeans.predict` and :meth:`cluster.MiniBatchKMeans.predict` is now deprecated and will be removed in v1.5. @@ -242,7 +423,7 @@ Changelog :mod:`sklearn.compose` ...................... -- |Fix| `compose.ColumnTransformer` raises an informative error when the individual +- |Fix| :class:`compose.ColumnTransformer` raises an informative error when the individual transformers of `ColumnTransformer` output pandas dataframes with indexes that are not consistent with each other and the output is configured to be pandas. :pr:`26286` by `Thomas Fan`_. @@ -273,7 +454,7 @@ Changelog :pr:`26033` by :user:`Genesis Valencia `. - |API| Adds `eps` parameter in :class:`covariance.GraphicalLasso`, - :func:`covariance.graphical_lasso_path`, and :class:`covariance.GraphicalLassoCV`. + :func:`covariance.graphical_lasso`, and :class:`covariance.GraphicalLassoCV`. :pr:`26033` by :user:`Genesis Valencia `. :mod:`sklearn.datasets` ....................... @@ -292,7 +473,7 @@ Changelog the pandas parser. The parameter `read_csv_kwargs` allows overwriting this behaviour. :pr:`26551` by :user:`Guillaume Lemaitre `. -- |Fix| :func:`dataasets.fetch_openml` will consistently use `np.nan` as missing marker +- |Fix| :func:`datasets.fetch_openml` will consistently use `np.nan` as missing marker with both parsers `"pandas"` and `"liac-arff"`. :pr:`26579` by :user:`Guillaume Lemaitre `. @@ -382,10 +563,10 @@ Changelog and :class:`ensemble.AdaBoostRegressor` that was introduced in :pr:`23819`. :pr:`26242` by :user:`Marko Toplak `. -:mod:`sklearn.exception` -........................ +:mod:`sklearn.exceptions` +......................... -- |Feature| Added :class:`exception.InconsistentVersionWarning` which is raised +- |Feature| Added :class:`exceptions.InconsistentVersionWarning` which is raised when a scikit-learn estimator is unpickled with a scikit-learn version that is inconsistent with the scikit-learn version the estimator was pickled with. :pr:`25297` by `Thomas Fan`_. @@ -421,16 +602,24 @@ Changelog ......................... - |Enhancement| Added support for `sample_weight` in - :func:`inspection.partial_dependence`. This allows for weighted averaging when - aggregating for each value of the grid we are making the inspection on. The - option is only available when `method` is set to `brute`. :pr:`25209` - by :user:`Carlo Lemos `. + :func:`inspection.partial_dependence` and + :meth:`inspection.PartialDependenceDisplay.from_estimator`. This allows for + weighted averaging when aggregating for each value of the grid we are making the + inspection on. The option is only available when `method` is set to `brute`. + :pr:`25209` and :pr:`26644` by :user:`Carlo Lemos `. - |API| :func:`inspection.partial_dependence` returns a :class:`utils.Bunch` with new key: `grid_values`.
The `values` key is deprecated in favor of `grid_values` and the `values` key will be removed in 1.5. :pr:`21809` and :pr:`25732` by `Thomas Fan`_. +:mod:`sklearn.kernel_approximation` +................................... + +- |Fix| :class:`kernel_approximation.AdditiveChi2Sampler` is now stateless. + The `sample_interval_` attribute is deprecated and will be removed in 1.5. + :pr:`25190` by :user:`Vincent Maladière `. + :mod:`sklearn.linear_model` ........................... @@ -450,10 +639,16 @@ Changelog :pr:`25697` by :user:`John Pangas `. - |Fix| Use a more robust criterion to detect convergence of - :class:`linear_model.LogisticRegression(penalty="l1", solver="liblinear")` + :class:`linear_model.LogisticRegression` with `penalty="l1"` and `solver="liblinear"` on linearly separable problems. :pr:`25214` by `Tom Dupre la Tour`_. +- |Fix| Fix a crash when calling `fit` on + :class:`linear_model.LogisticRegression` with `solver="newton-cholesky"` and + `max_iter=0` which failed to inspect the state of the model prior to the + first parameter update. + :pr:`26653` by :user:`Olivier Grisel `. + - |API| Deprecates `n_iter` in favor of `max_iter` in :class:`linear_model.BayesianRidge` and :class:`linear_model.ARDRegression`. `n_iter` will be removed in scikit-learn 1.5. This change makes those @@ -515,7 +710,7 @@ Changelog chance level. This line is exposed in the `chance_level_` attribute. :pr:`26019` by :user:`Yao Xiao `. -- |Fix| :func:`metrics.manhattan_distances` now supports readonly sparse datasets. +- |Fix| :func:`metrics.pairwise.manhattan_distances` now supports readonly sparse datasets. :pr:`25432` by :user:`Julien Jerphanion `. - |Fix| Fixed :func:`metrics.classification_report` so that empty input will return @@ -656,9 +851,10 @@ Changelog CSR matrix. :pr:`24145` by :user:`Christian Lorentzen `. - |Enhancement| Adds a `feature_name_combiner` parameter to - :class:`preprocessing.OneHotEncoder`. This specifies a custom callable to create - feature names to be returned by :meth:`get_feature_names_out`. - The callable combines input arguments `(input_feature, category)` to a string. + :class:`preprocessing.OneHotEncoder`. This specifies a custom callable to + create feature names to be returned by + :meth:`preprocessing.OneHotEncoder.get_feature_names_out`. The callable + combines input arguments `(input_feature, category)` to a string. :pr:`22506` by :user:`Mario Kostelac `. - |Enhancement| Added support for `sample_weight` in @@ -672,14 +868,6 @@ Changelog :class:`preprocessing.KBinsDiscretizer` regardless of the strategy used. :pr:`26424` by :user:`Jérémie du Boisberranger `. -- |Fix| :class:`preprocessing.AdditiveChi2Sampler` is now stateless. - The `sample_interval_` attribute is deprecated and will be removed in 1.5. - :pr:`25190` by :user:`Vincent Maladière `. - -- |Fix| :class:`AdditiveChi2Sampler` is now stateless. - The `sample_interval_` attribute is deprecated and will be removed in 1.5. - :pr:`25190` by :user:`Vincent Maladière `. - - |Fix| :class:`preprocessing.PowerTransformer` now correctly preserves the Pandas Index when the `set_config(transform_output="pandas")`. :pr:`26454` by `Thomas Fan`_. @@ -725,14 +913,14 @@ Changelog :mod:`sklearn.utils` .................... -- |FIX| Fixes :func:`utils.validation.check_array` to properly convert pandas +- |FIX| Fixes :func:`utils.check_array` to properly convert pandas extension arrays. :pr:`25813` and :pr:`26106` by `Thomas Fan`_. 
-- |Fix| :func:`utils.validation.check_array` now supports pandas DataFrames with +- |Fix| :func:`utils.check_array` now supports pandas DataFrames with extension arrays and object dtypes by returning an ndarray with object dtype. :pr:`25814` by `Thomas Fan`_. -- |API| :func:`utils.estimator_checks.check_transformers_unfitted_stateless` has been +- |API| `utils.estimator_checks.check_transformers_unfitted_stateless` has been introduced to ensure stateless transformers don't raise `NotFittedError` during `transform` with no prior call to `fit` or `fit_transform`. :pr:`25190` by :user:`Vincent Maladière `. @@ -763,4 +951,40 @@ Code and Documentation Contributors Thanks to everyone who has contributed to the maintenance and improvement of the project since version 1.2, including: -TODO: update at the time of the release. +2357juan, Abhishek Singh Kushwah, Adam Handke, Adam Kania, Adam Li, adienes, +Admir Demiraj, adoublet, Adrin Jalali, A.H.Mansouri, Ahmedbgh, Ala-Na, Alex +Buzenet, AlexL, Ali H. El-Kassas, amay, András Simon, André Pedersen, Andrew +Wang, Ankur Singh, annegnx, Ansam Zedan, Anthony22-dev, Artur Hermano, Arturo +Amor, as-90, ashah002, Ashish Dutt, Ashwin Mathur, AymericBasset, Azaria +Gebremichael, Barata Tripramudya Onggo, Benedek Harsanyi, Benjamin Bossan, +Bharat Raghunathan, Binesh Bannerjee, Boris Feld, Brendan Lu, Brevin Kunde, +cache-missing, Camille Troillard, Carla J, carlo, Carlo Lemos, c-git, Changyao +Chen, Chiara Marmo, Christian Lorentzen, Christian Veenhuis, Christine P. Chai, +crispinlogan, Da-Lan, DanGonite57, Dave Berenbaum, davidblnc, david-cortes, +Dayne, Dea María Léon, Denis, Dimitri Papadopoulos Orfanos, Dimitris +Litsidis, Dmitry Nesterov, Dominic Fox, Dominik Prodinger, Edern, Ekaterina +Butyugina, Elabonga Atuo, Emir, farhan khan, Felipe Siola, futurewarning, Gael +Varoquaux, genvalen, Gleb Levitski, Guillaume Lemaitre, gunesbayir, Haesun +Park, hujiahong726, i-aki-y, Ian Thompson, Ido M, Ily, Irene, Jack McIvor, +jakirkham, James Dean, JanFidor, Jarrod Millman, JB Mountford, Jérémie du +Boisberranger, Jessicakk0711, Jiawei Zhang, Joey Ortiz, JohnathanPi, John +Pangas, Joshua Choo Yun Keat, Joshua Hedlund, JuliaSchoepp, Julien Jerphanion, +jygerardy, ka00ri, Kaushik Amar Das, Kento Nozawa, Kian Eliasi, Kilian Kluge, +Lene Preuss, Linus, Logan Thomas, Loic Esteve, Louis Fouquet, Lucy Liu, Madhura +Jayaratne, Marc Torrellas Socastro, Maren Westermann, Mario Kostelac, Mark +Harfouche, Marko Toplak, Marvin Krawutschke, Masanori Kanazu, mathurinm, Matt +Haberland, Max Halford, maximeSaur, Maxwell Liu, m. bou, mdarii, Meekail Zain, +Mikhail Iljin, murezzda, Nawazish Alam, Nicola Fanelli, Nightwalkx, Nikolay +Petrov, Nishu Choudhary, NNLNR, npache, Olivier Grisel, Omar Salman, ouss1508, +PAB, Pandata, partev, Peter Piontek, Phil, pnucci, Pooja M, Pooja Subramaniam, +precondition, Quentin Barthélemy, Rafal Wojdyla, Raghuveer Bhat, Rahil Parikh, +Ralf Gommers, ram vikram singh, Rushil Desai, Sadra Barikbin, SANJAI_3, Sashka +Warner, Scott Gigante, Scott Gustafson, searchforpassion, Seoeun +Hong, Shady el Gewily, Shiva chauhan, Shogo Hida, Shreesha Kumar Bhat, sonnivs, +Sortofamudkip, Stanislav (Stanley) Modrak, Stefanie Senger, Steven Van +Vaerenbergh, Tabea Kossen, Théophile Baranger, Thijs van Weezel, Thomas A +Caswell, Thomas Germer, Thomas J.
Fan, Tim Head, Tim P, Tom Dupré la Tour, +tomiock, tspeng, Valentin Laurent, Veghit, VIGNESH D, Vijeth Moudgalya, Vinayak +Mehta, Vincent M, Vincent-violet, Vyom Pathak, William M, windiana42, Xiao +Yuan, Yao Xiao, Yaroslav Halchenko, Yotam Avidar-Constantini, Yuchen Zhou, +Yusuf Raji, zeeshan lone diff --git a/examples/applications/plot_cyclical_feature_engineering.py b/examples/applications/plot_cyclical_feature_engineering.py index ecd270354ab76..12e285096726d 100644 --- a/examples/applications/plot_cyclical_feature_engineering.py +++ b/examples/applications/plot_cyclical_feature_engineering.py @@ -35,7 +35,6 @@ # demand around the middle of the days: import matplotlib.pyplot as plt - fig, ax = plt.subplots(figsize=(12, 4)) average_week_demand = df.groupby(["weekday", "hour"])["count"].mean() average_week_demand.plot(ax=ax) @@ -181,12 +180,11 @@ # # The numerical variables need no preprocessing and, for the sake of simplicity, # we only try the default hyper-parameters for this model: -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import OrdinalEncoder from sklearn.compose import ColumnTransformer from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.model_selection import cross_validate - +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OrdinalEncoder categorical_columns = [ "weather", @@ -262,11 +260,10 @@ def evaluate(model, X, y, cv): # For consistency, we scale the numerical features to the same 0-1 range using # class:`sklearn.preprocessing.MinMaxScaler`, although in this case it does not # impact the results much because they are already on comparable scales: -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import MinMaxScaler -from sklearn.linear_model import RidgeCV import numpy as np +from sklearn.linear_model import RidgeCV +from sklearn.preprocessing import MinMaxScaler, OneHotEncoder one_hot_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False) alphas = np.logspace(-6, 6, 25) @@ -619,9 +616,8 @@ def periodic_spline_transformer(period, n_splines=None, degree=3): # However, it is possible to use the `PolynomialFeatures` class on coarse # grained spline encoded hours to model the "workingday"/"hours" interaction # explicitly without introducing too many new variables: -from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import FeatureUnion - +from sklearn.preprocessing import PolynomialFeatures hour_workday_interaction = make_pipeline( ColumnTransformer( @@ -668,7 +664,6 @@ def periodic_spline_transformer(period, n_splines=None, degree=3): # polynomial kernel expansion. Let us try the latter: from sklearn.kernel_approximation import Nystroem - cyclic_spline_poly_pipeline = make_pipeline( cyclic_spline_transformer, Nystroem(kernel="poly", degree=2, n_components=300, random_state=0), diff --git a/examples/applications/plot_digits_denoising.py b/examples/applications/plot_digits_denoising.py index 72637b6ab036f..bd8d5b1b7b037 100644 --- a/examples/applications/plot_digits_denoising.py +++ b/examples/applications/plot_digits_denoising.py @@ -32,9 +32,10 @@ # :func:`~sklearn.datasets.fetch_openml` to get this dataset. In addition, we # normalize the dataset such that all pixel values are in the range (0, 1). 
import numpy as np + from sklearn.datasets import fetch_openml -from sklearn.preprocessing import MinMaxScaler from sklearn.model_selection import train_test_split +from sklearn.preprocessing import MinMaxScaler X, y = fetch_openml(data_id=41082, as_frame=False, return_X_y=True, parser="pandas") X = MinMaxScaler().fit_transform(X) diff --git a/examples/applications/plot_face_recognition.py b/examples/applications/plot_face_recognition.py index 878d889f52ce3..1ff4399d60739 100644 --- a/examples/applications/plot_face_recognition.py +++ b/examples/applications/plot_face_recognition.py @@ -13,18 +13,16 @@ """ # %% from time import time + import matplotlib.pyplot as plt +from scipy.stats import loguniform -from sklearn.model_selection import train_test_split -from sklearn.model_selection import RandomizedSearchCV from sklearn.datasets import fetch_lfw_people -from sklearn.metrics import classification_report -from sklearn.metrics import ConfusionMatrixDisplay -from sklearn.preprocessing import StandardScaler from sklearn.decomposition import PCA +from sklearn.metrics import ConfusionMatrixDisplay, classification_report +from sklearn.model_selection import RandomizedSearchCV, train_test_split +from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC -from scipy.stats import loguniform - # %% # Download the data, if not already on disk and load it as numpy arrays diff --git a/examples/applications/plot_model_complexity_influence.py b/examples/applications/plot_model_complexity_influence.py index 812539aa1ff46..f83be241230c3 100644 --- a/examples/applications/plot_model_complexity_influence.py +++ b/examples/applications/plot_model_complexity_influence.py @@ -42,16 +42,16 @@ # License: BSD 3 clause import time -import numpy as np + import matplotlib.pyplot as plt +import numpy as np from sklearn import datasets -from sklearn.model_selection import train_test_split -from sklearn.metrics import mean_squared_error -from sklearn.svm import NuSVR from sklearn.ensemble import GradientBoostingRegressor from sklearn.linear_model import SGDClassifier -from sklearn.metrics import hamming_loss +from sklearn.metrics import hamming_loss, mean_squared_error +from sklearn.model_selection import train_test_split +from sklearn.svm import NuSVR # Initialize random generator np.random.seed(0) diff --git a/examples/applications/plot_out_of_core_classification.py b/examples/applications/plot_out_of_core_classification.py index 212cbda9cc71e..08ae3000c391c 100644 --- a/examples/applications/plot_out_of_core_classification.py +++ b/examples/applications/plot_out_of_core_classification.py @@ -19,24 +19,22 @@ # License: BSD 3 clause import itertools -from pathlib import Path -from hashlib import sha256 import re +import sys import tarfile import time -import sys +from hashlib import sha256 +from html.parser import HTMLParser +from pathlib import Path +from urllib.request import urlretrieve -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib import rcParams -from html.parser import HTMLParser -from urllib.request import urlretrieve from sklearn.datasets import get_data_home from sklearn.feature_extraction.text import HashingVectorizer -from sklearn.linear_model import SGDClassifier -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.linear_model import Perceptron +from sklearn.linear_model import PassiveAggressiveClassifier, Perceptron, SGDClassifier from sklearn.naive_bayes import MultinomialNB diff --git 
a/examples/applications/plot_outlier_detection_wine.py b/examples/applications/plot_outlier_detection_wine.py index 45e4c64d9fcc4..c4adfa222a5dd 100644 --- a/examples/applications/plot_outlier_detection_wine.py +++ b/examples/applications/plot_outlier_detection_wine.py @@ -37,12 +37,13 @@ # Author: Virgile Fritsch # License: BSD 3 clause +import matplotlib.font_manager +import matplotlib.pyplot as plt import numpy as np + from sklearn.covariance import EllipticEnvelope -from sklearn.svm import OneClassSVM -import matplotlib.pyplot as plt -import matplotlib.font_manager from sklearn.datasets import load_wine +from sklearn.svm import OneClassSVM # Define "classifiers" to be used classifiers = { diff --git a/examples/applications/plot_prediction_latency.py b/examples/applications/plot_prediction_latency.py index 9b99bcbfdfaf1..0c966b3b1e28e 100644 --- a/examples/applications/plot_prediction_latency.py +++ b/examples/applications/plot_prediction_latency.py @@ -16,19 +16,18 @@ # Authors: Eustache Diemert # License: BSD 3 clause +import gc +import time from collections import defaultdict -import time -import gc -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import train_test_split from sklearn.datasets import make_regression from sklearn.ensemble import RandomForestRegressor -from sklearn.linear_model import Ridge -from sklearn.linear_model import SGDRegressor +from sklearn.linear_model import Ridge, SGDRegressor +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler from sklearn.svm import SVR from sklearn.utils import shuffle @@ -233,7 +232,7 @@ def plot_n_features_influence(percentiles, percentile): fig, ax1 = plt.subplots(figsize=(10, 6)) colors = ["r", "g", "b"] for i, cls_name in enumerate(percentiles.keys()): - x = np.array(sorted([n for n in percentiles[cls_name].keys()])) + x = np.array(sorted(percentiles[cls_name].keys())) y = np.array([percentiles[cls_name][n] for n in x]) plt.plot( x, diff --git a/examples/applications/plot_species_distribution_modeling.py b/examples/applications/plot_species_distribution_modeling.py index e3d5778f3307d..bdf50918840c2 100644 --- a/examples/applications/plot_species_distribution_modeling.py +++ b/examples/applications/plot_species_distribution_modeling.py @@ -43,12 +43,12 @@ from time import time -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.utils import Bunch +from sklearn import metrics, svm from sklearn.datasets import fetch_species_distributions -from sklearn import svm, metrics +from sklearn.utils import Bunch # if basemap is available, we'll use it. # otherwise, we'll improvise later... diff --git a/examples/applications/plot_stock_market.py b/examples/applications/plot_stock_market.py index 91168f434338e..cdf5a36074923 100644 --- a/examples/applications/plot_stock_market.py +++ b/examples/applications/plot_stock_market.py @@ -24,6 +24,7 @@ # `alphavantage.co `_. 
import sys + import numpy as np import pandas as pd diff --git a/examples/applications/plot_tomography_l1_reconstruction.py b/examples/applications/plot_tomography_l1_reconstruction.py index 9ac351c12206c..d851613402571 100644 --- a/examples/applications/plot_tomography_l1_reconstruction.py +++ b/examples/applications/plot_tomography_l1_reconstruction.py @@ -39,12 +39,11 @@ class :class:`~sklearn.linear_model.Lasso`, that uses the coordinate descent # Author: Emmanuelle Gouillart # License: BSD 3 clause -import numpy as np -from scipy import sparse -from scipy import ndimage -from sklearn.linear_model import Lasso -from sklearn.linear_model import Ridge import matplotlib.pyplot as plt +import numpy as np +from scipy import ndimage, sparse + +from sklearn.linear_model import Lasso, Ridge def _weights(x, dx=1, orig=0): diff --git a/examples/applications/plot_topics_extraction_with_nmf_lda.py b/examples/applications/plot_topics_extraction_with_nmf_lda.py index 38945241ab68b..86821b5458492 100644 --- a/examples/applications/plot_topics_extraction_with_nmf_lda.py +++ b/examples/applications/plot_topics_extraction_with_nmf_lda.py @@ -27,11 +27,12 @@ # License: BSD 3 clause from time import time + import matplotlib.pyplot as plt -from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer -from sklearn.decomposition import NMF, MiniBatchNMF, LatentDirichletAllocation from sklearn.datasets import fetch_20newsgroups +from sklearn.decomposition import NMF, LatentDirichletAllocation, MiniBatchNMF +from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer n_samples = 2000 n_features = 1000 @@ -45,14 +46,13 @@ def plot_top_words(model, feature_names, n_top_words, title): fig, axes = plt.subplots(2, 5, figsize=(30, 15), sharex=True) axes = axes.flatten() for topic_idx, topic in enumerate(model.components_): - top_features_ind = topic.argsort()[: -n_top_words - 1 : -1] - top_features = [feature_names[i] for i in top_features_ind] + top_features_ind = topic.argsort()[-n_top_words:] + top_features = feature_names[top_features_ind] weights = topic[top_features_ind] ax = axes[topic_idx] ax.barh(top_features, weights, height=0.7) ax.set_title(f"Topic {topic_idx +1}", fontdict={"fontsize": 30}) - ax.invert_yaxis() ax.tick_params(axis="both", which="major", labelsize=20) for i in "top right left".split(): ax.spines[i].set_visible(False) diff --git a/examples/applications/svm_gui.py b/examples/applications/svm_gui.py index c8019fa72ae91..cd0e3b6101bb6 100644 --- a/examples/applications/svm_gui.py +++ b/examples/applications/svm_gui.py @@ -30,13 +30,13 @@ from matplotlib.backends.backend_tkagg import ( NavigationToolbar2TkAgg as NavigationToolbar2Tk, ) -from matplotlib.figure import Figure -from matplotlib.contour import ContourSet - import sys -import numpy as np import tkinter as Tk +import numpy as np +from matplotlib.contour import ContourSet +from matplotlib.figure import Figure + from sklearn import svm from sklearn.datasets import dump_svmlight_file diff --git a/examples/applications/wikipedia_principal_eigenvector.py b/examples/applications/wikipedia_principal_eigenvector.py index fcc337b0a4e00..0be1661d7ed5c 100644 --- a/examples/applications/wikipedia_principal_eigenvector.py +++ b/examples/applications/wikipedia_principal_eigenvector.py @@ -33,19 +33,17 @@ # Author: Olivier Grisel # License: BSD 3 clause -from bz2 import BZ2File import os +from bz2 import BZ2File from datetime import datetime from pprint import pprint from time import time +from urllib.request import 
urlopen import numpy as np - from scipy import sparse from sklearn.decomposition import randomized_svd -from urllib.request import urlopen - # %% # Download data, if not already on disk diff --git a/examples/bicluster/README.txt b/examples/bicluster/README.txt index 468e2524eb310..0b2bda2522b63 100644 --- a/examples/bicluster/README.txt +++ b/examples/bicluster/README.txt @@ -3,4 +3,4 @@ Biclustering ------------ -Examples concerning the :mod:`sklearn.cluster.bicluster` module. +Examples concerning biclustering techniques. diff --git a/examples/bicluster/plot_bicluster_newsgroups.py b/examples/bicluster/plot_bicluster_newsgroups.py index a54f7099c9a74..0fef820bb9f2a 100644 --- a/examples/bicluster/plot_bicluster_newsgroups.py +++ b/examples/bicluster/plot_bicluster_newsgroups.py @@ -23,14 +23,13 @@ """ -from collections import defaultdict import operator +from collections import defaultdict from time import time import numpy as np -from sklearn.cluster import SpectralCoclustering -from sklearn.cluster import MiniBatchKMeans +from sklearn.cluster import MiniBatchKMeans, SpectralCoclustering from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.cluster import v_measure_score diff --git a/examples/bicluster/plot_spectral_biclustering.py b/examples/bicluster/plot_spectral_biclustering.py index baf0f0ccbc58f..041ef4c4944f6 100644 --- a/examples/bicluster/plot_spectral_biclustering.py +++ b/examples/bicluster/plot_spectral_biclustering.py @@ -32,9 +32,10 @@ # # As you can see, the data is distributed over 12 cluster cells and is # relatively well distinguishable. -from sklearn.datasets import make_checkerboard from matplotlib import pyplot as plt +from sklearn.datasets import make_checkerboard + n_clusters = (4, 3) data, rows, columns = make_checkerboard( shape=(300, 300), n_clusters=n_clusters, noise=10, shuffle=False, random_state=42 @@ -46,7 +47,7 @@ # %% # We shuffle the data and the goal is to reconstruct it afterwards using -# :class:`~sklearn.bicluster.SpectralBiclustering`. +# :class:`~sklearn.cluster.SpectralBiclustering`. import numpy as np # Creating lists of shuffled row and column indices @@ -56,7 +57,7 @@ # %% # We redefine the shuffled data and plot it. We observe that we lost the -# strucuture of original data matrix. +# structure of original data matrix. 
data = data[row_idx_shuffled][:, col_idx_shuffled] plt.matshow(data, cmap=plt.cm.Blues) diff --git a/examples/bicluster/plot_spectral_coclustering.py b/examples/bicluster/plot_spectral_coclustering.py index 0df275e83e3bd..92b10d93956e7 100644 --- a/examples/bicluster/plot_spectral_coclustering.py +++ b/examples/bicluster/plot_spectral_coclustering.py @@ -21,8 +21,8 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn.datasets import make_biclusters from sklearn.cluster import SpectralCoclustering +from sklearn.datasets import make_biclusters from sklearn.metrics import consensus_score data, rows, columns = make_biclusters( diff --git a/examples/calibration/plot_calibration.py b/examples/calibration/plot_calibration.py index 75d1ea15b8fbd..f928ae631b78b 100644 --- a/examples/calibration/plot_calibration.py +++ b/examples/calibration/plot_calibration.py @@ -91,8 +91,8 @@ # %% # Plot data and the predicted probabilities # ----------------------------------------- -from matplotlib import cm import matplotlib.pyplot as plt +from matplotlib import cm plt.figure() y_unique = np.unique(y) diff --git a/examples/calibration/plot_calibration_curve.py b/examples/calibration/plot_calibration_curve.py index dc4e85a5f1678..915d3b7c20cc9 100644 --- a/examples/calibration/plot_calibration_curve.py +++ b/examples/calibration/plot_calibration_curve.py @@ -140,11 +140,11 @@ import pandas as pd from sklearn.metrics import ( - precision_score, - recall_score, - f1_score, brier_score_loss, + f1_score, log_loss, + precision_score, + recall_score, roc_auc_score, ) diff --git a/examples/calibration/plot_calibration_multiclass.py b/examples/calibration/plot_calibration_multiclass.py index 24962a786ea03..fc6349f3dea5f 100644 --- a/examples/calibration/plot_calibration_multiclass.py +++ b/examples/calibration/plot_calibration_multiclass.py @@ -31,6 +31,7 @@ class of an instance (red: class 1, green: class 2, blue: class 3). # License: BSD Style. import numpy as np + from sklearn.datasets import make_blobs np.random.seed(0) diff --git a/examples/classification/plot_classification_probability.py b/examples/classification/plot_classification_probability.py index 87c3f51db5eb2..ec5887b63914d 100644 --- a/examples/classification/plot_classification_probability.py +++ b/examples/classification/plot_classification_probability.py @@ -23,12 +23,12 @@ import matplotlib.pyplot as plt import numpy as np -from sklearn.metrics import accuracy_score -from sklearn.linear_model import LogisticRegression -from sklearn.svm import SVC +from sklearn import datasets from sklearn.gaussian_process import GaussianProcessClassifier from sklearn.gaussian_process.kernels import RBF -from sklearn import datasets +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import accuracy_score +from sklearn.svm import SVC iris = datasets.load_iris() X = iris.data[:, 0:2] # we only take the first two features for visualization diff --git a/examples/classification/plot_classifier_comparison.py b/examples/classification/plot_classifier_comparison.py index 71e8318aa0acb..37dfa49af0bfd 100644 --- a/examples/classification/plot_classifier_comparison.py +++ b/examples/classification/plot_classifier_comparison.py @@ -3,7 +3,7 @@ Classifier comparison ===================== -A comparison of a several classifiers in scikit-learn on synthetic datasets. +A comparison of several classifiers in scikit-learn on synthetic datasets. 
The point of this example is to illustrate the nature of decision boundaries of different classifiers. This should be taken with a grain of salt, as the intuition conveyed by @@ -24,23 +24,24 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.colors import ListedColormap + +from sklearn.datasets import make_circles, make_classification, make_moons +from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis +from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier +from sklearn.gaussian_process import GaussianProcessClassifier +from sklearn.gaussian_process.kernels import RBF +from sklearn.inspection import DecisionBoundaryDisplay from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler -from sklearn.pipeline import make_pipeline -from sklearn.datasets import make_moons, make_circles, make_classification -from sklearn.neural_network import MLPClassifier +from sklearn.naive_bayes import GaussianNB from sklearn.neighbors import KNeighborsClassifier +from sklearn.neural_network import MLPClassifier +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC -from sklearn.gaussian_process import GaussianProcessClassifier -from sklearn.gaussian_process.kernels import RBF from sklearn.tree import DecisionTreeClassifier -from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier -from sklearn.naive_bayes import GaussianNB -from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis -from sklearn.inspection import DecisionBoundaryDisplay names = [ "Nearest Neighbors", @@ -57,13 +58,15 @@ classifiers = [ KNeighborsClassifier(3), - SVC(kernel="linear", C=0.025), - SVC(gamma=2, C=1), - GaussianProcessClassifier(1.0 * RBF(1.0)), - DecisionTreeClassifier(max_depth=5), - RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1), - MLPClassifier(alpha=1, max_iter=1000), - AdaBoostClassifier(), + SVC(kernel="linear", C=0.025, random_state=42), + SVC(gamma=2, C=1, random_state=42), + GaussianProcessClassifier(1.0 * RBF(1.0), random_state=42), + DecisionTreeClassifier(max_depth=5, random_state=42), + RandomForestClassifier( + max_depth=5, n_estimators=10, max_features=1, random_state=42 + ), + MLPClassifier(alpha=1, max_iter=1000, random_state=42), + AdaBoostClassifier(random_state=42), GaussianNB(), QuadraticDiscriminantAnalysis(), ] diff --git a/examples/classification/plot_digits_classification.py b/examples/classification/plot_digits_classification.py index f760916d1f66e..d6208400d5416 100644 --- a/examples/classification/plot_digits_classification.py +++ b/examples/classification/plot_digits_classification.py @@ -15,7 +15,7 @@ import matplotlib.pyplot as plt # Import datasets, classifiers and performance metrics -from sklearn import datasets, svm, metrics +from sklearn import datasets, metrics, svm from sklearn.model_selection import train_test_split ############################################################################### diff --git a/examples/classification/plot_lda.py b/examples/classification/plot_lda.py index 322cc8bb4007c..b24479b91f5ea 100644 --- a/examples/classification/plot_lda.py +++ b/examples/classification/plot_lda.py @@ -8,13 +8,12 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np +from sklearn.covariance import OAS from sklearn.datasets import make_blobs from 
sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.covariance import OAS - n_train = 20 # samples for training n_test = 200 # samples for testing diff --git a/examples/classification/plot_lda_qda.py b/examples/classification/plot_lda_qda.py index 712354f7f7f44..71230d0a9bcd9 100644 --- a/examples/classification/plot_lda_qda.py +++ b/examples/classification/plot_lda_qda.py @@ -15,8 +15,8 @@ class has its own standard deviation with QDA. # Colormap # -------- -import matplotlib.pyplot as plt import matplotlib as mpl +import matplotlib.pyplot as plt from matplotlib import colors cmap = colors.LinearSegmentedColormap( @@ -172,8 +172,10 @@ def plot_qda_cov(qda, splot): fontsize=15, ) -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis +from sklearn.discriminant_analysis import ( + LinearDiscriminantAnalysis, + QuadraticDiscriminantAnalysis, +) for i, (X, y) in enumerate([dataset_fixed_cov(), dataset_cov()]): # Linear Discriminant Analysis diff --git a/examples/cluster/plot_affinity_propagation.py b/examples/cluster/plot_affinity_propagation.py index d2bc345c00b3e..5816ae298f419 100644 --- a/examples/cluster/plot_affinity_propagation.py +++ b/examples/cluster/plot_affinity_propagation.py @@ -10,8 +10,8 @@ """ import numpy as np -from sklearn.cluster import AffinityPropagation from sklearn import metrics +from sklearn.cluster import AffinityPropagation from sklearn.datasets import make_blobs # %% diff --git a/examples/cluster/plot_agglomerative_clustering.py b/examples/cluster/plot_agglomerative_clustering.py index 5bb87a9386bf8..0cbce55cd3f29 100644 --- a/examples/cluster/plot_agglomerative_clustering.py +++ b/examples/cluster/plot_agglomerative_clustering.py @@ -7,7 +7,7 @@ neighbors. There are two advantages of imposing a connectivity. First, clustering -without a connectivity matrix is much faster. +with sparse connectivity matrices is faster in general. 
Second, when using a connectivity matrix, single, average and complete linkage are unstable and tend to create a few clusters that grow very @@ -28,6 +28,7 @@ # License: BSD 3 clause import time + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/cluster/plot_agglomerative_clustering_metrics.py b/examples/cluster/plot_agglomerative_clustering_metrics.py index f1a77d442dbe8..8eb2ea3f7285f 100644 --- a/examples/cluster/plot_agglomerative_clustering_metrics.py +++ b/examples/cluster/plot_agglomerative_clustering_metrics.py @@ -37,8 +37,8 @@ # Author: Gael Varoquaux # License: BSD 3-Clause or CC-0 -import matplotlib.pyplot as plt import matplotlib.patheffects as PathEffects +import matplotlib.pyplot as plt import numpy as np from sklearn.cluster import AgglomerativeClustering diff --git a/examples/cluster/plot_agglomerative_dendrogram.py b/examples/cluster/plot_agglomerative_dendrogram.py index 2de5030d68f6d..20c22f4f0bb39 100644 --- a/examples/cluster/plot_agglomerative_dendrogram.py +++ b/examples/cluster/plot_agglomerative_dendrogram.py @@ -10,11 +10,11 @@ """ import numpy as np - from matplotlib import pyplot as plt from scipy.cluster.hierarchy import dendrogram -from sklearn.datasets import load_iris + from sklearn.cluster import AgglomerativeClustering +from sklearn.datasets import load_iris def plot_dendrogram(model, **kwargs): diff --git a/examples/cluster/plot_birch_vs_minibatchkmeans.py b/examples/cluster/plot_birch_vs_minibatchkmeans.py index 3d4185dc9368a..c9c213c948913 100644 --- a/examples/cluster/plot_birch_vs_minibatchkmeans.py +++ b/examples/cluster/plot_birch_vs_minibatchkmeans.py @@ -25,17 +25,17 @@ # Alexandre Gramfort # License: BSD 3 clause -from joblib import cpu_count from itertools import cycle from time import time -import numpy as np -import matplotlib.pyplot as plt + import matplotlib.colors as colors +import matplotlib.pyplot as plt +import numpy as np +from joblib import cpu_count from sklearn.cluster import Birch, MiniBatchKMeans from sklearn.datasets import make_blobs - # Generate centers for the blobs so that it forms a 10 X 10 grid. xx = np.linspace(-22, 22, 10) yy = np.linspace(-22, 22, 10) diff --git a/examples/cluster/plot_bisect_kmeans.py b/examples/cluster/plot_bisect_kmeans.py index a6be3545e0b27..3aebdffddaf63 100644 --- a/examples/cluster/plot_bisect_kmeans.py +++ b/examples/cluster/plot_bisect_kmeans.py @@ -15,9 +15,8 @@ """ import matplotlib.pyplot as plt -from sklearn.datasets import make_blobs from sklearn.cluster import BisectingKMeans, KMeans - +from sklearn.datasets import make_blobs print(__doc__) diff --git a/examples/cluster/plot_cluster_comparison.py b/examples/cluster/plot_cluster_comparison.py index 843c629374828..6e7ac7c4dfe56 100644 --- a/examples/cluster/plot_cluster_comparison.py +++ b/examples/cluster/plot_cluster_comparison.py @@ -26,26 +26,28 @@ import time import warnings +from itertools import cycle, islice -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn import cluster, datasets, mixture from sklearn.neighbors import kneighbors_graph from sklearn.preprocessing import StandardScaler -from itertools import cycle, islice - -np.random.seed(0) # ============ # Generate datasets. 
We choose the size big enough to see the scalability # of the algorithms, but not too big to avoid too long running times # ============ n_samples = 500 -noisy_circles = datasets.make_circles(n_samples=n_samples, factor=0.5, noise=0.05) -noisy_moons = datasets.make_moons(n_samples=n_samples, noise=0.05) -blobs = datasets.make_blobs(n_samples=n_samples, random_state=8) -no_structure = np.random.rand(n_samples, 2), None +seed = 30 +noisy_circles = datasets.make_circles( + n_samples=n_samples, factor=0.5, noise=0.05, random_state=seed +) +noisy_moons = datasets.make_moons(n_samples=n_samples, noise=0.05, random_state=seed) +blobs = datasets.make_blobs(n_samples=n_samples, random_state=seed) +rng = np.random.RandomState(seed) +no_structure = rng.rand(n_samples, 2), None # Anisotropicly distributed data random_state = 170 @@ -82,6 +84,7 @@ "allow_single_cluster": True, "hdbscan_min_cluster_size": 15, "hdbscan_min_samples": 3, + "random_state": 42, } datasets = [ @@ -154,7 +157,11 @@ # Create cluster objects # ============ ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True) - two_means = cluster.MiniBatchKMeans(n_clusters=params["n_clusters"], n_init="auto") + two_means = cluster.MiniBatchKMeans( + n_clusters=params["n_clusters"], + n_init="auto", + random_state=params["random_state"], + ) ward = cluster.AgglomerativeClustering( n_clusters=params["n_clusters"], linkage="ward", connectivity=connectivity ) @@ -162,6 +169,7 @@ n_clusters=params["n_clusters"], eigen_solver="arpack", affinity="nearest_neighbors", + random_state=params["random_state"], ) dbscan = cluster.DBSCAN(eps=params["eps"]) hdbscan = cluster.HDBSCAN( @@ -175,7 +183,9 @@ min_cluster_size=params["min_cluster_size"], ) affinity_propagation = cluster.AffinityPropagation( - damping=params["damping"], preference=params["preference"], random_state=0 + damping=params["damping"], + preference=params["preference"], + random_state=params["random_state"], ) average_linkage = cluster.AgglomerativeClustering( linkage="average", @@ -185,7 +195,9 @@ ) birch = cluster.Birch(n_clusters=params["n_clusters"]) gmm = mixture.GaussianMixture( - n_components=params["n_clusters"], covariance_type="full" + n_components=params["n_clusters"], + covariance_type="full", + random_state=params["random_state"], ) clustering_algorithms = ( diff --git a/examples/cluster/plot_cluster_iris.py b/examples/cluster/plot_cluster_iris.py index 4078d139f8064..b20bc8e38dd78 100644 --- a/examples/cluster/plot_cluster_iris.py +++ b/examples/cluster/plot_cluster_iris.py @@ -22,15 +22,15 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt # Though the following import is not directly being used, it is required # for 3D projection to work with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +import numpy as np -from sklearn.cluster import KMeans from sklearn import datasets +from sklearn.cluster import KMeans np.random.seed(5) diff --git a/examples/cluster/plot_coin_segmentation.py b/examples/cluster/plot_coin_segmentation.py index bec68d1221646..c965dc2bd7ace 100644 --- a/examples/cluster/plot_coin_segmentation.py +++ b/examples/cluster/plot_coin_segmentation.py @@ -27,15 +27,14 @@ import time +import matplotlib.pyplot as plt import numpy as np from scipy.ndimage import gaussian_filter -import matplotlib.pyplot as plt from skimage.data import coins from skimage.transform import rescale -from sklearn.feature_extraction import image from sklearn.cluster import spectral_clustering - 
+from sklearn.feature_extraction import image # load the coins as a numpy array orig_coins = coins() diff --git a/examples/cluster/plot_color_quantization.py b/examples/cluster/plot_color_quantization.py index ae37673808e56..cc8849b64ab6f 100644 --- a/examples/cluster/plot_color_quantization.py +++ b/examples/cluster/plot_color_quantization.py @@ -25,13 +25,15 @@ # # License: BSD 3 clause -import numpy as np +from time import time + import matplotlib.pyplot as plt +import numpy as np + from sklearn.cluster import KMeans -from sklearn.metrics import pairwise_distances_argmin from sklearn.datasets import load_sample_image +from sklearn.metrics import pairwise_distances_argmin from sklearn.utils import shuffle -from time import time n_colors = 64 diff --git a/examples/cluster/plot_dbscan.py b/examples/cluster/plot_dbscan.py index c762e0bceae08..0b0bd64ecf62b 100644 --- a/examples/cluster/plot_dbscan.py +++ b/examples/cluster/plot_dbscan.py @@ -44,8 +44,9 @@ # the `labels_` attribute. Noisy samples are given the label math:`-1`. import numpy as np -from sklearn.cluster import DBSCAN + from sklearn import metrics +from sklearn.cluster import DBSCAN db = DBSCAN(eps=0.3, min_samples=10).fit(X) labels = db.labels_ diff --git a/examples/cluster/plot_digits_agglomeration.py b/examples/cluster/plot_digits_agglomeration.py index 77e11328415d2..faedefb8aeed8 100644 --- a/examples/cluster/plot_digits_agglomeration.py +++ b/examples/cluster/plot_digits_agglomeration.py @@ -3,7 +3,7 @@ Feature agglomeration ========================================================= -These images how similar features are merged together using +These images show how similar features are merged together using feature agglomeration. """ @@ -12,10 +12,10 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn import datasets, cluster +from sklearn import cluster, datasets from sklearn.feature_extraction.image import grid_to_graph digits = datasets.load_digits() diff --git a/examples/cluster/plot_digits_linkage.py b/examples/cluster/plot_digits_linkage.py index 730f85c543356..ae67bd5d8e0f4 100644 --- a/examples/cluster/plot_digits_linkage.py +++ b/examples/cluster/plot_digits_linkage.py @@ -35,7 +35,7 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn import manifold, datasets +from sklearn import datasets, manifold digits = datasets.load_digits() X, y = digits.data, digits.target diff --git a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py index e2273326b9a12..577d65f314337 100644 --- a/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py +++ b/examples/cluster/plot_feature_agglomeration_vs_univariate_selection.py @@ -21,18 +21,17 @@ import shutil import tempfile -import numpy as np import matplotlib.pyplot as plt -from scipy import linalg, ndimage +import numpy as np from joblib import Memory +from scipy import linalg, ndimage -from sklearn.feature_extraction.image import grid_to_graph from sklearn import feature_selection from sklearn.cluster import FeatureAgglomeration +from sklearn.feature_extraction.image import grid_to_graph from sklearn.linear_model import BayesianRidge +from sklearn.model_selection import GridSearchCV, KFold from sklearn.pipeline import Pipeline -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import KFold # %% # Set parameters diff 
--git a/examples/cluster/plot_hdbscan.py b/examples/cluster/plot_hdbscan.py index b97858ff156e8..630ab1f150fcb 100644 --- a/examples/cluster/plot_hdbscan.py +++ b/examples/cluster/plot_hdbscan.py @@ -13,11 +13,11 @@ We first define a couple utility functions for convenience. """ # %% +import matplotlib.pyplot as plt import numpy as np -from sklearn.cluster import HDBSCAN, DBSCAN +from sklearn.cluster import DBSCAN, HDBSCAN from sklearn.datasets import make_blobs -import matplotlib.pyplot as plt def plot(X, labels, probabilities=None, parameters=None, ground_truth=False, ax=None): @@ -84,7 +84,7 @@ def plot(X, labels, probabilities=None, parameters=None, ground_truth=False, ax= # rescaled versions of the dataset. fig, axes = plt.subplots(3, 1, figsize=(10, 12)) dbs = DBSCAN(eps=0.3) -for idx, scale in enumerate((1, 0.5, 3)): +for idx, scale in enumerate([1, 0.5, 3]): dbs.fit(X * scale) plot(X * scale, dbs.labels_, parameters={"scale": scale, "eps": 0.3}, ax=axes[idx]) @@ -105,15 +105,21 @@ def plot(X, labels, probabilities=None, parameters=None, ground_truth=False, ax= # One immediate advantage is that HDBSCAN is scale-invariant. fig, axes = plt.subplots(3, 1, figsize=(10, 12)) hdb = HDBSCAN() -for idx, scale in enumerate((1, 0.5, 3)): - hdb.fit(X) - plot(X, hdb.labels_, hdb.probabilities_, ax=axes[idx], parameters={"scale": scale}) +for idx, scale in enumerate([1, 0.5, 3]): + hdb.fit(X * scale) + plot( + X * scale, + hdb.labels_, + hdb.probabilities_, + ax=axes[idx], + parameters={"scale": scale}, + ) # %% # Multi-Scale Clustering # ---------------------- # HDBSCAN is much more than scale invariant though -- it is capable of # multi-scale clustering, which accounts for clusters with varying density. -# Traditional DBSCAN assumes that any potential clusters are homogenous in +# Traditional DBSCAN assumes that any potential clusters are homogeneous in # density. HDBSCAN is free from such constraints. To demonstrate this we # consider the following dataset centers = [[-0.85, -0.85], [-0.85, 0.85], [3, 3], [3, -3]] diff --git a/examples/cluster/plot_inductive_clustering.py b/examples/cluster/plot_inductive_clustering.py index e395571a1caad..b6464459160e3 100644 --- a/examples/cluster/plot_inductive_clustering.py +++ b/examples/cluster/plot_inductive_clustering.py @@ -24,6 +24,7 @@ # Christos Aridas import matplotlib.pyplot as plt + from sklearn.base import BaseEstimator, clone from sklearn.cluster import AgglomerativeClustering from sklearn.datasets import make_blobs @@ -32,7 +33,6 @@ from sklearn.utils.metaestimators import available_if from sklearn.utils.validation import check_is_fitted - N_SAMPLES = 5000 RANDOM_STATE = 42 diff --git a/examples/cluster/plot_kmeans_assumptions.py b/examples/cluster/plot_kmeans_assumptions.py index bc1f01cb1cdd7..46a7ec6fa58b5 100644 --- a/examples/cluster/plot_kmeans_assumptions.py +++ b/examples/cluster/plot_kmeans_assumptions.py @@ -21,6 +21,7 @@ # one has to define a linear `transformation`. import numpy as np + from sklearn.datasets import make_blobs n_samples = 1500 diff --git a/examples/cluster/plot_kmeans_digits.py b/examples/cluster/plot_kmeans_digits.py index 94bba2a5c52d9..d61ec91d13d52 100644 --- a/examples/cluster/plot_kmeans_digits.py +++ b/examples/cluster/plot_kmeans_digits.py @@ -34,6 +34,7 @@ # to group images such that the handwritten digits on the image are the same. 
import numpy as np + from sklearn.datasets import load_digits data, labels = load_digits(return_X_y=True) @@ -53,6 +54,7 @@ # * train and time the pipeline fitting; # * measure the performance of the clustering obtained via different metrics. from time import time + from sklearn import metrics from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler diff --git a/examples/cluster/plot_kmeans_plusplus.py b/examples/cluster/plot_kmeans_plusplus.py index 1f3507c0062ac..69ea738635ddf 100644 --- a/examples/cluster/plot_kmeans_plusplus.py +++ b/examples/cluster/plot_kmeans_plusplus.py @@ -10,9 +10,10 @@ """ +import matplotlib.pyplot as plt + from sklearn.cluster import kmeans_plusplus from sklearn.datasets import make_blobs -import matplotlib.pyplot as plt # Generate sample data n_samples = 4000 diff --git a/examples/cluster/plot_kmeans_silhouette_analysis.py b/examples/cluster/plot_kmeans_silhouette_analysis.py index c7d0dc31d4873..c5817a750c2bb 100644 --- a/examples/cluster/plot_kmeans_silhouette_analysis.py +++ b/examples/cluster/plot_kmeans_silhouette_analysis.py @@ -31,14 +31,14 @@ """ -from sklearn.datasets import make_blobs -from sklearn.cluster import KMeans -from sklearn.metrics import silhouette_samples, silhouette_score - -import matplotlib.pyplot as plt import matplotlib.cm as cm +import matplotlib.pyplot as plt import numpy as np +from sklearn.cluster import KMeans +from sklearn.datasets import make_blobs +from sklearn.metrics import silhouette_samples, silhouette_score + # Generating the sample data from make_blobs # This particular setting has one distinct cluster and 3 clusters placed close # together. diff --git a/examples/cluster/plot_kmeans_stability_low_dim_dense.py b/examples/cluster/plot_kmeans_stability_low_dim_dense.py index c88cf864506f7..9340239a3d00e 100644 --- a/examples/cluster/plot_kmeans_stability_low_dim_dense.py +++ b/examples/cluster/plot_kmeans_stability_low_dim_dense.py @@ -26,14 +26,12 @@ # Author: Olivier Grisel # License: BSD 3 clause -import numpy as np -import matplotlib.pyplot as plt import matplotlib.cm as cm +import matplotlib.pyplot as plt +import numpy as np -from sklearn.utils import shuffle -from sklearn.utils import check_random_state -from sklearn.cluster import MiniBatchKMeans -from sklearn.cluster import KMeans +from sklearn.cluster import KMeans, MiniBatchKMeans +from sklearn.utils import check_random_state, shuffle random_state = np.random.RandomState(0) diff --git a/examples/cluster/plot_linkage_comparison.py b/examples/cluster/plot_linkage_comparison.py index af4c3cd2894af..793fee059d797 100644 --- a/examples/cluster/plot_linkage_comparison.py +++ b/examples/cluster/plot_linkage_comparison.py @@ -25,36 +25,36 @@ import time import warnings +from itertools import cycle, islice -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn import cluster, datasets from sklearn.preprocessing import StandardScaler -from itertools import cycle, islice - -np.random.seed(0) # %% # Generate datasets. 
We choose the size big enough to see the scalability # of the algorithms, but not too big to avoid too long running times n_samples = 1500 -noisy_circles = datasets.make_circles(n_samples=n_samples, factor=0.5, noise=0.05) -noisy_moons = datasets.make_moons(n_samples=n_samples, noise=0.05) -blobs = datasets.make_blobs(n_samples=n_samples, random_state=8) -no_structure = np.random.rand(n_samples, 2), None +noisy_circles = datasets.make_circles( + n_samples=n_samples, factor=0.5, noise=0.05, random_state=170 +) +noisy_moons = datasets.make_moons(n_samples=n_samples, noise=0.05, random_state=170) +blobs = datasets.make_blobs(n_samples=n_samples, random_state=170) +rng = np.random.RandomState(170) +no_structure = rng.rand(n_samples, 2), None # Anisotropicly distributed data -random_state = 170 -X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state) +X, y = datasets.make_blobs(n_samples=n_samples, random_state=170) transformation = [[0.6, -0.6], [-0.4, 0.8]] X_aniso = np.dot(X, transformation) aniso = (X_aniso, y) # blobs with varied variances varied = datasets.make_blobs( - n_samples=n_samples, cluster_std=[1.0, 2.5, 0.5], random_state=random_state + n_samples=n_samples, cluster_std=[1.0, 2.5, 0.5], random_state=170 ) # %% diff --git a/examples/cluster/plot_mean_shift.py b/examples/cluster/plot_mean_shift.py index 46ded7bc43421..aacbc7f216405 100644 --- a/examples/cluster/plot_mean_shift.py +++ b/examples/cluster/plot_mean_shift.py @@ -12,6 +12,7 @@ """ import numpy as np + from sklearn.cluster import MeanShift, estimate_bandwidth from sklearn.datasets import make_blobs diff --git a/examples/cluster/plot_mini_batch_kmeans.py b/examples/cluster/plot_mini_batch_kmeans.py index 7a9d599704059..3a6e8aa63786b 100644 --- a/examples/cluster/plot_mini_batch_kmeans.py +++ b/examples/cluster/plot_mini_batch_kmeans.py @@ -21,6 +21,7 @@ # We start by generating the blobs of data to be clustered. import numpy as np + from sklearn.datasets import make_blobs np.random.seed(0) @@ -35,6 +36,7 @@ # ------------------------------ import time + from sklearn.cluster import KMeans k_means = KMeans(init="k-means++", n_clusters=3, n_init=10) diff --git a/examples/cluster/plot_optics.py b/examples/cluster/plot_optics.py index 7915abd20ce53..523742d43f43d 100644 --- a/examples/cluster/plot_optics.py +++ b/examples/cluster/plot_optics.py @@ -8,6 +8,7 @@ Finds core samples of high density and expands clusters from them. This example uses data that is generated so that the clusters have different densities. + The :class:`~cluster.OPTICS` is first used with its Xi cluster detection method, and then setting specific thresholds on the reachability, which corresponds to :class:`~cluster.DBSCAN`. We can see that the different @@ -20,11 +21,12 @@ # Adrin Jalali # License: BSD 3 clause -from sklearn.cluster import OPTICS, cluster_optics_dbscan import matplotlib.gridspec as gridspec import matplotlib.pyplot as plt import numpy as np +from sklearn.cluster import OPTICS, cluster_optics_dbscan + # Generate sample data np.random.seed(0) diff --git a/examples/cluster/plot_segmentation_toy.py b/examples/cluster/plot_segmentation_toy.py index 0880cdb893839..6fc41f7a5daf2 100644 --- a/examples/cluster/plot_segmentation_toy.py +++ b/examples/cluster/plot_segmentation_toy.py @@ -78,9 +78,10 @@ # %% # Here we perform spectral clustering using the arpack solver since amg is # numerically unstable on this example. We then plot the results. 
-from sklearn.cluster import spectral_clustering import matplotlib.pyplot as plt +from sklearn.cluster import spectral_clustering + labels = spectral_clustering(graph, n_clusters=4, eigen_solver="arpack") label_im = np.full(mask.shape, -1.0) label_im[mask] = labels diff --git a/examples/cluster/plot_ward_structured_vs_unstructured.py b/examples/cluster/plot_ward_structured_vs_unstructured.py index 430d00a8b3730..446d744b31e78 100644 --- a/examples/cluster/plot_ward_structured_vs_unstructured.py +++ b/examples/cluster/plot_ward_structured_vs_unstructured.py @@ -29,18 +29,14 @@ # The following import is required # for 3D projection to work with matplotlib < 3.2 - import mpl_toolkits.mplot3d # noqa: F401 - import numpy as np - # %% # Generate data # ------------- # # We start by generating the Swiss Roll dataset. - from sklearn.datasets import make_swiss_roll n_samples = 1500 diff --git a/examples/compose/plot_column_transformer.py b/examples/compose/plot_column_transformer.py index d4798d828b321..669e817cbf81d 100644 --- a/examples/compose/plot_column_transformer.py +++ b/examples/compose/plot_column_transformer.py @@ -24,14 +24,14 @@ import numpy as np -from sklearn.preprocessing import FunctionTransformer +from sklearn.compose import ColumnTransformer from sklearn.datasets import fetch_20newsgroups from sklearn.decomposition import TruncatedSVD from sklearn.feature_extraction import DictVectorizer from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics import classification_report from sklearn.pipeline import Pipeline -from sklearn.compose import ColumnTransformer +from sklearn.preprocessing import FunctionTransformer from sklearn.svm import LinearSVC ############################################################################## diff --git a/examples/compose/plot_column_transformer_mixed_types.py b/examples/compose/plot_column_transformer_mixed_types.py index 5ed3239db8478..d7efd033440ce 100644 --- a/examples/compose/plot_column_transformer_mixed_types.py +++ b/examples/compose/plot_column_transformer_mixed_types.py @@ -34,12 +34,12 @@ from sklearn.compose import ColumnTransformer from sklearn.datasets import fetch_openml -from sklearn.pipeline import Pipeline +from sklearn.feature_selection import SelectPercentile, chi2 from sklearn.impute import SimpleImputer -from sklearn.preprocessing import StandardScaler, OneHotEncoder from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import train_test_split, RandomizedSearchCV -from sklearn.feature_selection import SelectPercentile, chi2 +from sklearn.model_selection import RandomizedSearchCV, train_test_split +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler np.random.seed(0) diff --git a/examples/compose/plot_compare_reduction.py b/examples/compose/plot_compare_reduction.py index 47975f84325b8..529366c6244f2 100644 --- a/examples/compose/plot_compare_reduction.py +++ b/examples/compose/plot_compare_reduction.py @@ -28,15 +28,16 @@ # Illustration of ``Pipeline`` and ``GridSearchCV`` ############################################################################### -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import load_digits +from sklearn.decomposition import NMF, PCA +from sklearn.feature_selection import SelectKBest, mutual_info_classif from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline -from sklearn.svm import LinearSVC -from sklearn.decomposition 
import PCA, NMF -from sklearn.feature_selection import SelectKBest, mutual_info_classif from sklearn.preprocessing import MinMaxScaler +from sklearn.svm import LinearSVC X, y = load_digits(return_X_y=True) @@ -103,9 +104,10 @@ # cache. Hence, use the ``memory`` constructor parameter when the fitting # of a transformer is costly. -from joblib import Memory from shutil import rmtree +from joblib import Memory + # Create a temporary folder to store the transformers of the pipeline location = "cachedir" memory = Memory(location=location, verbose=10) diff --git a/examples/compose/plot_digits_pipe.py b/examples/compose/plot_digits_pipe.py index 640cd6e529a8d..2769422c404a4 100644 --- a/examples/compose/plot_digits_pipe.py +++ b/examples/compose/plot_digits_pipe.py @@ -14,15 +14,15 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np import pandas as pd from sklearn import datasets from sklearn.decomposition import PCA from sklearn.linear_model import LogisticRegression -from sklearn.pipeline import Pipeline from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler # Define a pipeline to search for the best combination of PCA truncation diff --git a/examples/compose/plot_feature_union.py b/examples/compose/plot_feature_union.py index e014b8b8808b9..01f7e02bfe44f 100644 --- a/examples/compose/plot_feature_union.py +++ b/examples/compose/plot_feature_union.py @@ -20,12 +20,12 @@ # # License: BSD 3 clause -from sklearn.pipeline import Pipeline, FeatureUnion -from sklearn.model_selection import GridSearchCV -from sklearn.svm import SVC from sklearn.datasets import load_iris from sklearn.decomposition import PCA from sklearn.feature_selection import SelectKBest +from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import FeatureUnion, Pipeline +from sklearn.svm import SVC iris = load_iris() diff --git a/examples/compose/plot_transformed_target.py b/examples/compose/plot_transformed_target.py index 1e550ca0ea837..b01c9fbe37934 100644 --- a/examples/compose/plot_transformed_target.py +++ b/examples/compose/plot_transformed_target.py @@ -32,6 +32,7 @@ # (`np.expm1`) will be used to transform the targets before training a linear # regression model and using it for prediction. import numpy as np + from sklearn.datasets import make_regression X, y = make_regression(n_samples=10_000, noise=100, random_state=0) @@ -42,6 +43,7 @@ # Below we plot the probability density functions of the target # before and after applying the logarithmic functions. import matplotlib.pyplot as plt + from sklearn.model_selection import train_test_split f, (ax0, ax1) = plt.subplots(1, 2) diff --git a/examples/covariance/plot_covariance_estimation.py b/examples/covariance/plot_covariance_estimation.py index be3bf4837eb9f..df9af8ea330ba 100644 --- a/examples/covariance/plot_covariance_estimation.py +++ b/examples/covariance/plot_covariance_estimation.py @@ -37,9 +37,10 @@ # Compute the likelihood on test data # ----------------------------------- -from sklearn.covariance import ShrunkCovariance, empirical_covariance, log_likelihood from scipy import linalg +from sklearn.covariance import ShrunkCovariance, empirical_covariance, log_likelihood + # spanning a range of possible shrinkage coefficient values shrinkages = np.logspace(-2, 0, 30) negative_logliks = [ @@ -73,8 +74,8 @@ # are Gaussian, in particular for small samples. 
+from sklearn.covariance import OAS, LedoitWolf from sklearn.model_selection import GridSearchCV -from sklearn.covariance import LedoitWolf, OAS # GridSearch for an optimal shrinkage coefficient tuned_parameters = [{"shrinkage": shrinkages}] diff --git a/examples/covariance/plot_lw_vs_oas.py b/examples/covariance/plot_lw_vs_oas.py index 1fd84b180f50a..107f6bd1c29cc 100644 --- a/examples/covariance/plot_lw_vs_oas.py +++ b/examples/covariance/plot_lw_vs_oas.py @@ -21,11 +21,11 @@ """ -import numpy as np import matplotlib.pyplot as plt -from scipy.linalg import toeplitz, cholesky +import numpy as np +from scipy.linalg import cholesky, toeplitz -from sklearn.covariance import LedoitWolf, OAS +from sklearn.covariance import OAS, LedoitWolf np.random.seed(0) # %% diff --git a/examples/covariance/plot_mahalanobis_distances.py b/examples/covariance/plot_mahalanobis_distances.py index b93d68a269706..bd61e5af22147 100644 --- a/examples/covariance/plot_mahalanobis_distances.py +++ b/examples/covariance/plot_mahalanobis_distances.py @@ -103,6 +103,7 @@ # designed to have a much larger variance in feature 2. import matplotlib.pyplot as plt + from sklearn.covariance import EmpiricalCovariance, MinCovDet # fit a MCD robust estimator to data diff --git a/examples/covariance/plot_robust_vs_empirical_covariance.py b/examples/covariance/plot_robust_vs_empirical_covariance.py index 9111ec82bcbf3..c61a97ddd979b 100644 --- a/examples/covariance/plot_robust_vs_empirical_covariance.py +++ b/examples/covariance/plot_robust_vs_empirical_covariance.py @@ -53,9 +53,9 @@ """ -import numpy as np -import matplotlib.pyplot as plt import matplotlib.font_manager +import matplotlib.pyplot as plt +import numpy as np from sklearn.covariance import EmpiricalCovariance, MinCovDet diff --git a/examples/covariance/plot_sparse_cov.py b/examples/covariance/plot_sparse_cov.py index 96a5486dc964e..a088aeb7e69c0 100644 --- a/examples/covariance/plot_sparse_cov.py +++ b/examples/covariance/plot_sparse_cov.py @@ -59,6 +59,7 @@ # ----------------- import numpy as np from scipy import linalg + from sklearn.datasets import make_sparse_spd_matrix n_samples = 60 diff --git a/examples/cross_decomposition/plot_pcr_vs_pls.py b/examples/cross_decomposition/plot_pcr_vs_pls.py index 529225d11eead..895c75dc1a728 100644 --- a/examples/cross_decomposition/plot_pcr_vs_pls.py +++ b/examples/cross_decomposition/plot_pcr_vs_pls.py @@ -41,8 +41,9 @@ # into PCR and PLS, we fit a PCA estimator to display the two principal # components of this dataset, i.e. the two directions that explain the most # variance in the data. -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.decomposition import PCA rng = np.random.RandomState(0) @@ -99,12 +100,12 @@ # For both models, we plot the projected data onto the first component against # the target. In both cases, this projected data is what the regressors will # use as training data. 
+from sklearn.cross_decomposition import PLSRegression +from sklearn.decomposition import PCA +from sklearn.linear_model import LinearRegression from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline -from sklearn.linear_model import LinearRegression from sklearn.preprocessing import StandardScaler -from sklearn.decomposition import PCA -from sklearn.cross_decomposition import PLSRegression X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng) diff --git a/examples/datasets/plot_digits_last_image.py b/examples/datasets/plot_digits_last_image.py index 95ce867011a9a..0fde32cc674a8 100644 --- a/examples/datasets/plot_digits_last_image.py +++ b/examples/datasets/plot_digits_last_image.py @@ -18,10 +18,10 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -from sklearn import datasets - import matplotlib.pyplot as plt +from sklearn import datasets + # Load the digits dataset digits = datasets.load_digits() diff --git a/examples/datasets/plot_iris_dataset.py b/examples/datasets/plot_iris_dataset.py index 16edcdf37b70d..32aba8918547e 100644 --- a/examples/datasets/plot_iris_dataset.py +++ b/examples/datasets/plot_iris_dataset.py @@ -1,7 +1,7 @@ """ -========================================================= +================ The Iris Dataset -========================================================= +================ This data sets consists of 3 different types of irises' (Setosa, Versicolour, and Virginica) petal and sepal length, stored in a 150x4 numpy.ndarray @@ -19,37 +19,47 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import matplotlib.pyplot as plt - -# unused but required import for doing 3d projections with matplotlib < 3.2 -import mpl_toolkits.mplot3d # noqa: F401 - +# %% +# Loading the iris dataset +# ------------------------ from sklearn import datasets -from sklearn.decomposition import PCA -# import some data to play with iris = datasets.load_iris() -X = iris.data[:, :2] # we only take the first two features. -y = iris.target -x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5 -y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5 -plt.figure(2, figsize=(8, 6)) -plt.clf() +# %% +# Scatter Plot of the Iris dataset +# -------------------------------- +import matplotlib.pyplot as plt + +_, ax = plt.subplots() +scatter = ax.scatter(iris.data[:, 0], iris.data[:, 1], c=iris.target) +ax.set(xlabel=iris.feature_names[0], ylabel=iris.feature_names[1]) +_ = ax.legend( + scatter.legend_elements()[0], iris.target_names, loc="lower right", title="Classes" +) -# Plot the training points -plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Set1, edgecolor="k") -plt.xlabel("Sepal length") -plt.ylabel("Sepal width") +# %% +# Each point in the scatter plot refers to one of the 150 iris flowers +# in the dataset, with the color indicating their respective type +# (Setosa, Versicolour, and Virginica). +# You can already see a pattern regarding the Setosa type, which is +# easily identifiable based on its short and wide sepal. Only +# considering these 2 dimensions, sepal width and length, there's still +# overlap between the Versicolor and Virginica types. + +# %% +# Plot a PCA representation +# ------------------------- +# Let's apply a Principal Component Analysis (PCA) to the iris dataset +# and then plot the irises across the first three PCA dimensions. +# This will allow us to better differentiate between the three types! 
-plt.xlim(x_min, x_max) -plt.ylim(y_min, y_max) -plt.xticks(()) -plt.yticks(()) +# unused but required import for doing 3d projections with matplotlib < 3.2 +import mpl_toolkits.mplot3d # noqa: F401 + +from sklearn.decomposition import PCA -# To getter a better understanding of interaction of the dimensions -# plot the first three PCA dimensions fig = plt.figure(1, figsize=(8, 6)) ax = fig.add_subplot(111, projection="3d", elev=-150, azim=110) @@ -58,18 +68,22 @@ X_reduced[:, 0], X_reduced[:, 1], X_reduced[:, 2], - c=y, - cmap=plt.cm.Set1, - edgecolor="k", + c=iris.target, s=40, ) -ax.set_title("First three PCA directions") -ax.set_xlabel("1st eigenvector") +ax.set_title("First three PCA dimensions") +ax.set_xlabel("1st Eigenvector") ax.xaxis.set_ticklabels([]) -ax.set_ylabel("2nd eigenvector") +ax.set_ylabel("2nd Eigenvector") ax.yaxis.set_ticklabels([]) -ax.set_zlabel("3rd eigenvector") +ax.set_zlabel("3rd Eigenvector") ax.zaxis.set_ticklabels([]) plt.show() + +# %% +# PCA will create 3 new features that are a linear combination of the +# 4 original features. In addition, this transform maximizes the variance. +# With this transformation, we see that we can identify each species using +# only the first feature (i.e. first eigenvalues). diff --git a/examples/datasets/plot_random_dataset.py b/examples/datasets/plot_random_dataset.py index 4f3fdbbb11ef5..e5cbdb080b59f 100644 --- a/examples/datasets/plot_random_dataset.py +++ b/examples/datasets/plot_random_dataset.py @@ -16,9 +16,7 @@ import matplotlib.pyplot as plt -from sklearn.datasets import make_classification -from sklearn.datasets import make_blobs -from sklearn.datasets import make_gaussian_quantiles +from sklearn.datasets import make_blobs, make_classification, make_gaussian_quantiles plt.figure(figsize=(8, 8)) plt.subplots_adjust(bottom=0.05, top=0.9, left=0.05, right=0.95) diff --git a/examples/datasets/plot_random_multilabel_dataset.py b/examples/datasets/plot_random_multilabel_dataset.py index f22c7b9695c42..e6e2d6ad9edcf 100644 --- a/examples/datasets/plot_random_multilabel_dataset.py +++ b/examples/datasets/plot_random_multilabel_dataset.py @@ -35,8 +35,8 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import make_multilabel_classification as make_ml_clf diff --git a/examples/decomposition/plot_faces_decomposition.py b/examples/decomposition/plot_faces_decomposition.py index 12c091c8e14cb..59c030dbab2ae 100644 --- a/examples/decomposition/plot_faces_decomposition.py +++ b/examples/decomposition/plot_faces_decomposition.py @@ -5,7 +5,7 @@ This example applies to :ref:`olivetti_faces_dataset` different unsupervised matrix decomposition (dimension reduction) methods from the module -:py:mod:`sklearn.decomposition` (see the documentation chapter +:mod:`sklearn.decomposition` (see the documentation chapter :ref:`decompositions`). @@ -21,12 +21,11 @@ import logging -from numpy.random import RandomState import matplotlib.pyplot as plt +from numpy.random import RandomState +from sklearn import cluster, decomposition from sklearn.datasets import fetch_olivetti_faces -from sklearn import cluster -from sklearn import decomposition rng = RandomState(0) @@ -147,9 +146,10 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray): # Sparse components - MiniBatchSparsePCA # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # -# Mini-batch sparse PCA (`MiniBatchSparsePCA`) extracts the set of sparse -# components that best reconstruct the data. 
This variant is faster but -# less accurate than the similar :py:mod:`sklearn.decomposition.SparsePCA`. +# Mini-batch sparse PCA (:class:`~sklearn.decomposition.MiniBatchSparsePCA`) +# extracts the set of sparse components that best reconstruct the data. This +# variant is faster but less accurate than the similar +# :class:`~sklearn.decomposition.SparsePCA`. # %% batch_pca_estimator = decomposition.MiniBatchSparsePCA( @@ -165,9 +165,9 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray): # Dictionary learning # ^^^^^^^^^^^^^^^^^^^ # -# By default, :class:`MiniBatchDictionaryLearning` divides the data into -# mini-batches and optimizes in an online manner by cycling over the -# mini-batches for the specified number of iterations. +# By default, :class:`~sklearn.decomposition.MiniBatchDictionaryLearning` +# divides the data into mini-batches and optimizes in an online manner by +# cycling over the mini-batches for the specified number of iterations. # %% batch_dict_estimator = decomposition.MiniBatchDictionaryLearning( @@ -180,9 +180,11 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray): # Cluster centers - MiniBatchKMeans # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # -# `MiniBatchKMeans` is computationally efficient and implements on-line -# learning with a `partial_fit` method. That is why it could be beneficial -# to enhance some time-consuming algorithms with `MiniBatchKMeans`. +# :class:`sklearn.cluster.MiniBatchKMeans` is computationally efficient and +# implements on-line learning with a +# :meth:`~sklearn.decomposition.MiniBatchKMeans.partial_fit` method. That is +# why it could be beneficial to enhance some time-consuming algorithms with +# :class:`~sklearn.cluster.MiniBatchKMeans`. # %% kmeans_estimator = cluster.MiniBatchKMeans( @@ -204,10 +206,10 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray): # Factor Analysis components - FA # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ # -# `Factor Analysis` is similar to `PCA` but has the advantage of modelling the -# variance in every direction of the input space independently -# (heteroscedastic noise). -# Read more in the :ref:`User Guide `. +# :class:`~sklearn.decomposition.FactorAnalysis` is similar to +# :class:`~sklearn.decomposition.PCA` but has the advantage of modelling the +# variance in every direction of the input space independently (heteroscedastic +# noise). Read more in the :ref:`User Guide `. # %% fa_estimator = decomposition.FactorAnalysis(n_components=n_components, max_iter=20) @@ -240,9 +242,10 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray): # a dictionary. It is possible to constrain the dictionary and/or coding coefficients # to be positive to match constraints that may be present in the data. # -# :class:`MiniBatchDictionaryLearning` implements a faster, but less accurate -# version of the dictionary learning algorithm that is better suited for large -# datasets. Read more in the :ref:`User Guide `. +# :class:`~sklearn.decomposition.MiniBatchDictionaryLearning` implements a +# faster, but less accurate version of the dictionary learning algorithm that +# is better suited for large datasets. Read more in the :ref:`User Guide +# `. # %% # Plot the same samples from our dataset but with another colormap. 
@@ -253,11 +256,11 @@ def plot_gallery(title, images, n_col=n_col, n_row=n_row, cmap=plt.cm.gray): # %% # Similar to the previous examples, we change parameters and train -# `MiniBatchDictionaryLearning` estimator on all images. Generally, -# the dictionary learning and sparse encoding decompose input data -# into the dictionary and the coding coefficients matrices. -# :math:`X \approx UV`, where :math:`X = [x_1, . . . , x_n]`, -# :math:`X \in \mathbb{R}^{m×n}`, dictionary :math:`U \in \mathbb{R}^{m×k}`, coding +# :class:`~sklearn.decomposition.MiniBatchDictionaryLearning` estimator on all +# images. Generally, the dictionary learning and sparse encoding decompose +# input data into the dictionary and the coding coefficients matrices. :math:`X +# \approx UV`, where :math:`X = [x_1, . . . , x_n]`, :math:`X \in +# \mathbb{R}^{m×n}`, dictionary :math:`U \in \mathbb{R}^{m×k}`, coding # coefficients :math:`V \in \mathbb{R}^{k×n}`. # # Also below are the results when the dictionary and coding diff --git a/examples/decomposition/plot_ica_blind_source_separation.py b/examples/decomposition/plot_ica_blind_source_separation.py index 8c1529a3256fb..584d6b9509589 100644 --- a/examples/decomposition/plot_ica_blind_source_separation.py +++ b/examples/decomposition/plot_ica_blind_source_separation.py @@ -41,7 +41,7 @@ # Fit ICA and PCA models # ---------------------- -from sklearn.decomposition import FastICA, PCA +from sklearn.decomposition import PCA, FastICA # Compute ICA ica = FastICA(n_components=3, whiten="arbitrary-variance") diff --git a/examples/decomposition/plot_image_denoising.py b/examples/decomposition/plot_image_denoising.py index 2840905f0f604..646669d1469ff 100644 --- a/examples/decomposition/plot_image_denoising.py +++ b/examples/decomposition/plot_image_denoising.py @@ -37,7 +37,6 @@ # ------------------------ import numpy as np - try: # Scipy >= 1.10 from scipy.datasets import face except ImportError: diff --git a/examples/decomposition/plot_incremental_pca.py b/examples/decomposition/plot_incremental_pca.py index adc7f83f3cda0..8e5aeccfddc8a 100644 --- a/examples/decomposition/plot_incremental_pca.py +++ b/examples/decomposition/plot_incremental_pca.py @@ -22,8 +22,8 @@ # Authors: Kyle Kastner # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import load_iris from sklearn.decomposition import PCA, IncrementalPCA diff --git a/examples/decomposition/plot_kernel_pca.py b/examples/decomposition/plot_kernel_pca.py index 8b04f6809d2da..10f82ffec15f0 100644 --- a/examples/decomposition/plot_kernel_pca.py +++ b/examples/decomposition/plot_kernel_pca.py @@ -4,7 +4,7 @@ ========== This example shows the difference between the Principal Components Analysis -(:class:`~sklearn.decomposition.PCA`) and its kernalized version +(:class:`~sklearn.decomposition.PCA`) and its kernelized version (:class:`~sklearn.decomposition.KernelPCA`). 
On the one hand, we show that :class:`~sklearn.decomposition.KernelPCA` is able diff --git a/examples/decomposition/plot_pca_3d.py b/examples/decomposition/plot_pca_3d.py index 692b9983ed55e..61ce5dde75c89 100644 --- a/examples/decomposition/plot_pca_3d.py +++ b/examples/decomposition/plot_pca_3d.py @@ -19,7 +19,6 @@ # --------------- import numpy as np - from scipy import stats e = np.exp(1) @@ -52,13 +51,13 @@ def pdf(x): # Plot the figures # ---------------- -from sklearn.decomposition import PCA - import matplotlib.pyplot as plt # unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +from sklearn.decomposition import PCA + def plot_figs(fig_num, elev, azim): fig = plt.figure(fig_num, figsize=(4, 3)) diff --git a/examples/decomposition/plot_pca_iris.py b/examples/decomposition/plot_pca_iris.py index 7c3e69580d298..d025ba34adc27 100644 --- a/examples/decomposition/plot_pca_iris.py +++ b/examples/decomposition/plot_pca_iris.py @@ -13,15 +13,13 @@ # Code source: Gaël Varoquaux # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt - -from sklearn import decomposition -from sklearn import datasets - # unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +import numpy as np + +from sklearn import datasets, decomposition np.random.seed(5) diff --git a/examples/decomposition/plot_pca_vs_fa_model_selection.py b/examples/decomposition/plot_pca_vs_fa_model_selection.py index 4c934ab756c3e..e269fc6b5c278 100644 --- a/examples/decomposition/plot_pca_vs_fa_model_selection.py +++ b/examples/decomposition/plot_pca_vs_fa_model_selection.py @@ -34,7 +34,6 @@ # --------------- import numpy as np - from scipy import linalg n_samples, n_features, rank = 500, 25, 5 @@ -56,10 +55,9 @@ import matplotlib.pyplot as plt +from sklearn.covariance import LedoitWolf, ShrunkCovariance from sklearn.decomposition import PCA, FactorAnalysis -from sklearn.covariance import ShrunkCovariance, LedoitWolf -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import GridSearchCV, cross_val_score n_components = np.arange(0, n_features, 5) # options for n_components diff --git a/examples/decomposition/plot_sparse_coding.py b/examples/decomposition/plot_sparse_coding.py index 4f4602f1ff1ac..c45cd3c83b04f 100644 --- a/examples/decomposition/plot_sparse_coding.py +++ b/examples/decomposition/plot_sparse_coding.py @@ -16,8 +16,8 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.decomposition import SparseCoder diff --git a/examples/decomposition/plot_varimax_fa.py b/examples/decomposition/plot_varimax_fa.py index 6e50709620325..9d4c3b9ed1ee7 100644 --- a/examples/decomposition/plot_varimax_fa.py +++ b/examples/decomposition/plot_varimax_fa.py @@ -22,9 +22,9 @@ import matplotlib.pyplot as plt import numpy as np -from sklearn.decomposition import FactorAnalysis, PCA -from sklearn.preprocessing import StandardScaler from sklearn.datasets import load_iris +from sklearn.decomposition import PCA, FactorAnalysis +from sklearn.preprocessing import StandardScaler # %% # Load Iris data diff --git a/examples/developing_estimators/README.txt b/examples/developing_estimators/README.txt new file mode 100644 index 0000000000000..dc2c2ffde352a --- /dev/null +++ b/examples/developing_estimators/README.txt @@ -0,0 +1,6 @@ +.. 
_developing_estimator_examples: + +Developing Estimators +--------------------- + +Examples concerning the development of custom estimators. \ No newline at end of file diff --git a/examples/developing_estimators/sklearn_is_fitted.py b/examples/developing_estimators/sklearn_is_fitted.py new file mode 100644 index 0000000000000..f426ed2e0b441 --- /dev/null +++ b/examples/developing_estimators/sklearn_is_fitted.py @@ -0,0 +1,76 @@ +""" +======================================== +`__sklearn_is_fitted__` as Developer API +======================================== + +The `__sklearn_is_fitted__` method is a convention used in scikit-learn for +checking whether an estimator object has been fitted or not. This method is +typically implemented in custom estimator classes that are built on top of +scikit-learn's base classes like `BaseEstimator` or its subclasses. + +Developers should use :func:`~sklearn.utils.validation.check_is_fitted` +at the beginning of all methods except `fit`. If they need to customize or +speed-up the check, they can implement the `__sklearn_is_fitted__` method as +shown below. + +In this example the custom estimator showcases the usage of the +`__sklearn_is_fitted__` method and the `check_is_fitted` utility function +as developer APIs. The `__sklearn_is_fitted__` method checks fitted status +by verifying the presence of the `_is_fitted` attribute. +""" + +# %% +# An example custom estimator implementing a simple classifier +# ------------------------------------------------------------ +# This code snippet defines a custom estimator class called `CustomEstimator` +# that extends both the `BaseEstimator` and `ClassifierMixin` classes from +# scikit-learn and showcases the usage of the `__sklearn_is_fitted__` method +# and the `check_is_fitted` utility function. + +# Author: Kushan +# +# License: BSD 3 clause + +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils.validation import check_is_fitted + + +class CustomEstimator(BaseEstimator, ClassifierMixin): + def __init__(self, parameter=1): + self.parameter = parameter + + def fit(self, X, y): + """ + Fit the estimator to the training data. + """ + self.classes_ = sorted(set(y)) + # Custom attribute to track if the estimator is fitted + self._is_fitted = True + return self + + def predict(self, X): + """ + Perform Predictions + + If the estimator is not fitted, then raise NotFittedError + """ + check_is_fitted(self) + # Perform prediction logic + predictions = [self.classes_[0]] * len(X) + return predictions + + def score(self, X, y): + """ + Calculate Score + + If the estimator is not fitted, then raise NotFittedError + """ + check_is_fitted(self) + # Perform scoring logic + return 0.5 + + def __sklearn_is_fitted__(self): + """ + Check fitted status and return a Boolean value. + """ + return hasattr(self, "_is_fitted") and self._is_fitted diff --git a/examples/ensemble/plot_adaboost_hastie_10_2.py b/examples/ensemble/plot_adaboost_hastie_10_2.py index 13d3a90d3b05c..313056286f6ba 100644 --- a/examples/ensemble/plot_adaboost_hastie_10_2.py +++ b/examples/ensemble/plot_adaboost_hastie_10_2.py @@ -94,6 +94,7 @@ # added to the ensemble.
import numpy as np + from sklearn.metrics import zero_one_loss ada_discrete_err = np.zeros((n_estimators,)) diff --git a/examples/ensemble/plot_adaboost_multiclass.py b/examples/ensemble/plot_adaboost_multiclass.py index fae87b4a42d3d..6990ba88cf9ec 100644 --- a/examples/ensemble/plot_adaboost_multiclass.py +++ b/examples/ensemble/plot_adaboost_multiclass.py @@ -1,123 +1,253 @@ -r""" +""" ===================================== Multi-class AdaBoosted Decision Trees ===================================== -This example reproduces Figure 1 of Zhu et al [1]_ and shows how boosting can -improve prediction accuracy on a multi-class problem. The classification -dataset is constructed by taking a ten-dimensional standard normal distribution -and defining three classes separated by nested concentric ten-dimensional -spheres such that roughly equal numbers of samples are in each class (quantiles -of the :math:`\chi^2` distribution). - -The performance of the SAMME and SAMME.R [1]_ algorithms are compared. SAMME.R -uses the probability estimates to update the additive model, while SAMME uses -the classifications only. As the example illustrates, the SAMME.R algorithm -typically converges faster than SAMME, achieving a lower test error with fewer -boosting iterations. The error of each algorithm on the test set after each -boosting iteration is shown on the left, the classification error on the test -set of each tree is shown in the middle, and the boost weight of each tree is -shown on the right. All trees have a weight of one in the SAMME.R algorithm and -therefore are not shown. - -.. [1] J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class AdaBoost", 2009. +This example shows how boosting can improve the prediction accuracy on a +multi-class classification problem. It reproduces a similar experiment as +depicted by Figure 1 in Zhu et al [1]_. + +The core principle of AdaBoost (Adaptive Boosting) is to fit a sequence of weak +learners (e.g. Decision Trees) on repeatedly re-sampled versions of the data. +Each sample carries a weight that is adjusted after each training step, such +that misclassified samples will be assigned higher weights. The re-sampling +process with replacement takes into account the weights assigned to each sample. +Samples with higher weights have a greater chance of being selected multiple +times in the new data set, while samples with lower weights are less likely to +be selected. This ensures that subsequent iterations of the algorithm focus on +the difficult-to-classify samples. + +.. topic:: References: + + .. [1] :doi:`J. Zhu, H. Zou, S. Rosset, T. Hastie, "Multi-class adaboost." + Statistics and its Interface 2.3 (2009): 349-360. + <10.4310/SII.2009.v2.n3.a8>` """ -# Author: Noel Dawe -# +# Noel Dawe # License: BSD 3 clause -import matplotlib.pyplot as plt - +# %% +# Creating the dataset +# -------------------- +# The classification dataset is constructed by taking a ten-dimensional standard +# normal distribution (:math:`x` in :math:`R^{10}`) and defining three classes +# separated by nested concentric ten-dimensional spheres such that roughly equal +# numbers of samples are in each class (quantiles of the :math:`\chi^2` +# distribution).
from sklearn.datasets import make_gaussian_quantiles -from sklearn.ensemble import AdaBoostClassifier -from sklearn.metrics import accuracy_score -from sklearn.tree import DecisionTreeClassifier - X, y = make_gaussian_quantiles( - n_samples=13000, n_features=10, n_classes=3, random_state=1 + n_samples=2_000, n_features=10, n_classes=3, random_state=1 ) -n_split = 3000 +# %% +# We split the dataset into 2 sets: 70 percent of the samples are used for +# training and the remaining 30 percent for testing. +from sklearn.model_selection import train_test_split -X_train, X_test = X[:n_split], X[n_split:] -y_train, y_test = y[:n_split], y[n_split:] - -bdt_real = AdaBoostClassifier( - DecisionTreeClassifier(max_depth=2), n_estimators=300, learning_rate=1 +X_train, X_test, y_train, y_test = train_test_split( + X, y, train_size=0.7, random_state=42 ) -bdt_discrete = AdaBoostClassifier( - DecisionTreeClassifier(max_depth=2), - n_estimators=300, - learning_rate=1.5, +# %% +# Training the `AdaBoostClassifier` +# --------------------------------- +# We train the :class:`~sklearn.ensemble.AdaBoostClassifier`. The estimator +# utilizes boosting to improve the classification accuracy. Boosting is a method +# designed to train weak learners (i.e. `base_estimator`) that learn from their +# predecessor's mistakes. +# +# Here, we define the weak learner as a +# :class:`~sklearn.tree.DecisionTreeClassifier` and set the maximum number of +# leaves to 8. In a real setting, this parameter should be tuned. We set it to a +# rather low value to limit the runtime of the example. +# +# The `SAMME` algorithm built into the +# :class:`~sklearn.ensemble.AdaBoostClassifier` then uses the correct or +# incorrect predictions made by the current weak learner to update the sample +# weights used for training the consecutive weak learners. Also, the weight of +# the weak learner itself is calculated based on its accuracy in classifying the +# training examples. The weight of the weak learner determines its influence on +# the final ensemble prediction. +from sklearn.ensemble import AdaBoostClassifier +from sklearn.tree import DecisionTreeClassifier + +weak_learner = DecisionTreeClassifier(max_leaf_nodes=8) +n_estimators = 300 + +adaboost_clf = AdaBoostClassifier( + estimator=weak_learner, + n_estimators=n_estimators, algorithm="SAMME", -) + random_state=42, +).fit(X_train, y_train) + +# %% +# Analysis +# -------- +# Convergence of the `AdaBoostClassifier` +# *************************************** +# To demonstrate the effectiveness of boosting in improving accuracy, we +# evaluate the misclassification error of the boosted trees in comparison to two +# baseline scores. The first baseline score is the `misclassification_error` +# obtained from a single weak-learner (i.e. +# :class:`~sklearn.tree.DecisionTreeClassifier`), which serves as a reference +# point. The second baseline score is obtained from the +# :class:`~sklearn.dummy.DummyClassifier`, which predicts the most prevalent +# class in a dataset.
+from sklearn.dummy import DummyClassifier +from sklearn.metrics import accuracy_score -bdt_real.fit(X_train, y_train) -bdt_discrete.fit(X_train, y_train) +dummy_clf = DummyClassifier() -real_test_errors = [] -discrete_test_errors = [] -for real_test_predict, discrete_test_predict in zip( - bdt_real.staged_predict(X_test), bdt_discrete.staged_predict(X_test) -): - real_test_errors.append(1.0 - accuracy_score(real_test_predict, y_test)) - discrete_test_errors.append(1.0 - accuracy_score(discrete_test_predict, y_test)) +def misclassification_error(y_true, y_pred): + return 1 - accuracy_score(y_true, y_pred) -n_trees_discrete = len(bdt_discrete) -n_trees_real = len(bdt_real) -# Boosting might terminate early, but the following arrays are always -# n_estimators long. We crop them to the actual number of trees here: -discrete_estimator_errors = bdt_discrete.estimator_errors_[:n_trees_discrete] -real_estimator_errors = bdt_real.estimator_errors_[:n_trees_real] -discrete_estimator_weights = bdt_discrete.estimator_weights_[:n_trees_discrete] +weak_learners_misclassification_error = misclassification_error( + y_test, weak_learner.fit(X_train, y_train).predict(X_test) +) -plt.figure(figsize=(15, 5)) +dummy_classifiers_misclassification_error = misclassification_error( + y_test, dummy_clf.fit(X_train, y_train).predict(X_test) +) -plt.subplot(131) -plt.plot(range(1, n_trees_discrete + 1), discrete_test_errors, c="black", label="SAMME") -plt.plot( - range(1, n_trees_real + 1), - real_test_errors, - c="black", - linestyle="dashed", - label="SAMME.R", +print( + "DecisionTreeClassifier's misclassification_error: " + f"{weak_learners_misclassification_error:.3f}" +) +print( + "DummyClassifier's misclassification_error: " + f"{dummy_classifiers_misclassification_error:.3f}" ) -plt.legend() -plt.ylim(0.18, 0.62) -plt.ylabel("Test Error") -plt.xlabel("Number of Trees") -plt.subplot(132) +# %% +# After training the :class:`~sklearn.tree.DecisionTreeClassifier` model, the +# achieved error surpasses the expected value that would have been obtained by +# guessing the most frequent class label, as the +# :class:`~sklearn.dummy.DummyClassifier` does. +# +# Now, we calculate the `misclassification_error`, i.e. `1 - accuracy`, of the +# additive model (:class:`~sklearn.tree.DecisionTreeClassifier`) at each +# boosting iteration on the test set to assess its performance. +# +# We use :meth:`~sklearn.ensemble.AdaBoostClassifier.staged_predict` that makes +# as many iterations as the number of fitted estimators (i.e. corresponding to +# `n_estimators`). At iteration `n`, the predictions of AdaBoost only use the +# `n` first weak learners. We compare these predictions with the true +# labels `y_test` and we, therefore, conclude on the benefit (or not) of adding a +# new weak learner into the chain.
+# +# We plot the misclassification error for the different stages: +import matplotlib.pyplot as plt +import pandas as pd + +boosting_errors = pd.DataFrame( + { + "Number of trees": range(1, n_estimators + 1), + "AdaBoost": [ + misclassification_error(y_test, y_pred) + for y_pred in adaboost_clf.staged_predict(X_test) + ], + } +).set_index("Number of trees") +ax = boosting_errors.plot() +ax.set_ylabel("Misclassification error on test set") +ax.set_title("Convergence of AdaBoost algorithm") + plt.plot( - range(1, n_trees_discrete + 1), - discrete_estimator_errors, - "b", - label="SAMME", - alpha=0.5, + [boosting_errors.index.min(), boosting_errors.index.max()], + [weak_learners_misclassification_error, weak_learners_misclassification_error], + color="tab:orange", + linestyle="dashed", ) plt.plot( - range(1, n_trees_real + 1), real_estimator_errors, "r", label="SAMME.R", alpha=0.5 + [boosting_errors.index.min(), boosting_errors.index.max()], + [ + dummy_classifiers_misclassification_error, + dummy_classifiers_misclassification_error, + ], + color="c", + linestyle="dotted", ) -plt.legend() -plt.ylabel("Error") -plt.xlabel("Number of Trees") -plt.ylim((0.2, max(real_estimator_errors.max(), discrete_estimator_errors.max()) * 1.2)) -plt.xlim((-20, len(bdt_discrete) + 20)) - -plt.subplot(133) -plt.plot(range(1, n_trees_discrete + 1), discrete_estimator_weights, "b", label="SAMME") -plt.legend() -plt.ylabel("Weight") -plt.xlabel("Number of Trees") -plt.ylim((0, discrete_estimator_weights.max() * 1.2)) -plt.xlim((-20, n_trees_discrete + 20)) - -# prevent overlapping y-axis labels -plt.subplots_adjust(wspace=0.25) +plt.legend(["AdaBoost", "DecisionTreeClassifier", "DummyClassifier"], loc=1) plt.show() + +# %% +# The plot shows the misclassification error on the test set after each +# boosting iteration. We see that the error of the boosted trees converges to an +# error of around 0.3 after 50 iterations, indicating a significantly higher +# accuracy compared to a single tree, as illustrated by the dashed line in the +# plot. +# +# The misclassification error jitters because the `SAMME` algorithm uses the +# discrete outputs of the weak learners to train the boosted model. +# +# The convergence of :class:`~sklearn.ensemble.AdaBoostClassifier` is mainly +# influenced by the learning rate (i.e. `learning_rate`), the number of weak +# learners used (`n_estimators`), and the expressivity of the weak learners +# (e.g. `max_leaf_nodes`). + +# %% +# Errors and weights of the Weak Learners +# *************************************** +# As previously mentioned, AdaBoost is a forward stagewise additive model. We +# now focus on understanding the relationship between the attributed weights of +# the weak learners and their statistical performance. +# +# We use the fitted :class:`~sklearn.ensemble.AdaBoostClassifier`'s attributes +# `estimator_errors_` and `estimator_weights_` to investigate this link.
+weak_learners_info = pd.DataFrame( + { + "Number of trees": range(1, n_estimators + 1), + "Errors": adaboost_clf.estimator_errors_, + "Weights": adaboost_clf.estimator_weights_, + } +).set_index("Number of trees") + +axs = weak_learners_info.plot( + subplots=True, layout=(1, 2), figsize=(10, 4), legend=False, color="tab:blue" +) +axs[0, 0].set_ylabel("Train error") +axs[0, 0].set_title("Weak learner's training error") +axs[0, 1].set_ylabel("Weight") +axs[0, 1].set_title("Weak learner's weight") +fig = axs[0, 0].get_figure() +fig.suptitle("Weak learner's errors and weights for the AdaBoostClassifier") +fig.tight_layout() + +# %% +# On the left plot, we show the weighted error of each weak learner on the +# reweighted training set at each boosting iteration. On the right plot, we show +# the weights associated with each weak learner later used to make the +# predictions of the final additive model. + +# We see that the error of the weak learner is the inverse of the weights. It +# means that our additive model will put more trust in a weak learner that makes +# smaller errors (on the training set) by increasing its impact on the final +# decision. Indeed, this is exactly the formulation of updating the base +# estimators' weights after each iteration in AdaBoost. +# +# |details-start| Mathematical details |details-split| +# +# The weight associated with a weak learner trained at the stage :math:`m` is +# inversely associated with its misclassification error such that: +# +# .. math:: \alpha^{(m)} = \log \frac{1 - err^{(m)}}{err^{(m)}} + \log (K - 1), +# +# where :math:`\alpha^{(m)}` and :math:`err^{(m)}` are the weight and the error +# of the :math:`m` th weak learner, respectively, and :math:`K` is the number of +# classes in our classification problem. |details-end| +# +# Another interesting observation boils down to the fact that the first weak +# learners of the model make fewer errors than later weak learners of the +# boosting chain. +# +# The intuition behind this observation is the following: due to the sample +# reweighting, later classifiers are forced to try to classify more difficult or +# noisy samples and to ignore already well classified samples. Therefore, the +# overall error on the training set will increase. That's why the weak learner's +# weights are built to counter-balance the worse-performing weak learners.
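The weight formula above can also be checked numerically. A minimal sketch, assuming the `adaboost_clf` fitted earlier in this example (default `learning_rate=1.0`, `algorithm="SAMME"`) and the three-class dataset, so `K = 3`; under those assumptions the weights recomputed from `estimator_errors_` should match `estimator_weights_`:

import numpy as np

# Recompute alpha_m = log((1 - err_m) / err_m) + log(K - 1) for every boosting
# stage and compare with the weights stored on the fitted classifier.
K = 3  # number of classes in the make_gaussian_quantiles dataset
err = adaboost_clf.estimator_errors_
recomputed_weights = np.log((1 - err) / err) + np.log(K - 1)
print(np.allclose(recomputed_weights, adaboost_clf.estimator_weights_))

If boosting terminated early or a non-default `learning_rate` were used, the stored weights would differ accordingly.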
diff --git a/examples/ensemble/plot_adaboost_twoclass.py b/examples/ensemble/plot_adaboost_twoclass.py index 19679c6285d3b..d1e89c47b7fcf 100644 --- a/examples/ensemble/plot_adaboost_twoclass.py +++ b/examples/ensemble/plot_adaboost_twoclass.py @@ -21,14 +21,13 @@ # # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.ensemble import AdaBoostClassifier -from sklearn.tree import DecisionTreeClassifier from sklearn.datasets import make_gaussian_quantiles +from sklearn.ensemble import AdaBoostClassifier from sklearn.inspection import DecisionBoundaryDisplay - +from sklearn.tree import DecisionTreeClassifier # Construct dataset X1, y1 = make_gaussian_quantiles( diff --git a/examples/ensemble/plot_bias_variance.py b/examples/ensemble/plot_bias_variance.py index 4f57b90019e94..9239603115db1 100644 --- a/examples/ensemble/plot_bias_variance.py +++ b/examples/ensemble/plot_bias_variance.py @@ -66,8 +66,8 @@ # Author: Gilles Louppe # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.ensemble import BaggingRegressor from sklearn.tree import DecisionTreeRegressor diff --git a/examples/ensemble/plot_ensemble_oob.py b/examples/ensemble/plot_ensemble_oob.py index bd678af42a7d1..972ca1f6259aa 100644 --- a/examples/ensemble/plot_ensemble_oob.py +++ b/examples/ensemble/plot_ensemble_oob.py @@ -26,9 +26,10 @@ # # License: BSD 3 Clause +from collections import OrderedDict + import matplotlib.pyplot as plt -from collections import OrderedDict from sklearn.datasets import make_classification from sklearn.ensemble import RandomForestClassifier diff --git a/examples/ensemble/plot_feature_transformation.py b/examples/ensemble/plot_feature_transformation.py index 36eb87bb757cd..8a17dd9d74194 100644 --- a/examples/ensemble/plot_feature_transformation.py +++ b/examples/ensemble/plot_feature_transformation.py @@ -59,7 +59,7 @@ # First, we will start by training the random forest and gradient boosting on # the separated training set -from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier +from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier random_forest = RandomForestClassifier( n_estimators=n_estimators, max_depth=max_depth, random_state=10 @@ -105,8 +105,7 @@ # method `apply`. The pipeline in scikit-learn expects a call to `transform`. # Therefore, we wrapped the call to `apply` within a `FunctionTransformer`. -from sklearn.preprocessing import FunctionTransformer -from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import FunctionTransformer, OneHotEncoder def rf_apply(X, model): @@ -143,6 +142,7 @@ def gbdt_apply(X, model): # We can finally show the different ROC curves for all the models. import matplotlib.pyplot as plt + from sklearn.metrics import RocCurveDisplay fig, ax = plt.subplots() diff --git a/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py b/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py index b4a1993471474..0dde24116065d 100644 --- a/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py +++ b/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py @@ -11,8 +11,8 @@ The comparison is made by varying the parameters that control the number of trees according to each estimator: -- `n_estimators` controls the number of trees in the forest. It's a fixed numer. 
-- `max_iter` is the the maximum number of iterations in a gradient boosting +- `n_estimators` controls the number of trees in the forest. It's a fixed number. +- `max_iter` is the maximum number of iterations in a gradient boosting based model. The number of iterations corresponds to the number of trees for regression and binary classification problems. Furthermore, the actual number of trees required by the model depends on the stopping criteria. @@ -78,8 +78,8 @@ # here to keep the example simple. import pandas as pd -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import RandomForestRegressor + +from sklearn.ensemble import HistGradientBoostingRegressor, RandomForestRegressor from sklearn.model_selection import GridSearchCV, KFold models = { @@ -123,8 +123,8 @@ # Error bars correspond to one standard deviation as computed in the different # folds of the cross-validation. -import plotly.express as px import plotly.colors as colors +import plotly.express as px from plotly.subplots import make_subplots fig = make_subplots( @@ -202,7 +202,7 @@ # makes fitting and scoring slower. The RF model reaches such plateau earlier # and can never reach the test score of the largest HGBDT model. # -# Note that the results shown on the above plot can change sightly across runs +# Note that the results shown on the above plot can change slightly across runs # and even more significantly when running on other machines: try to run this # example on your own local machine. # @@ -210,7 +210,7 @@ # models uniformly dominate the Random Forest models in the "test score vs # training speed trade-off" (the HGBDT curve should be on the top left of the RF # curve, without ever crossing). The "test score vs prediction speed" trade-off -# can also be more disputed but it's most often favorable to HGBDT. It's always +# can also be more disputed, but it's most often favorable to HGBDT. It's always # a good idea to check both kinds of model (with hyper-parameter tuning) and # compare their performance on your specific problem to determine which model is # the best fit but **HGBT almost always offers a more favorable speed-accuracy diff --git a/examples/ensemble/plot_forest_importances.py b/examples/ensemble/plot_forest_importances.py index fbda63b26faee..269451168dd7a 100644 --- a/examples/ensemble/plot_forest_importances.py +++ b/examples/ensemble/plot_forest_importances.py @@ -57,6 +57,7 @@ # cardinality** features (many unique values). See # :ref:`permutation_importance` as an alternative below. import time + import numpy as np start_time = time.time() diff --git a/examples/ensemble/plot_forest_importances_faces.py b/examples/ensemble/plot_forest_importances_faces.py index 3848873c297de..8b8e8751ec5a2 100644 --- a/examples/ensemble/plot_forest_importances_faces.py +++ b/examples/ensemble/plot_forest_importances_faces.py @@ -59,6 +59,7 @@ # cardinality** features (many unique values). See # :ref:`permutation_importance` as an alternative. 
import time + import matplotlib.pyplot as plt start_time = time.time() diff --git a/examples/ensemble/plot_forest_iris.py b/examples/ensemble/plot_forest_iris.py index ee414db7125dc..6aaceea88efd2 100644 --- a/examples/ensemble/plot_forest_iris.py +++ b/examples/ensemble/plot_forest_iris.py @@ -42,15 +42,15 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.colors import ListedColormap from sklearn.datasets import load_iris from sklearn.ensemble import ( - RandomForestClassifier, - ExtraTreesClassifier, AdaBoostClassifier, + ExtraTreesClassifier, + RandomForestClassifier, ) from sklearn.tree import DecisionTreeClassifier diff --git a/examples/ensemble/plot_gradient_boosting_categorical.py b/examples/ensemble/plot_gradient_boosting_categorical.py index fa4b68be9cbb7..0dd0a84243b4d 100644 --- a/examples/ensemble/plot_gradient_boosting_categorical.py +++ b/examples/ensemble/plot_gradient_boosting_categorical.py @@ -77,10 +77,9 @@ # As a baseline, we create an estimator where the categorical features are # dropped: +from sklearn.compose import make_column_selector, make_column_transformer from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.pipeline import make_pipeline -from sklearn.compose import make_column_transformer -from sklearn.compose import make_column_selector dropper = make_column_transformer( ("drop", make_column_selector(dtype_include="category")), remainder="passthrough" @@ -114,9 +113,10 @@ # were ordered quantities, i.e. the categories will be encoded as 0, 1, 2, # etc., and treated as continuous features. -from sklearn.preprocessing import OrdinalEncoder import numpy as np +from sklearn.preprocessing import OrdinalEncoder + ordinal_encoder = make_column_transformer( ( OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=np.nan), @@ -166,9 +166,10 @@ # models performance in terms of # :func:`~metrics.mean_absolute_percentage_error` and fit times. -from sklearn.model_selection import cross_validate import matplotlib.pyplot as plt +from sklearn.model_selection import cross_validate + scoring = "neg_mean_absolute_percentage_error" n_cv_folds = 3 diff --git a/examples/ensemble/plot_gradient_boosting_early_stopping.py b/examples/ensemble/plot_gradient_boosting_early_stopping.py index 6f1013eed9564..f271f80a07c55 100644 --- a/examples/ensemble/plot_gradient_boosting_early_stopping.py +++ b/examples/ensemble/plot_gradient_boosting_early_stopping.py @@ -38,11 +38,10 @@ import time -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn import ensemble -from sklearn import datasets +from sklearn import datasets, ensemble from sklearn.model_selection import train_test_split data_list = [ diff --git a/examples/ensemble/plot_gradient_boosting_oob.py b/examples/ensemble/plot_gradient_boosting_oob.py index dd7f19a1fe245..0cb40ad2c11ea 100644 --- a/examples/ensemble/plot_gradient_boosting_oob.py +++ b/examples/ensemble/plot_gradient_boosting_oob.py @@ -26,15 +26,13 @@ # # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np +from scipy.special import expit from sklearn import ensemble -from sklearn.model_selection import KFold -from sklearn.model_selection import train_test_split from sklearn.metrics import log_loss - -from scipy.special import expit +from sklearn.model_selection import KFold, train_test_split # Generate data (adapted from G. 
Ridgeway's gbm example) n_samples = 1000 diff --git a/examples/ensemble/plot_gradient_boosting_quantile.py b/examples/ensemble/plot_gradient_boosting_quantile.py index 2aa04c3988d9e..d1464ba92c572 100644 --- a/examples/ensemble/plot_gradient_boosting_quantile.py +++ b/examples/ensemble/plot_gradient_boosting_quantile.py @@ -12,6 +12,7 @@ # Generate some data for a synthetic regression problem by applying the # function f to uniformly sampled random inputs. import numpy as np + from sklearn.model_selection import train_test_split @@ -58,7 +59,6 @@ def f(x): from sklearn.ensemble import GradientBoostingRegressor from sklearn.metrics import mean_pinball_loss, mean_squared_error - all_models = {} common_params = dict( learning_rate=0.05, @@ -93,7 +93,6 @@ def f(x): # 90% interval (from 5th to 95th conditional percentiles). import matplotlib.pyplot as plt - y_pred = all_models["mse"].predict(xx) y_lower = all_models["q 0.05"].predict(xx) y_upper = all_models["q 0.95"].predict(xx) @@ -129,8 +128,8 @@ def f(x): # Analysis of the error metrics # ----------------------------- # -# Measure the models with :func:`mean_squared_error` and -# :func:`mean_pinball_loss` metrics on the training dataset. +# Measure the models with :func:`~sklearn.metrics.mean_squared_error` and +# :func:`~sklearn.metrics.mean_pinball_loss` metrics on the training dataset. import pandas as pd @@ -157,7 +156,7 @@ def highlight_min(x): # training converged. # # Note that because the target distribution is asymmetric, the expected -# conditional mean and conditional median are signficiantly different and +# conditional mean and conditional median are significantly different and # therefore one could not use the squared error model get a good estimation of # the conditional median nor the converse. # @@ -195,7 +194,7 @@ def highlight_min(x): # -------------------------------------- # # We can also evaluate the ability of the two extreme quantile estimators at -# producing a well-calibrated conditational 90%-confidence interval. +# producing a well-calibrated conditional 90%-confidence interval. 
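A compact sketch of that calibration check follows: two extreme-quantile models are fit and the empirical coverage of the resulting interval is measured. The synthetic data below is a stand-in for the example's own generating function, so the exact numbers will differ.

import numpy as np

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

X, y = make_regression(n_samples=2_000, n_features=5, noise=20.0, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# One model per extreme quantile of the conditional distribution.
gbr_low = GradientBoostingRegressor(loss="quantile", alpha=0.05, random_state=0)
gbr_high = GradientBoostingRegressor(loss="quantile", alpha=0.95, random_state=0)
gbr_low.fit(X_train, y_train)
gbr_high.fit(X_train, y_train)

y_low = gbr_low.predict(X_test)
y_high = gbr_high.predict(X_test)

# Fraction of observations falling inside the predicted interval; a
# well-calibrated 90% interval should give a value close to 0.90.
coverage = np.mean((y_test >= y_low) & (y_test <= y_high))
print(f"Empirical coverage of the 5%-95% interval: {coverage:.2f}")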
# # To do this we can compute the fraction of observations that fall between the # predictions: diff --git a/examples/ensemble/plot_gradient_boosting_regression.py b/examples/ensemble/plot_gradient_boosting_regression.py index 3e378e8af7203..94705ccfeca24 100644 --- a/examples/ensemble/plot_gradient_boosting_regression.py +++ b/examples/ensemble/plot_gradient_boosting_regression.py @@ -23,6 +23,7 @@ import matplotlib.pyplot as plt import numpy as np + from sklearn import datasets, ensemble from sklearn.inspection import permutation_importance from sklearn.metrics import mean_squared_error diff --git a/examples/ensemble/plot_gradient_boosting_regularization.py b/examples/ensemble/plot_gradient_boosting_regularization.py index a4ac69a822b92..218d69d5ac7d7 100644 --- a/examples/ensemble/plot_gradient_boosting_regularization.py +++ b/examples/ensemble/plot_gradient_boosting_regularization.py @@ -25,11 +25,10 @@ # # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn import ensemble -from sklearn import datasets +from sklearn import datasets, ensemble from sklearn.metrics import log_loss from sklearn.model_selection import train_test_split diff --git a/examples/ensemble/plot_isolation_forest.py b/examples/ensemble/plot_isolation_forest.py index aeabb60203ac6..f5fad1d7b9ea9 100644 --- a/examples/ensemble/plot_isolation_forest.py +++ b/examples/ensemble/plot_isolation_forest.py @@ -31,6 +31,7 @@ # the label `-1`. import numpy as np + from sklearn.model_selection import train_test_split n_samples, n_outliers = 120, 40 @@ -78,6 +79,7 @@ # or not. The scatter plot displays the true labels. import matplotlib.pyplot as plt + from sklearn.inspection import DecisionBoundaryDisplay disp = DecisionBoundaryDisplay.from_estimator( diff --git a/examples/ensemble/plot_monotonic_constraints.py b/examples/ensemble/plot_monotonic_constraints.py index b1f7ca8ed24ed..15ad8e9524243 100644 --- a/examples/ensemble/plot_monotonic_constraints.py +++ b/examples/ensemble/plot_monotonic_constraints.py @@ -20,11 +20,11 @@ """ # %% -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.inspection import PartialDependenceDisplay -import numpy as np import matplotlib.pyplot as plt +import numpy as np +from sklearn.ensemble import HistGradientBoostingRegressor +from sklearn.inspection import PartialDependenceDisplay rng = np.random.RandomState(0) diff --git a/examples/ensemble/plot_random_forest_embedding.py b/examples/ensemble/plot_random_forest_embedding.py index 000b83e67b92a..fe26e04ca7789 100644 --- a/examples/ensemble/plot_random_forest_embedding.py +++ b/examples/ensemble/plot_random_forest_embedding.py @@ -26,12 +26,12 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import make_circles -from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier from sklearn.decomposition import TruncatedSVD +from sklearn.ensemble import ExtraTreesClassifier, RandomTreesEmbedding from sklearn.naive_bayes import BernoulliNB # make a synthetic dataset diff --git a/examples/ensemble/plot_random_forest_regression_multioutput.py b/examples/ensemble/plot_random_forest_regression_multioutput.py index 4b3d4f4a9a728..ce8346c329127 100644 --- a/examples/ensemble/plot_random_forest_regression_multioutput.py +++ b/examples/ensemble/plot_random_forest_regression_multioutput.py @@ -25,13 +25,13 @@ # # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from 
sklearn.ensemble import RandomForestRegressor from sklearn.model_selection import train_test_split from sklearn.multioutput import MultiOutputRegressor - # Create a random dataset rng = np.random.RandomState(1) X = np.sort(200 * rng.rand(600, 1) - 100, axis=0) diff --git a/examples/ensemble/plot_stack_predictors.py b/examples/ensemble/plot_stack_predictors.py index 56a82ded5b725..aac7ccc8a3ef8 100644 --- a/examples/ensemble/plot_stack_predictors.py +++ b/examples/ensemble/plot_stack_predictors.py @@ -131,8 +131,7 @@ def load_ames_housing(): # Then, we will now define the preprocessor used when the ending regressor # is a linear model. -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler +from sklearn.preprocessing import OneHotEncoder, StandardScaler cat_linear_processor = OneHotEncoder(handle_unknown="ignore") num_linear_processor = make_pipeline( @@ -206,9 +205,11 @@ def load_ames_housing(): import time + import matplotlib.pyplot as plt + from sklearn.metrics import PredictionErrorDisplay -from sklearn.model_selection import cross_validate, cross_val_predict +from sklearn.model_selection import cross_val_predict, cross_validate fig, axs = plt.subplots(2, 2, figsize=(9, 7)) axs = np.ravel(axs) diff --git a/examples/ensemble/plot_voting_decision_regions.py b/examples/ensemble/plot_voting_decision_regions.py index e6dc68eeadf98..90441c6d28339 100644 --- a/examples/ensemble/plot_voting_decision_regions.py +++ b/examples/ensemble/plot_voting_decision_regions.py @@ -28,11 +28,11 @@ import matplotlib.pyplot as plt from sklearn import datasets -from sklearn.tree import DecisionTreeClassifier -from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import SVC from sklearn.ensemble import VotingClassifier from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.neighbors import KNeighborsClassifier +from sklearn.svm import SVC +from sklearn.tree import DecisionTreeClassifier # Loading some example data iris = datasets.load_iris() diff --git a/examples/ensemble/plot_voting_probas.py b/examples/ensemble/plot_voting_probas.py index 54c290c3073e0..14f4f4330c045 100644 --- a/examples/ensemble/plot_voting_probas.py +++ b/examples/ensemble/plot_voting_probas.py @@ -23,13 +23,12 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np +from sklearn.ensemble import RandomForestClassifier, VotingClassifier from sklearn.linear_model import LogisticRegression from sklearn.naive_bayes import GaussianNB -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import VotingClassifier clf1 = LogisticRegression(max_iter=1000, random_state=123) clf2 = RandomForestClassifier(n_estimators=100, random_state=123) diff --git a/examples/ensemble/plot_voting_regressor.py b/examples/ensemble/plot_voting_regressor.py index 23e709cc9e62a..d33becca505e3 100644 --- a/examples/ensemble/plot_voting_regressor.py +++ b/examples/ensemble/plot_voting_regressor.py @@ -26,10 +26,12 @@ import matplotlib.pyplot as plt from sklearn.datasets import load_diabetes -from sklearn.ensemble import GradientBoostingRegressor -from sklearn.ensemble import RandomForestRegressor +from sklearn.ensemble import ( + GradientBoostingRegressor, + RandomForestRegressor, + VotingRegressor, +) from sklearn.linear_model import LinearRegression -from sklearn.ensemble import VotingRegressor # %% # Training classifiers diff --git a/examples/exercises/plot_cv_digits.py b/examples/exercises/plot_cv_digits.py index e43bbd86bb027..ebad3a55098b5 100644 --- 
a/examples/exercises/plot_cv_digits.py +++ b/examples/exercises/plot_cv_digits.py @@ -11,8 +11,9 @@ """ import numpy as np -from sklearn.model_selection import cross_val_score + from sklearn import datasets, svm +from sklearn.model_selection import cross_val_score X, y = datasets.load_digits(return_X_y=True) diff --git a/examples/exercises/plot_digits_classification_exercise.py b/examples/exercises/plot_digits_classification_exercise.py index 877e615659743..25b0171c66421 100644 --- a/examples/exercises/plot_digits_classification_exercise.py +++ b/examples/exercises/plot_digits_classification_exercise.py @@ -12,7 +12,7 @@ """ -from sklearn import datasets, neighbors, linear_model +from sklearn import datasets, linear_model, neighbors X_digits, y_digits = datasets.load_digits(return_X_y=True) X_digits = X_digits / X_digits.max() diff --git a/examples/exercises/plot_iris_exercise.py b/examples/exercises/plot_iris_exercise.py index 74da8c27889c9..07687b920e1b8 100644 --- a/examples/exercises/plot_iris_exercise.py +++ b/examples/exercises/plot_iris_exercise.py @@ -10,8 +10,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import datasets, svm iris = datasets.load_iris() diff --git a/examples/feature_selection/plot_f_test_vs_mi.py b/examples/feature_selection/plot_f_test_vs_mi.py index ba82625a7cfaf..5c015e7e4fd58 100644 --- a/examples/feature_selection/plot_f_test_vs_mi.py +++ b/examples/feature_selection/plot_f_test_vs_mi.py @@ -23,8 +23,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.feature_selection import f_regression, mutual_info_regression np.random.seed(0) diff --git a/examples/feature_selection/plot_feature_selection.py b/examples/feature_selection/plot_feature_selection.py index ce2bad8626a79..c57a2d5d6b6f9 100644 --- a/examples/feature_selection/plot_feature_selection.py +++ b/examples/feature_selection/plot_feature_selection.py @@ -21,6 +21,7 @@ # -------------------- # import numpy as np + from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split diff --git a/examples/feature_selection/plot_rfe_digits.py b/examples/feature_selection/plot_rfe_digits.py index 9684f5fabd383..553f38f9c674f 100644 --- a/examples/feature_selection/plot_rfe_digits.py +++ b/examples/feature_selection/plot_rfe_digits.py @@ -12,10 +12,11 @@ """ # noqa: E501 -from sklearn.svm import SVC +import matplotlib.pyplot as plt + from sklearn.datasets import load_digits from sklearn.feature_selection import RFE -import matplotlib.pyplot as plt +from sklearn.svm import SVC # Load the digits dataset digits = load_digits() diff --git a/examples/feature_selection/plot_rfe_with_cross_validation.py b/examples/feature_selection/plot_rfe_with_cross_validation.py index 2d52ea5a3fdf3..693e21fe21787 100644 --- a/examples/feature_selection/plot_rfe_with_cross_validation.py +++ b/examples/feature_selection/plot_rfe_with_cross_validation.py @@ -39,8 +39,8 @@ # strategy "accuracy" optimizes the proportion of correctly classified samples. 
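The cross-validated recursive feature elimination described above boils down to a few lines; this sketch uses a synthetic classification task in place of the example's data, so the selected number of features is only illustrative.

from sklearn.datasets import make_classification
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold

X, y = make_classification(n_samples=500, n_features=15, n_informative=3, random_state=0)

rfecv = RFECV(
    estimator=LogisticRegression(max_iter=1000),
    step=1,
    cv=StratifiedKFold(5),
    scoring="accuracy",  # proportion of correctly classified samples
    min_features_to_select=1,
)
rfecv.fit(X, y)
print(f"Optimal number of features: {rfecv.n_features_}")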
from sklearn.feature_selection import RFECV -from sklearn.model_selection import StratifiedKFold from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import StratifiedKFold min_features_to_select = 1 # Minimum number of features to consider clf = LogisticRegression() diff --git a/examples/feature_selection/plot_select_from_model_diabetes.py b/examples/feature_selection/plot_select_from_model_diabetes.py index 16f63868feae0..f008d8d6e8b68 100644 --- a/examples/feature_selection/plot_select_from_model_diabetes.py +++ b/examples/feature_selection/plot_select_from_model_diabetes.py @@ -6,7 +6,7 @@ This example illustrates and compares two approaches for feature selection: :class:`~sklearn.feature_selection.SelectFromModel` which is based on feature importance, and -:class:`~sklearn.feature_selection.SequentialFeatureSelection` which relies +:class:`~sklearn.feature_selection.SequentialFeatureSelector` which relies on a greedy approach. We use the Diabetes dataset, which consists of 10 features collected from 442 @@ -43,9 +43,10 @@ # were already standardized. # For a more complete example on the interpretations of the coefficients of # linear models, you may refer to -# :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`. +# :ref:`sphx_glr_auto_examples_inspection_plot_linear_model_coefficient_interpretation.py`. # noqa: E501 import matplotlib.pyplot as plt import numpy as np + from sklearn.linear_model import RidgeCV ridge = RidgeCV(alphas=np.logspace(-6, 6, num=5)).fit(X, y) @@ -67,9 +68,10 @@ # # Since we want to select only 2 features, we will set this threshold slightly # above the coefficient of third most important feature. -from sklearn.feature_selection import SelectFromModel from time import time +from sklearn.feature_selection import SelectFromModel + threshold = np.sort(importance)[-3] + 0.01 tic = time() @@ -120,9 +122,6 @@ print(f"Done in {toc_bwd - tic_bwd:.3f}s") # %% -# Discussion -# ---------- -# # Interestingly, forward and backward selection have selected the same set of # features. In general, this isn't the case and the two methods would lead to # different results. @@ -143,3 +142,54 @@ # attribute. The forward SFS is faster than the backward SFS because it only # needs to perform `n_features_to_select = 2` iterations, while the backward # SFS needs to perform `n_features - n_features_to_select = 8` iterations. +# +# Using negative tolerance values +# ------------------------------- +# +# :class:`~sklearn.feature_selection.SequentialFeatureSelector` can be used +# to remove features present in the dataset and return a +# smaller subset of the original features with `direction="backward"` +# and a negative value of `tol`. +# +# We begin by loading the Breast Cancer dataset, consisting of 30 different +# features and 569 samples. +import numpy as np + +from sklearn.datasets import load_breast_cancer + +breast_cancer_data = load_breast_cancer() +X, y = breast_cancer_data.data, breast_cancer_data.target +feature_names = np.array(breast_cancer_data.feature_names) +print(breast_cancer_data.DESCR) + +# %% +# We will make use of the :class:`~sklearn.linear_model.LogisticRegression` +# estimator with :class:`~sklearn.feature_selection.SequentialFeatureSelector` +# to perform the feature selection. 
+from sklearn.linear_model import LogisticRegression +from sklearn.metrics import roc_auc_score +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler + +for tol in [-1e-2, -1e-3, -1e-4]: + start = time() + feature_selector = SequentialFeatureSelector( + LogisticRegression(), + n_features_to_select="auto", + direction="backward", + scoring="roc_auc", + tol=tol, + n_jobs=2, + ) + model = make_pipeline(StandardScaler(), feature_selector, LogisticRegression()) + model.fit(X, y) + end = time() + print(f"\ntol: {tol}") + print(f"Features selected: {feature_names[model[1].get_support()]}") + print(f"ROC AUC score: {roc_auc_score(y, model.predict_proba(X)[:, 1]):.3f}") + print(f"Done in {end - start:.3f}s") + +# %% +# We can see that the number of features selected tend to increase as negative +# values of `tol` approach to zero. The time taken for feature selection also +# decreases as the values of `tol` come closer to zero. diff --git a/examples/gaussian_process/plot_compare_gpr_krr.py b/examples/gaussian_process/plot_compare_gpr_krr.py index 7a58ba437278f..8379baf148256 100644 --- a/examples/gaussian_process/plot_compare_gpr_krr.py +++ b/examples/gaussian_process/plot_compare_gpr_krr.py @@ -125,6 +125,7 @@ # # Thus, let's use such a :class:`~sklearn.kernel_ridge.KernelRidge`. import time + from sklearn.gaussian_process.kernels import ExpSineSquared from sklearn.kernel_ridge import KernelRidge @@ -176,9 +177,10 @@ # parameter and the kernel parameters. # %% -from sklearn.model_selection import RandomizedSearchCV from scipy.stats import loguniform +from sklearn.model_selection import RandomizedSearchCV + param_distributions = { "alpha": loguniform(1e0, 1e3), "kernel__length_scale": loguniform(1e-2, 1e2), diff --git a/examples/gaussian_process/plot_gpc.py b/examples/gaussian_process/plot_gpc.py index e2d78fa23f09e..21a99065e06ce 100644 --- a/examples/gaussian_process/plot_gpc.py +++ b/examples/gaussian_process/plot_gpc.py @@ -27,13 +27,11 @@ # License: BSD 3 clause import numpy as np - from matplotlib import pyplot as plt -from sklearn.metrics import accuracy_score, log_loss from sklearn.gaussian_process import GaussianProcessClassifier from sklearn.gaussian_process.kernels import RBF - +from sklearn.metrics import accuracy_score, log_loss # Generate data train_size = 50 diff --git a/examples/gaussian_process/plot_gpc_iris.py b/examples/gaussian_process/plot_gpc_iris.py index ce0ed066a1377..88c536d8824c8 100644 --- a/examples/gaussian_process/plot_gpc_iris.py +++ b/examples/gaussian_process/plot_gpc_iris.py @@ -10,8 +10,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import datasets from sklearn.gaussian_process import GaussianProcessClassifier from sklearn.gaussian_process.kernels import RBF diff --git a/examples/gaussian_process/plot_gpc_isoprobability.py b/examples/gaussian_process/plot_gpc_isoprobability.py index cc036244bc17a..a986d285632b7 100644 --- a/examples/gaussian_process/plot_gpc_isoprobability.py +++ b/examples/gaussian_process/plot_gpc_isoprobability.py @@ -14,12 +14,12 @@ # License: BSD 3 clause import numpy as np - -from matplotlib import pyplot as plt from matplotlib import cm +from matplotlib import pyplot as plt from sklearn.gaussian_process import GaussianProcessClassifier -from sklearn.gaussian_process.kernels import DotProduct, ConstantKernel as C +from sklearn.gaussian_process.kernels import ConstantKernel as C +from sklearn.gaussian_process.kernels import DotProduct # A few constants 
lim = 8 diff --git a/examples/gaussian_process/plot_gpc_xor.py b/examples/gaussian_process/plot_gpc_xor.py index 6e6217dba8b9e..4439a5ee722b6 100644 --- a/examples/gaussian_process/plot_gpc_xor.py +++ b/examples/gaussian_process/plot_gpc_xor.py @@ -15,13 +15,12 @@ # # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.gaussian_process import GaussianProcessClassifier from sklearn.gaussian_process.kernels import RBF, DotProduct - xx, yy = np.meshgrid(np.linspace(-3, 3, 50), np.linspace(-3, 3, 50)) rng = np.random.RandomState(0) X = rng.randn(200, 2) diff --git a/examples/gaussian_process/plot_gpr_co2.py b/examples/gaussian_process/plot_gpr_co2.py index bfc1c21631b26..787574a65a9d3 100644 --- a/examples/gaussian_process/plot_gpr_co2.py +++ b/examples/gaussian_process/plot_gpr_co2.py @@ -1,7 +1,7 @@ """ -======================================================= -Gaussian process regression (GPR) on Mauna Loa CO2 data -======================================================= +==================================================================================== +Forecasting of CO2 level on Mona Loa dataset using Gaussian process regression (GPR) +==================================================================================== This example is based on Section 5.4.3 of "Gaussian Processes for Machine Learning" [RW2006]_. It illustrates an example of complex kernel engineering @@ -172,6 +172,7 @@ # Thus, we create synthetic data from 1958 to the current month. In addition, # we need to add the subtracted mean computed during training. import datetime + import numpy as np today = datetime.datetime.now() diff --git a/examples/gaussian_process/plot_gpr_noisy.py b/examples/gaussian_process/plot_gpr_noisy.py index b76fc745e7df7..31d3b149aa47f 100644 --- a/examples/gaussian_process/plot_gpr_noisy.py +++ b/examples/gaussian_process/plot_gpr_noisy.py @@ -1,7 +1,7 @@ """ -============================================================= -Gaussian process regression (GPR) with noise-level estimation -============================================================= +========================================================================= +Ability of Gaussian process regression (GPR) to estimate data noise-level +========================================================================= This example shows the ability of the :class:`~sklearn.gaussian_process.kernels.WhiteKernel` to estimate the noise diff --git a/examples/gaussian_process/plot_gpr_on_structured_data.py b/examples/gaussian_process/plot_gpr_on_structured_data.py index ada50a0edf06b..e702f1fe0769a 100644 --- a/examples/gaussian_process/plot_gpr_on_structured_data.py +++ b/examples/gaussian_process/plot_gpr_on_structured_data.py @@ -40,11 +40,10 @@ # %% import numpy as np -from sklearn.gaussian_process.kernels import Kernel, Hyperparameter -from sklearn.gaussian_process.kernels import GenericKernelMixin -from sklearn.gaussian_process import GaussianProcessRegressor -from sklearn.gaussian_process import GaussianProcessClassifier + from sklearn.base import clone +from sklearn.gaussian_process import GaussianProcessClassifier, GaussianProcessRegressor +from sklearn.gaussian_process.kernels import GenericKernelMixin, Hyperparameter, Kernel class SequenceKernel(GenericKernelMixin, Kernel): diff --git a/examples/impute/plot_iterative_imputer_variants_comparison.py b/examples/impute/plot_iterative_imputer_variants_comparison.py index d83922817e5de..445a08c05f02f 100644 --- 
a/examples/impute/plot_iterative_imputer_variants_comparison.py +++ b/examples/impute/plot_iterative_imputer_variants_comparison.py @@ -13,8 +13,8 @@ imputation with :class:`~impute.IterativeImputer`: * :class:`~linear_model.BayesianRidge`: regularized linear regression -* :class:`~tree.RandomForestRegressor`: Forests of randomized trees regression -* :func:`~pipeline.make_pipeline`(:class:`~kernel_approximation.Nystroem`, +* :class:`~ensemble.RandomForestRegressor`: Forests of randomized trees regression +* :func:`~pipeline.make_pipeline` (:class:`~kernel_approximation.Nystroem`, :class:`~linear_model.Ridge`): a pipeline with the expansion of a degree 2 polynomial kernel and regularized linear regression * :class:`~neighbors.KNeighborsRegressor`: comparable to other KNN @@ -44,21 +44,21 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np import pandas as pd +from sklearn.datasets import fetch_california_housing +from sklearn.ensemble import RandomForestRegressor + # To use this experimental feature, we need to explicitly ask for it: from sklearn.experimental import enable_iterative_imputer # noqa -from sklearn.datasets import fetch_california_housing -from sklearn.impute import SimpleImputer -from sklearn.impute import IterativeImputer -from sklearn.linear_model import BayesianRidge, Ridge +from sklearn.impute import IterativeImputer, SimpleImputer from sklearn.kernel_approximation import Nystroem -from sklearn.ensemble import RandomForestRegressor +from sklearn.linear_model import BayesianRidge, Ridge +from sklearn.model_selection import cross_val_score from sklearn.neighbors import KNeighborsRegressor from sklearn.pipeline import make_pipeline -from sklearn.model_selection import cross_val_score N_SPLITS = 5 diff --git a/examples/impute/plot_missing_values.py b/examples/impute/plot_missing_values.py index f6350ad2544dd..4b9f8ae079d8a 100644 --- a/examples/impute/plot_missing_values.py +++ b/examples/impute/plot_missing_values.py @@ -44,9 +44,7 @@ import numpy as np -from sklearn.datasets import fetch_california_housing -from sklearn.datasets import load_diabetes - +from sklearn.datasets import fetch_california_housing, load_diabetes rng = np.random.RandomState(42) @@ -95,11 +93,10 @@ def add_missing_values(X_full, y_full): # To use the experimental IterativeImputer, we need to explicitly ask for it: from sklearn.experimental import enable_iterative_imputer # noqa -from sklearn.impute import SimpleImputer, KNNImputer, IterativeImputer +from sklearn.impute import IterativeImputer, KNNImputer, SimpleImputer from sklearn.model_selection import cross_val_score from sklearn.pipeline import make_pipeline - N_SPLITS = 4 regressor = RandomForestRegressor(random_state=0) @@ -260,7 +257,6 @@ def get_impute_iterative(X_missing, y_missing): import matplotlib.pyplot as plt - n_bars = len(mses_diabetes) xval = np.arange(n_bars) diff --git a/examples/inspection/plot_linear_model_coefficient_interpretation.py b/examples/inspection/plot_linear_model_coefficient_interpretation.py index d978ee860636c..eb935ee41ae67 100644 --- a/examples/inspection/plot_linear_model_coefficient_interpretation.py +++ b/examples/inspection/plot_linear_model_coefficient_interpretation.py @@ -40,10 +40,10 @@ """ # %% +import matplotlib.pyplot as plt import numpy as np -import scipy as sp import pandas as pd -import matplotlib.pyplot as plt +import scipy as sp import seaborn as sns # %% @@ -53,7 +53,6 @@ # We fetch the data from `OpenML `_. 
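As a brief aside on the imputer comparison above, plugging different estimators into :class:`~impute.IterativeImputer` looks roughly like the sketch below. The small synthetic dataset with randomly masked entries is an illustrative stand-in for the California housing data used in that example.

import numpy as np

from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

# The iterative imputer is experimental and must be enabled explicitly.
from sklearn.experimental import enable_iterative_imputer  # noqa
from sklearn.impute import IterativeImputer
from sklearn.linear_model import BayesianRidge
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline

rng = np.random.RandomState(0)
X, y = make_regression(n_samples=300, n_features=8, random_state=0)
X_missing = X.copy()
X_missing[rng.rand(*X.shape) < 0.2] = np.nan  # mask roughly 20% of the entries

for estimator in (BayesianRidge(), RandomForestRegressor(n_estimators=10, random_state=0)):
    model = make_pipeline(
        IterativeImputer(estimator=estimator, random_state=0), BayesianRidge()
    )
    scores = cross_val_score(model, X_missing, y, scoring="neg_mean_squared_error", cv=3)
    print(f"{estimator.__class__.__name__}: mean squared error = {-scores.mean():.1f}")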
# Note that setting the parameter `as_frame` to True will retrieve the data # as a pandas dataframe. - from sklearn.datasets import fetch_openml survey = fetch_openml(data_id=534, as_frame=True, parser="pandas") @@ -154,9 +153,9 @@ # To describe the dataset as a linear model we use a ridge regressor # with a very small regularization and to model the logarithm of the WAGE. -from sklearn.pipeline import make_pipeline -from sklearn.linear_model import Ridge from sklearn.compose import TransformedTargetRegressor +from sklearn.linear_model import Ridge +from sklearn.pipeline import make_pipeline model = make_pipeline( preprocessor, @@ -178,8 +177,7 @@ # on the test set and computing, # for example, the median absolute error of the model. -from sklearn.metrics import median_absolute_error -from sklearn.metrics import PredictionErrorDisplay +from sklearn.metrics import PredictionErrorDisplay, median_absolute_error mae_train = median_absolute_error(y_train, model.predict(X_train)) y_pred = model.predict(X_test) @@ -319,8 +317,7 @@ # their robustness is not guaranteed, and they should probably be interpreted # with caution. -from sklearn.model_selection import cross_validate -from sklearn.model_selection import RepeatedKFold +from sklearn.model_selection import RepeatedKFold, cross_validate cv = RepeatedKFold(n_splits=5, n_repeats=5, random_state=0) cv_model = cross_validate( diff --git a/examples/inspection/plot_partial_dependence.py b/examples/inspection/plot_partial_dependence.py index 43404b356d829..ed7a656da9926 100644 --- a/examples/inspection/plot_partial_dependence.py +++ b/examples/inspection/plot_partial_dependence.py @@ -100,8 +100,9 @@ # We plot the average number of bike rentals by grouping the data by season and # by year. from itertools import product -import numpy as np + import matplotlib.pyplot as plt +import numpy as np days = ("Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat") hours = tuple(range(24)) @@ -157,8 +158,7 @@ # numerical features and encode the categorical features with a # :class:`~sklearn.preprocessing.OneHotEncoder`. from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import QuantileTransformer -from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import OneHotEncoder, QuantileTransformer mlp_preprocessor = ColumnTransformer( transformers=[ @@ -203,6 +203,7 @@ # Let's fit a :class:`~sklearn.neural_network.MLPRegressor` and compute # single-variable partial dependence plots. from time import time + from sklearn.neural_network import MLPRegressor from sklearn.pipeline import make_pipeline @@ -242,6 +243,7 @@ # # We will plot the averaged partial dependence. import matplotlib.pyplot as plt + from sklearn.inspection import PartialDependenceDisplay common_params = { @@ -529,10 +531,9 @@ # # Let's make the same partial dependence plot for the 2 features interaction, # this time in 3 dimensions. -import numpy as np - # unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +import numpy as np from sklearn.inspection import partial_dependence diff --git a/examples/inspection/plot_permutation_importance.py b/examples/inspection/plot_permutation_importance.py index cf0907ce3fd37..789506e892e3a 100644 --- a/examples/inspection/plot_permutation_importance.py +++ b/examples/inspection/plot_permutation_importance.py @@ -64,9 +64,9 @@ # categorical features; # - use :class:`~sklearn.impute.SimpleImputer` to fill missing values for # numerical features using a mean strategy. 
+from sklearn.compose import ColumnTransformer from sklearn.ensemble import RandomForestClassifier from sklearn.impute import SimpleImputer -from sklearn.compose import ColumnTransformer from sklearn.pipeline import Pipeline from sklearn.preprocessing import OrdinalEncoder diff --git a/examples/inspection/plot_permutation_importance_multicollinear.py b/examples/inspection/plot_permutation_importance_multicollinear.py index 59871c00946a6..a8fe52b1565d9 100644 --- a/examples/inspection/plot_permutation_importance_multicollinear.py +++ b/examples/inspection/plot_permutation_importance_multicollinear.py @@ -3,12 +3,15 @@ Permutation Importance with Multicollinear or Correlated Features ================================================================= -In this example, we compute the permutation importance on the Wisconsin -breast cancer dataset using :func:`~sklearn.inspection.permutation_importance`. -The :class:`~sklearn.ensemble.RandomForestClassifier` can easily get about 97% -accuracy on a test dataset. Because this dataset contains multicollinear -features, the permutation importance will show that none of the features are -important. One approach to handling multicollinearity is by performing +In this example, we compute the +:func:`~sklearn.inspection.permutation_importance` of the features to a trained +:class:`~sklearn.ensemble.RandomForestClassifier` using the +:ref:`breast_cancer_dataset`. The model can easily get about 97% accuracy on a +test dataset. Because this dataset contains multicollinear features, the +permutation importance shows that none of the features are important, in +contradiction with the high test accuracy. + +We demo a possible approach to handling multicollinearity, which consists of hierarchical clustering on the features' Spearman rank-order correlations, picking a threshold, and keeping a single feature from each cluster. 
@@ -18,68 +21,106 @@ """ -from collections import defaultdict +# %% +# Random Forest Feature Importance on Breast Cancer Data +# ------------------------------------------------------ +# +# First, we define a function to ease the plotting: +from sklearn.inspection import permutation_importance -import matplotlib.pyplot as plt -import numpy as np -from scipy.stats import spearmanr -from scipy.cluster import hierarchy -from scipy.spatial.distance import squareform +def plot_permutation_importance(clf, X, y, ax): + result = permutation_importance(clf, X, y, n_repeats=10, random_state=42, n_jobs=2) + perm_sorted_idx = result.importances_mean.argsort() + + ax.boxplot( + result.importances[perm_sorted_idx].T, + vert=False, + labels=X.columns[perm_sorted_idx], + ) + ax.axvline(x=0, color="k", linestyle="--") + return ax + + +# %% +# We then train a :class:`~sklearn.ensemble.RandomForestClassifier` on the +# :ref:`breast_cancer_dataset` and evaluate its accuracy on a test set: from sklearn.datasets import load_breast_cancer from sklearn.ensemble import RandomForestClassifier -from sklearn.inspection import permutation_importance from sklearn.model_selection import train_test_split -# %% -# Random Forest Feature Importance on Breast Cancer Data -# ------------------------------------------------------ -# First, we train a random forest on the breast cancer dataset and evaluate -# its accuracy on a test set: -data = load_breast_cancer() -X, y = data.data, data.target +X, y = load_breast_cancer(return_X_y=True, as_frame=True) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) clf = RandomForestClassifier(n_estimators=100, random_state=42) clf.fit(X_train, y_train) -print("Accuracy on test data: {:.2f}".format(clf.score(X_test, y_test))) +print(f"Baseline accuracy on test data: {clf.score(X_test, y_test):.2}") # %% # Next, we plot the tree based feature importance and the permutation -# importance. The permutation importance plot shows that permuting a feature -# drops the accuracy by at most `0.012`, which would suggest that none of the -# features are important. This is in contradiction with the high test accuracy -# computed above: some feature must be important. The permutation importance -# is calculated on the training set to show how much the model relies on each -# feature during training. -result = permutation_importance(clf, X_train, y_train, n_repeats=10, random_state=42) -perm_sorted_idx = result.importances_mean.argsort() +# importance. The permutation importance is calculated on the training set to +# show how much the model relies on each feature during training. +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +mdi_importances = pd.Series(clf.feature_importances_, index=X_train.columns) tree_importance_sorted_idx = np.argsort(clf.feature_importances_) tree_indices = np.arange(0, len(clf.feature_importances_)) + 0.5 fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8)) -ax1.barh(tree_indices, clf.feature_importances_[tree_importance_sorted_idx], height=0.7) -ax1.set_yticks(tree_indices) -ax1.set_yticklabels(data.feature_names[tree_importance_sorted_idx]) -ax1.set_ylim((0, len(clf.feature_importances_))) -ax2.boxplot( - result.importances[perm_sorted_idx].T, - vert=False, - labels=data.feature_names[perm_sorted_idx], +mdi_importances.sort_values().plot.barh(ax=ax1) +ax1.set_xlabel("Gini importance") +plot_permutation_importance(clf, X_train, y_train, ax2) +ax2.set_xlabel("Decrease in accuracy score") +fig.suptitle( + "Impurity-based vs. 
permutation importances on multicollinear features (train set)" ) -fig.tight_layout() -plt.show() +_ = fig.tight_layout() + +# %% +# The plot on the left shows the Gini importance of the model. As the +# scikit-learn implementation of +# :class:`~sklearn.ensemble.RandomForestClassifier` uses a random subsets of +# :math:`\sqrt{n_\text{features}}` features at each split, it is able to dilute +# the dominance of any single correlated feature. As a result, the individual +# feature importance may be distributed more evenly among the correlated +# features. Since the features have large cardinality and the classifier is +# non-overfitted, we can relatively trust those values. +# +# The permutation importance on the right plot shows that permuting a feature +# drops the accuracy by at most `0.012`, which would suggest that none of the +# features are important. This is in contradiction with the high test accuracy +# computed as baseline: some feature must be important. +# +# Similarly, the change in accuracy score computed on the test set appears to be +# driven by chance: + +fig, ax = plt.subplots(figsize=(7, 6)) +plot_permutation_importance(clf, X_test, y_test, ax) +ax.set_title("Permutation Importances on multicollinear features\n(test set)") +ax.set_xlabel("Decrease in accuracy score") +_ = ax.figure.tight_layout() # %% +# Nevertheless, one can still compute a meaningful permutation importance in the +# presence of correlated features, as demonstrated in the following section. +# # Handling Multicollinear Features # -------------------------------- -# When features are collinear, permutating one feature will have little -# effect on the models performance because it can get the same information -# from a correlated feature. One way to handle multicollinear features is by -# performing hierarchical clustering on the Spearman rank-order correlations, -# picking a threshold, and keeping a single feature from each cluster. First, -# we plot a heatmap of the correlated features: +# When features are collinear, permuting one feature has little effect on the +# models performance because it can get the same information from a correlated +# feature. Note that this is not the case for all predictive models and depends +# on their underlying implementation. +# +# One way to handle multicollinear features is by performing hierarchical +# clustering on the Spearman rank-order correlations, picking a threshold, and +# keeping a single feature from each cluster. 
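The collinearity mechanism described above, where a permuted feature is backed up by a correlated one, can be demonstrated in a few lines by duplicating one informative feature before training. The toy dataset below is only for illustration and is unrelated to the breast cancer data.

import numpy as np

from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance

X, y = make_classification(n_samples=1_000, n_features=5, n_informative=3, random_state=0)
# Append an exact copy of the first feature: the two columns are perfectly
# correlated and carry the same predictive signal.
X_dup = np.hstack([X, X[:, [0]]])

clf = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_dup, y)
result = permutation_importance(clf, X_dup, y, n_repeats=10, random_state=0)

# Permuting only one of the two copies leaves the other intact, so the model
# keeps most of the signal and each copy receives a low importance.
print("importance of feature 0:", result.importances_mean[0].round(3))
print("importance of its copy :", result.importances_mean[-1].round(3))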
First, we plot a heatmap of the +# correlated features: +from scipy.cluster import hierarchy +from scipy.spatial.distance import squareform +from scipy.stats import spearmanr + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8)) corr = spearmanr(X).correlation @@ -92,7 +133,7 @@ distance_matrix = 1 - np.abs(corr) dist_linkage = hierarchy.ward(squareform(distance_matrix)) dendro = hierarchy.dendrogram( - dist_linkage, labels=data.feature_names.tolist(), ax=ax1, leaf_rotation=90 + dist_linkage, labels=X.columns.to_list(), ax=ax1, leaf_rotation=90 ) dendro_idx = np.arange(0, len(dendro["ivl"])) @@ -101,28 +142,40 @@ ax2.set_yticks(dendro_idx) ax2.set_xticklabels(dendro["ivl"], rotation="vertical") ax2.set_yticklabels(dendro["ivl"]) -fig.tight_layout() -plt.show() +_ = fig.tight_layout() # %% -# Next, we manually pick a threshold by visual inspection of the dendrogram -# to group our features into clusters and choose a feature from each cluster to +# Next, we manually pick a threshold by visual inspection of the dendrogram to +# group our features into clusters and choose a feature from each cluster to # keep, select those features from our dataset, and train a new random forest. -# The test accuracy of the new random forest did not change much compared to -# the random forest trained on the complete dataset. +# The test accuracy of the new random forest did not change much compared to the +# random forest trained on the complete dataset. +from collections import defaultdict + cluster_ids = hierarchy.fcluster(dist_linkage, 1, criterion="distance") cluster_id_to_feature_ids = defaultdict(list) for idx, cluster_id in enumerate(cluster_ids): cluster_id_to_feature_ids[cluster_id].append(idx) selected_features = [v[0] for v in cluster_id_to_feature_ids.values()] +selected_features_names = X.columns[selected_features] -X_train_sel = X_train[:, selected_features] -X_test_sel = X_test[:, selected_features] +X_train_sel = X_train[selected_features_names] +X_test_sel = X_test[selected_features_names] clf_sel = RandomForestClassifier(n_estimators=100, random_state=42) clf_sel.fit(X_train_sel, y_train) print( - "Accuracy on test data with features removed: {:.2f}".format( - clf_sel.score(X_test_sel, y_test) - ) + "Baseline accuracy on test data with features removed:" + f" {clf_sel.score(X_test_sel, y_test):.2}" ) + +# %% +# We can finally explore the permutation importance of the selected subset of +# features: + +fig, ax = plt.subplots(figsize=(7, 6)) +plot_permutation_importance(clf_sel, X_test_sel, y_test, ax) +ax.set_title("Permutation Importances on selected subset of features\n(test set)") +ax.set_xlabel("Decrease in accuracy score") +ax.figure.tight_layout() +plt.show() diff --git a/examples/kernel_approximation/plot_scalable_poly_kernels.py b/examples/kernel_approximation/plot_scalable_poly_kernels.py index 1a46e4bc2aa9c..c3fe5b405d0d0 100644 --- a/examples/kernel_approximation/plot_scalable_poly_kernels.py +++ b/examples/kernel_approximation/plot_scalable_poly_kernels.py @@ -1,15 +1,15 @@ """ -======================================================= +====================================================== Scalable learning with polynomial kernel approximation -======================================================= +====================================================== + +.. currentmodule:: sklearn.kernel_approximation This example illustrates the use of :class:`PolynomialCountSketch` to efficiently generate polynomial kernel feature-space approximations. 
This is used to train linear classifiers that approximate the accuracy of kernelized ones. -.. currentmodule:: sklearn.kernel_approximation - We use the Covtype dataset [2], trying to reproduce the experiments on the original paper of Tensor Sketch [1], i.e. the algorithm implemented by :class:`PolynomialCountSketch`. @@ -64,8 +64,8 @@ # the LIBSVM webpage, and then normalize to unit length as done in the # original Tensor Sketch paper [1]. -from sklearn.preprocessing import MinMaxScaler, Normalizer from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import MinMaxScaler, Normalizer mm = make_pipeline(MinMaxScaler(), Normalizer()) X_train = mm.fit_transform(X_train) @@ -80,6 +80,7 @@ # plot them later. import time + from sklearn.svm import LinearSVC results = {} diff --git a/examples/linear_model/plot_ard.py b/examples/linear_model/plot_ard.py index 261fec8aeee3b..79b49fb76ef9a 100644 --- a/examples/linear_model/plot_ard.py +++ b/examples/linear_model/plot_ard.py @@ -54,7 +54,8 @@ # coefficients. import pandas as pd -from sklearn.linear_model import ARDRegression, LinearRegression, BayesianRidge + +from sklearn.linear_model import ARDRegression, BayesianRidge, LinearRegression olr = LinearRegression().fit(X, y) brr = BayesianRidge(compute_score=True, n_iter=30).fit(X, y) diff --git a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py index 3bca3101758ff..b31d95348c083 100644 --- a/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py +++ b/examples/linear_model/plot_elastic_net_precomputed_gram_matrix_with_weighted_samples.py @@ -4,7 +4,7 @@ ========================================================================== The following example shows how to precompute the gram matrix -while using weighted samples with an ElasticNet. +while using weighted samples with an :class:`~sklearn.linear_model.ElasticNet`. If weighted samples are used, the design matrix must be centered and then rescaled by the square root of the weight vector before the gram matrix @@ -13,13 +13,14 @@ .. note:: `sample_weight` vector is also rescaled to sum to `n_samples`, see the documentation for the `sample_weight` parameter to - :func:`linear_model.ElasticNet.fit`. + :meth:`~sklearn.linear_model.ElasticNet.fit`. """ # %% # Let's start by loading the dataset and creating some sample weights. 
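Spelled out, the centering-and-rescaling recipe from the note above reads as in the following sketch. The arrays `X`, `y` and `sample_weight` are generated on the spot as placeholders for the ones created in the example, and the chosen `alpha` is arbitrary.

import numpy as np

from sklearn.datasets import make_regression
from sklearn.linear_model import ElasticNet

rng = np.random.RandomState(0)
X, y = make_regression(n_samples=1_000, n_features=10, noise=0.5, random_state=0)
sample_weight = rng.lognormal(size=X.shape[0])

# Rescale the weights so that they sum to n_samples, as described in the note.
normalized_weights = sample_weight * (X.shape[0] / np.sum(sample_weight))

# Center the design matrix with the weighted mean, then rescale each row by
# the square root of its weight before computing the Gram matrix.
X_offset = np.average(X, axis=0, weights=normalized_weights)
X_centered = X - X_offset
X_scaled = X_centered * np.sqrt(normalized_weights)[:, np.newaxis]
gram = X_scaled.T @ X_scaled

# Fit on the centered data, passing the precomputed Gram matrix together with
# the rescaled weights.
enet = ElasticNet(alpha=0.01, precompute=gram)
enet.fit(X_centered, y, sample_weight=normalized_weights)
print(enet.coef_[:5])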
import numpy as np + from sklearn.datasets import make_regression rng = np.random.RandomState(0) diff --git a/examples/linear_model/plot_huber_vs_ridge.py b/examples/linear_model/plot_huber_vs_ridge.py index 2ea5a190e35d8..7c0222b71a721 100644 --- a/examples/linear_model/plot_huber_vs_ridge.py +++ b/examples/linear_model/plot_huber_vs_ridge.py @@ -16,8 +16,8 @@ # Authors: Manoj Kumar mks542@nyu.edu # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import make_regression from sklearn.linear_model import HuberRegressor, Ridge diff --git a/examples/linear_model/plot_iris_logistic.py b/examples/linear_model/plot_iris_logistic.py index faf547c783609..b1e4d76c7f221 100644 --- a/examples/linear_model/plot_iris_logistic.py +++ b/examples/linear_model/plot_iris_logistic.py @@ -15,9 +15,10 @@ # License: BSD 3 clause import matplotlib.pyplot as plt -from sklearn.linear_model import LogisticRegression + from sklearn import datasets from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.linear_model import LogisticRegression # import some data to play with iris = datasets.load_iris() diff --git a/examples/linear_model/plot_lasso_and_elasticnet.py b/examples/linear_model/plot_lasso_and_elasticnet.py index b08837304730a..075d8a50d2f62 100644 --- a/examples/linear_model/plot_lasso_and_elasticnet.py +++ b/examples/linear_model/plot_lasso_and_elasticnet.py @@ -112,9 +112,10 @@ # :class:`~sklearn.model_selection.TimeSeriesSplit` cross-validation strategy to a # :class:`~sklearn.linear_model.LassoCV`. To keep the example simple and fast to # execute, we directly set the optimal value for alpha here. +from time import time + from sklearn.linear_model import Lasso from sklearn.metrics import r2_score -from time import time t0 = time() lasso = Lasso(alpha=0.14).fit(X_train, y_train) @@ -181,8 +182,8 @@ # and estimated coefficients of the respective linear models. 
import matplotlib.pyplot as plt -import seaborn as sns import pandas as pd +import seaborn as sns from matplotlib.colors import SymLogNorm df = pd.DataFrame( diff --git a/examples/linear_model/plot_lasso_coordinate_descent_path.py b/examples/linear_model/plot_lasso_coordinate_descent_path.py index 1796dc5011644..ee2f09f000d23 100644 --- a/examples/linear_model/plot_lasso_coordinate_descent_path.py +++ b/examples/linear_model/plot_lasso_coordinate_descent_path.py @@ -14,12 +14,12 @@ # License: BSD 3 clause from itertools import cycle -import numpy as np + import matplotlib.pyplot as plt +import numpy as np -from sklearn.linear_model import lasso_path, enet_path from sklearn import datasets - +from sklearn.linear_model import enet_path, lasso_path X, y = datasets.load_diabetes(return_X_y=True) diff --git a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py index 8da1820c0b0c4..a797d5d708160 100644 --- a/examples/linear_model/plot_lasso_dense_vs_sparse_data.py +++ b/examples/linear_model/plot_lasso_dense_vs_sparse_data.py @@ -9,13 +9,12 @@ """ from time import time -from scipy import sparse -from scipy import linalg + +from scipy import linalg, sparse from sklearn.datasets import make_regression from sklearn.linear_model import Lasso - # %% # Comparing the two Lasso implementations on Dense data # ----------------------------------------------------- diff --git a/examples/linear_model/plot_lasso_lars.py b/examples/linear_model/plot_lasso_lars.py index 6788b8b1d1598..5444aeec90c65 100644 --- a/examples/linear_model/plot_lasso_lars.py +++ b/examples/linear_model/plot_lasso_lars.py @@ -14,11 +14,10 @@ # Alexandre Gramfort # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn import linear_model -from sklearn import datasets +from sklearn import datasets, linear_model X, y = datasets.load_diabetes(return_X_y=True) diff --git a/examples/linear_model/plot_lasso_lars_ic.py b/examples/linear_model/plot_lasso_lars_ic.py index 95c0d0d66608d..8f1e7034a108a 100644 --- a/examples/linear_model/plot_lasso_lars_ic.py +++ b/examples/linear_model/plot_lasso_lars_ic.py @@ -38,16 +38,16 @@ # %% # Scikit-learn provides an estimator called -# :class:`~sklearn.linear_model.LinearLarsIC` that uses either Akaike's +# :class:`~sklearn.linear_model.LassoLarsIC` that uses either Akaike's # information criterion (AIC) or the Bayesian information criterion (BIC) to # select the best model. Before fitting # this model, we will scale the dataset. # # In the following, we are going to fit two models to compare the values # reported by AIC and BIC. -from sklearn.preprocessing import StandardScaler from sklearn.linear_model import LassoLarsIC from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler lasso_lars_ic = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y) diff --git a/examples/linear_model/plot_lasso_model_selection.py b/examples/linear_model/plot_lasso_model_selection.py index 7735f01987aa9..169d85ed81644 100644 --- a/examples/linear_model/plot_lasso_model_selection.py +++ b/examples/linear_model/plot_lasso_model_selection.py @@ -59,9 +59,10 @@ # # We will first fit a Lasso model with the AIC criterion. 
import time -from sklearn.preprocessing import StandardScaler + from sklearn.linear_model import LassoLarsIC from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler start_time = time.time() lasso_lars_ic = make_pipeline(StandardScaler(), LassoLarsIC(criterion="aic")).fit(X, y) diff --git a/examples/linear_model/plot_logistic.py b/examples/linear_model/plot_logistic.py index 801c893e5e28e..6ed3c86e8c27b 100644 --- a/examples/linear_model/plot_logistic.py +++ b/examples/linear_model/plot_logistic.py @@ -15,6 +15,7 @@ import matplotlib.pyplot as plt import numpy as np from scipy.special import expit + from sklearn.linear_model import LinearRegression, LogisticRegression # Generate a toy dataset, it's just a straight line with some Gaussian noise: diff --git a/examples/linear_model/plot_logistic_l1_l2_sparsity.py b/examples/linear_model/plot_logistic_l1_l2_sparsity.py index e8f5a2d51b637..80374d3833151 100644 --- a/examples/linear_model/plot_logistic_l1_l2_sparsity.py +++ b/examples/linear_model/plot_logistic_l1_l2_sparsity.py @@ -20,11 +20,11 @@ # Andreas Mueller # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.linear_model import LogisticRegression from sklearn import datasets +from sklearn.linear_model import LogisticRegression from sklearn.preprocessing import StandardScaler X, y = datasets.load_digits(return_X_y=True) diff --git a/examples/linear_model/plot_logistic_multinomial.py b/examples/linear_model/plot_logistic_multinomial.py index 814eeadaa68c4..791a788b2238b 100644 --- a/examples/linear_model/plot_logistic_multinomial.py +++ b/examples/linear_model/plot_logistic_multinomial.py @@ -12,11 +12,12 @@ # Authors: Tom Dupre la Tour # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import make_blobs -from sklearn.linear_model import LogisticRegression from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.linear_model import LogisticRegression # make 3-class dataset for classification centers = [[-5, 0], [0, 1.5], [5, -1]] diff --git a/examples/linear_model/plot_multi_task_lasso_support.py b/examples/linear_model/plot_multi_task_lasso_support.py index a30b51ed7a7fe..9b6ea64ce4d85 100644 --- a/examples/linear_model/plot_multi_task_lasso_support.py +++ b/examples/linear_model/plot_multi_task_lasso_support.py @@ -39,7 +39,7 @@ # Fit models # ---------- -from sklearn.linear_model import MultiTaskLasso, Lasso +from sklearn.linear_model import Lasso, MultiTaskLasso coef_lasso_ = np.array([Lasso(alpha=0.5).fit(X, y).coef_ for y in Y.T]) coef_multi_task_lasso_ = MultiTaskLasso(alpha=1.0).fit(X, Y).coef_ diff --git a/examples/linear_model/plot_nnls.py b/examples/linear_model/plot_nnls.py index c8ba2914d783a..05a8550ec166b 100644 --- a/examples/linear_model/plot_nnls.py +++ b/examples/linear_model/plot_nnls.py @@ -9,8 +9,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.metrics import r2_score # %% diff --git a/examples/linear_model/plot_ols.py b/examples/linear_model/plot_ols.py index 0618f545306db..244bd86387474 100644 --- a/examples/linear_model/plot_ols.py +++ b/examples/linear_model/plot_ols.py @@ -19,6 +19,7 @@ import matplotlib.pyplot as plt import numpy as np + from sklearn import datasets, linear_model from sklearn.metrics import mean_squared_error, r2_score diff --git a/examples/linear_model/plot_ols_3d.py b/examples/linear_model/plot_ols_3d.py index 
7288cc9ae6594..0c95d483f1bf3 100644 --- a/examples/linear_model/plot_ols_3d.py +++ b/examples/linear_model/plot_ols_3d.py @@ -16,9 +16,10 @@ # %% # First we load the diabetes dataset. -from sklearn import datasets import numpy as np +from sklearn import datasets + X, y = datasets.load_diabetes(return_X_y=True) indices = (0, 1) diff --git a/examples/linear_model/plot_ols_ridge_variance.py b/examples/linear_model/plot_ols_ridge_variance.py index b02ab193842d4..a03d9c253c1cf 100644 --- a/examples/linear_model/plot_ols_ridge_variance.py +++ b/examples/linear_model/plot_ols_ridge_variance.py @@ -24,8 +24,8 @@ # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn import linear_model diff --git a/examples/linear_model/plot_omp.py b/examples/linear_model/plot_omp.py index 9329962cce4f6..aa6044173b8ce 100644 --- a/examples/linear_model/plot_omp.py +++ b/examples/linear_model/plot_omp.py @@ -10,9 +10,9 @@ import matplotlib.pyplot as plt import numpy as np -from sklearn.linear_model import OrthogonalMatchingPursuit -from sklearn.linear_model import OrthogonalMatchingPursuitCV + from sklearn.datasets import make_sparse_coded_signal +from sklearn.linear_model import OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV n_components, n_features = 512, 100 n_nonzero_coefs = 17 diff --git a/examples/linear_model/plot_poisson_regression_non_normal_loss.py b/examples/linear_model/plot_poisson_regression_non_normal_loss.py index 46f5c23578b55..cf38ca520f076 100644 --- a/examples/linear_model/plot_poisson_regression_non_normal_loss.py +++ b/examples/linear_model/plot_poisson_regression_non_normal_loss.py @@ -41,21 +41,18 @@ # Olivier Grisel # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np import pandas as pd - ############################################################################## # The French Motor Third-Party Liability Claims dataset # ----------------------------------------------------- # # Let's load the motor claim dataset from OpenML: # https://www.openml.org/d/41214 - from sklearn.datasets import fetch_openml - df = fetch_openml(data_id=41214, as_frame=True, parser="pandas").frame df @@ -97,11 +94,14 @@ # In order to fit linear models with those predictors it is therefore # necessary to perform standard feature transformations as follows: -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import FunctionTransformer, OneHotEncoder -from sklearn.preprocessing import StandardScaler, KBinsDiscretizer from sklearn.compose import ColumnTransformer - +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import ( + FunctionTransformer, + KBinsDiscretizer, + OneHotEncoder, + StandardScaler, +) log_scale_transformer = make_pipeline( FunctionTransformer(np.log, validate=False), StandardScaler() @@ -139,8 +139,8 @@ # the training sample. 
from sklearn.dummy import DummyRegressor -from sklearn.pipeline import Pipeline from sklearn.model_selection import train_test_split +from sklearn.pipeline import Pipeline df_train, df_test = train_test_split(df, test_size=0.33, random_state=0) @@ -156,9 +156,11 @@ # Let's compute the performance of this constant prediction baseline with 3 # different regression metrics: -from sklearn.metrics import mean_squared_error -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_poisson_deviance +from sklearn.metrics import ( + mean_absolute_error, + mean_poisson_deviance, + mean_squared_error, +) def score_estimator(estimator, df_test): @@ -213,7 +215,6 @@ def score_estimator(estimator, df_test): from sklearn.linear_model import Ridge - ridge_glm = Pipeline( [ ("preprocessor", linear_model_preprocessor), @@ -285,7 +286,6 @@ def score_estimator(estimator, df_test): from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.preprocessing import OrdinalEncoder - tree_preprocessor = ColumnTransformer( [ ( diff --git a/examples/linear_model/plot_polynomial_interpolation.py b/examples/linear_model/plot_polynomial_interpolation.py index ac2fe28de870d..f648b7aea762d 100644 --- a/examples/linear_model/plot_polynomial_interpolation.py +++ b/examples/linear_model/plot_polynomial_interpolation.py @@ -42,13 +42,12 @@ # Malte Londschien # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.linear_model import Ridge -from sklearn.preprocessing import PolynomialFeatures, SplineTransformer from sklearn.pipeline import make_pipeline - +from sklearn.preprocessing import PolynomialFeatures, SplineTransformer # %% # We start by defining a function that we intend to approximate and prepare diff --git a/examples/linear_model/plot_quantile_regression.py b/examples/linear_model/plot_quantile_regression.py index b66434fa1c0c1..715e6129cdef8 100644 --- a/examples/linear_model/plot_quantile_regression.py +++ b/examples/linear_model/plot_quantile_regression.py @@ -111,7 +111,7 @@ # # We will use the quantiles at 5% and 95% to find the outliers in the training # sample beyond the central 90% interval. -from sklearn.utils.fixes import sp_version, parse_version +from sklearn.utils.fixes import parse_version, sp_version # This is line is to avoid incompatibility if older SciPy version. # You should use `solver="highs"` with recent version of SciPy. @@ -253,8 +253,7 @@ # distributed target to make it more interesting as mean and median are not # equal. 
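The remark that the mean and the median disagree on an asymmetric target can be verified directly: among constant predictions, the median minimizes the mean absolute error while the mean minimizes the mean squared error. The log-normal sample below is only a stand-in for the example's heavy-tailed target.

import numpy as np

from sklearn.metrics import mean_absolute_error, mean_squared_error

rng = np.random.RandomState(42)
y = rng.lognormal(mean=0.0, sigma=1.0, size=10_000)  # asymmetric target

for name, constant in [("mean", y.mean()), ("median", np.median(y))]:
    pred = np.full_like(y, constant)
    mae = mean_absolute_error(y, pred)
    mse = mean_squared_error(y, pred)
    print(f"constant prediction = {name}: MAE = {mae:.3f}, MSE = {mse:.3f}")

# The median gives the lower MAE, the mean gives the lower MSE.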
from sklearn.linear_model import LinearRegression -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_squared_error +from sklearn.metrics import mean_absolute_error, mean_squared_error linear_regression = LinearRegression() quantile_regression = QuantileRegressor(quantile=0.5, alpha=0, solver=solver) diff --git a/examples/linear_model/plot_ransac.py b/examples/linear_model/plot_ransac.py index 0301dd0ba0088..7b89150c4bd20 100644 --- a/examples/linear_model/plot_ransac.py +++ b/examples/linear_model/plot_ransac.py @@ -18,8 +18,7 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn import linear_model, datasets - +from sklearn import datasets, linear_model n_samples = 1000 n_outliers = 50 diff --git a/examples/linear_model/plot_ridge_coeffs.py b/examples/linear_model/plot_ridge_coeffs.py index bfe6d818b2f37..4bfb1f4c29325 100644 --- a/examples/linear_model/plot_ridge_coeffs.py +++ b/examples/linear_model/plot_ridge_coeffs.py @@ -1,89 +1,180 @@ """ -============================================================== -Plot Ridge coefficients as a function of the L2 regularization -============================================================== - -.. currentmodule:: sklearn.linear_model - -:class:`Ridge` Regression is the estimator used in this example. -Each color in the left plot represents one different dimension of the -coefficient vector, and this is displayed as a function of the -regularization parameter. The right plot shows how exact the solution -is. This example illustrates how a well defined solution is -found by Ridge regression and how regularization affects the -coefficients and their values. The plot on the right shows how -the difference of the coefficients from the estimator changes -as a function of regularization. - -In this example the dependent variable Y is set as a function -of the input features: y = X*w + c. The coefficient vector w is -randomly sampled from a normal distribution, whereas the bias term c is -set to a constant. - -As alpha tends toward zero the coefficients found by Ridge -regression stabilize towards the randomly sampled vector w. -For big alpha (strong regularisation) the coefficients -are smaller (eventually converging at 0) leading to a -simpler and biased solution. -These dependencies can be observed on the left plot. - -The right plot shows the mean squared error between the -coefficients found by the model and the chosen vector w. -Less regularised models retrieve the exact -coefficients (error is equal to 0), stronger regularised -models increase the error. - -Please note that in this example the data is non-noisy, hence -it is possible to extract the exact coefficients. - +========================================================= +Ridge coefficients as a function of the L2 Regularization +========================================================= + +A model that overfits learns the training data too well, capturing both the +underlying patterns and the noise in the data. However, when applied to unseen +data, the learned associations may not hold. We normally detect this when we +apply our trained predictions to the test data and see the statistical +performance drop significantly compared to the training data. + +One way to overcome overfitting is through regularization, which can be done by +penalizing large weights (coefficients) in linear models, forcing the model to +shrink all coefficients. Regularization reduces a model's reliance on specific +information obtained from the training samples. 
+ +This example illustrates how L2 regularization in a +:class:`~sklearn.linear_model.Ridge` regression affects a model's performance by +adding a penalty term to the loss that increases with the coefficients +:math:`\\beta`. + +The regularized loss function is given by: :math:`\\mathcal{L}(X, y, \\beta) = +\\| y - X \\beta \\|^{2}_{2} + \\alpha \\| \\beta \\|^{2}_{2}` + +where :math:`X` is the input data, :math:`y` is the target variable, +:math:`\\beta` is the vector of coefficients associated with the features, and +:math:`\\alpha` is the regularization strength. + +The regularized loss function aims to balance the trade-off between accurately +predicting the training set and preventing overfitting. + +In this regularized loss, the left-hand side (e.g. :math:`\\|y - +X\\beta\\|^{2}_{2}`) measures the squared difference between the actual target +variable, :math:`y`, and the predicted values. Minimizing this term alone could +lead to overfitting, as the model may become too complex and sensitive to noise +in the training data. + +To address overfitting, Ridge regularization adds a constraint, called a penalty +term (:math:`\\alpha \\| \\beta\\|^{2}_{2}`), to the loss function. This penalty +term is the sum of the squares of the model's coefficients, multiplied by the +regularization strength :math:`\\alpha`. By introducing this constraint, Ridge +regularization discourages any single coefficient :math:`\\beta_{i}` from taking +an excessively large value and encourages smaller and more evenly distributed +coefficients. Higher values of :math:`\\alpha` force the coefficients towards +zero. However, an excessively high :math:`\\alpha` can result in an underfit +model that fails to capture important patterns in the data. + +Therefore, the regularized loss function combines the prediction accuracy term +and the penalty term. By adjusting the regularization strength, practitioners +can fine-tune the degree of constraint imposed on the weights, training a model +capable of generalizing well to unseen data while avoiding overfitting. """ # Author: Kornel Kielczewski -- -import matplotlib.pyplot as plt +# %% +# Purpose of this example +# ----------------------- +# For the purpose of showing how Ridge regularization works, we will create a +# non-noisy data set. Then we will train a regularized model on a range of +# regularization strengths (:math:`\alpha`) and plot how the trained +# coefficients and the mean squared error between those and the original values +# behave as functions of the regularization strength. +# +# Creating a non-noisy data set +# ***************************** +# We make a toy data set with 100 samples and 10 features, suitable for a +# regression problem. Out of the 10 features, 8 are informative and contribute to +# the regression, while the remaining 2 features do not have any effect on the +# target variable (their true coefficients are 0). Please note that in this +# example the data is non-noisy, hence we can expect our regression model to +# recover exactly the true coefficients w. +from sklearn.datasets import make_regression + +X, y, w = make_regression( + n_samples=100, n_features=10, n_informative=8, coef=True, random_state=1 +) + +# Obtain the true coefficients +print(f"The true coefficients of this regression problem are:\n{w}") + +# %% +# Training the Ridge Regressor +# **************************** +# We use :class:`~sklearn.linear_model.Ridge`, a linear model with L2 +# regularization.
We train several models, each with a different value for the +# model parameter `alpha`, which is a positive constant that multiplies the +# penalty term, controlling the regularization strength. For each trained model +# we then compute the error between the true coefficients `w` and the +# coefficients found by the model `clf`. We store the identified coefficients +# and the calculated errors for the corresponding coefficients in lists, which +# makes it convenient for us to plot them. import numpy as np -from sklearn.datasets import make_regression from sklearn.linear_model import Ridge from sklearn.metrics import mean_squared_error clf = Ridge() -X, y, w = make_regression( - n_samples=10, n_features=10, coef=True, random_state=1, bias=3.5 -) - +# Generate values for `alpha` that are evenly distributed on a logarithmic scale +alphas = np.logspace(-3, 4, 200) coefs = [] -errors = [] - -alphas = np.logspace(-6, 6, 200) +errors_coefs = [] # Train the model with different regularisation strengths for a in alphas: - clf.set_params(alpha=a) - clf.fit(X, y) + clf.set_params(alpha=a).fit(X, y) coefs.append(clf.coef_) - errors.append(mean_squared_error(clf.coef_, w)) - -# Display results -plt.figure(figsize=(20, 6)) - -plt.subplot(121) -ax = plt.gca() -ax.plot(alphas, coefs) -ax.set_xscale("log") -plt.xlabel("alpha") -plt.ylabel("weights") -plt.title("Ridge coefficients as a function of the regularization") -plt.axis("tight") - -plt.subplot(122) -ax = plt.gca() -ax.plot(alphas, errors) -ax.set_xscale("log") -plt.xlabel("alpha") -plt.ylabel("error") -plt.title("Coefficient error as a function of the regularization") -plt.axis("tight") - -plt.show() + errors_coefs.append(mean_squared_error(clf.coef_, w)) + +# %% +# Plotting trained Coefficients and Mean Squared Errors +# ***************************************************** +# We now plot the 10 different regularized coefficients as a function of the +# regularization parameter `alpha` where each color represents a different +# coefficient. +# +# On the right-hand-side, we plot how the errors of the coefficients from the +# estimator change as a function of regularization. +import matplotlib.pyplot as plt +import pandas as pd + +alphas = pd.Index(alphas, name="alpha") +coefs = pd.DataFrame(coefs, index=alphas, columns=[f"Feature {i}" for i in range(10)]) +errors = pd.Series(errors_coefs, index=alphas, name="Mean squared error") + +fig, axs = plt.subplots(1, 2, figsize=(20, 6)) + +coefs.plot( + ax=axs[0], + logx=True, + title="Ridge coefficients as a function of the regularization strength", +) +axs[0].set_ylabel("Ridge coefficient values") +errors.plot( + ax=axs[1], + logx=True, + title="Coefficient error as a function of the regularization strength", +) +_ = axs[1].set_ylabel("Mean squared error") +# %% +# Interpreting the plots +# ********************** +# The plot on the left-hand side shows how the regularization strength (`alpha`) +# affects the Ridge regression coefficients. Smaller values of `alpha` (weak +# regularization), allow the coefficients to closely resemble the true +# coefficients (`w`) used to generate the data set. This is because no +# additional noise was added to our artificial data set. As `alpha` increases, +# the coefficients shrink towards zero, gradually reducing the impact of the +# features that were formerly more significant. +# +# The right-hand side plot shows the mean squared error (MSE) between the +# coefficients found by the model and the true coefficients (`w`). 
It provides a +# measure of how close the coefficients found by our ridge model are to those of +# the true generative model: the lower the error, the closer the recovered +# coefficients. In this case, since our toy data set was +# non-noisy, we can see that the least regularized model retrieves coefficients +# closest to the true coefficients (`w`) (error is close to 0). +# +# When `alpha` is small, the model captures the intricate details of the +# training data, whether those were caused by noise or by actual information. As +# `alpha` increases, the largest coefficients shrink more rapidly, rendering +# their corresponding features less influential in the training process. This +# can enhance a model's ability to generalize to unseen data (if there was a lot +# of noise to capture), but it also risks degrading performance if the +# regularization becomes too strong relative to the amount of noise the data +# contains (as in this example). +# +# In real-world scenarios where data typically includes noise, selecting an +# appropriate `alpha` value becomes crucial in striking a balance between an +# overfitting and an underfitting model. +# +# Here, we saw that :class:`~sklearn.linear_model.Ridge` adds a penalty to the +# coefficients to fight overfitting. Another problem that can occur is linked to +# the presence of outliers in the training dataset. An outlier is a data point +# that differs significantly from other observations. Concretely, these outliers +# impact the left-hand side term of the loss function that we showed earlier. +# Some other linear models, such as the +# :class:`~sklearn.linear_model.HuberRegressor`, are formulated to be robust to +# outliers. You can learn more about it in +# the :ref:`sphx_glr_auto_examples_linear_model_plot_huber_vs_ridge.py` example.
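Not part of the diff above: a minimal sketch, reusing the `make_regression` setup from the rewritten example, of how one might evaluate the two terms of the regularized loss :math:`\\|y - X\\beta\\|^{2}_{2} + \\alpha \\|\\beta\\|^{2}_{2}` for a single fitted model. The variable names and the chosen `alpha` are illustrative only.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge

X, y, w = make_regression(
    n_samples=100, n_features=10, n_informative=8, coef=True, random_state=1
)
alpha = 1.0  # illustrative regularization strength
model = Ridge(alpha=alpha).fit(X, y)

# data-fit term ||y - X beta||_2^2, using the fitted coefficients and intercept
residuals = y - (X @ model.coef_ + model.intercept_)
data_fit = np.sum(residuals**2)
# penalty term alpha * ||beta||_2^2 (Ridge does not penalize the intercept)
penalty = alpha * np.sum(model.coef_**2)
print(f"data fit: {data_fit:.3f}, penalty: {penalty:.3f}, total: {data_fit + penalty:.3f}")

Increasing `alpha` in this sketch shifts the balance between the two printed terms, which is exactly the trade-off the example's plots visualize.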
diff --git a/examples/linear_model/plot_ridge_path.py b/examples/linear_model/plot_ridge_path.py index 66f8fd9eb6c23..01f9d45a63f8d 100644 --- a/examples/linear_model/plot_ridge_path.py +++ b/examples/linear_model/plot_ridge_path.py @@ -30,8 +30,9 @@ # Author: Fabian Pedregosa -- # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import linear_model # X is the 10x10 Hilbert matrix diff --git a/examples/linear_model/plot_robust_fit.py b/examples/linear_model/plot_robust_fit.py index c9fe49fc0d416..79213c9a8e83e 100644 --- a/examples/linear_model/plot_robust_fit.py +++ b/examples/linear_model/plot_robust_fit.py @@ -30,18 +30,18 @@ """ -from matplotlib import pyplot as plt import numpy as np +from matplotlib import pyplot as plt from sklearn.linear_model import ( + HuberRegressor, LinearRegression, - TheilSenRegressor, RANSACRegressor, - HuberRegressor, + TheilSenRegressor, ) from sklearn.metrics import mean_squared_error -from sklearn.preprocessing import PolynomialFeatures from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import PolynomialFeatures np.random.seed(42) diff --git a/examples/linear_model/plot_sgd_comparison.py b/examples/linear_model/plot_sgd_comparison.py index 5ab0d6b1b2827..0477e42cf5947 100644 --- a/examples/linear_model/plot_sgd_comparison.py +++ b/examples/linear_model/plot_sgd_comparison.py @@ -9,14 +9,17 @@ # Author: Rob Zinkov # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt -from sklearn import datasets +import numpy as np +from sklearn import datasets +from sklearn.linear_model import ( + LogisticRegression, + PassiveAggressiveClassifier, + Perceptron, + SGDClassifier, +) from sklearn.model_selection import train_test_split -from sklearn.linear_model import SGDClassifier, Perceptron -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.linear_model import LogisticRegression heldout = [0.95, 0.90, 0.75, 0.50, 0.01] # Number of rounds to fit and evaluate an estimator. 
diff --git a/examples/linear_model/plot_sgd_early_stopping.py b/examples/linear_model/plot_sgd_early_stopping.py index 4fb884804492d..6713a74342ba2 100644 --- a/examples/linear_model/plot_sgd_early_stopping.py +++ b/examples/linear_model/plot_sgd_early_stopping.py @@ -41,19 +41,19 @@ # # License: BSD 3 clause -import time import sys +import time -import pandas as pd -import numpy as np import matplotlib.pyplot as plt +import numpy as np +import pandas as pd from sklearn import linear_model from sklearn.datasets import fetch_openml -from sklearn.model_selection import train_test_split -from sklearn.utils._testing import ignore_warnings from sklearn.exceptions import ConvergenceWarning +from sklearn.model_selection import train_test_split from sklearn.utils import shuffle +from sklearn.utils._testing import ignore_warnings def load_mnist(n_samples=None, class_0="0", class_1="8"): diff --git a/examples/linear_model/plot_sgd_iris.py b/examples/linear_model/plot_sgd_iris.py index 64dca07396d54..5d9b923f9b444 100644 --- a/examples/linear_model/plot_sgd_iris.py +++ b/examples/linear_model/plot_sgd_iris.py @@ -9,11 +9,12 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import datasets -from sklearn.linear_model import SGDClassifier from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.linear_model import SGDClassifier # import some data to play with iris = datasets.load_iris() diff --git a/examples/linear_model/plot_sgd_loss_functions.py b/examples/linear_model/plot_sgd_loss_functions.py index a1f74dca4d6af..140562184b946 100644 --- a/examples/linear_model/plot_sgd_loss_functions.py +++ b/examples/linear_model/plot_sgd_loss_functions.py @@ -8,8 +8,8 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np def modified_huber_loss(y_true, y_pred): diff --git a/examples/linear_model/plot_sgd_penalties.py b/examples/linear_model/plot_sgd_penalties.py index 0413751fb41a9..ff71dba5f20a3 100644 --- a/examples/linear_model/plot_sgd_penalties.py +++ b/examples/linear_model/plot_sgd_penalties.py @@ -11,8 +11,8 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np l1_color = "navy" l2_color = "c" diff --git a/examples/linear_model/plot_sgd_separating_hyperplane.py b/examples/linear_model/plot_sgd_separating_hyperplane.py index af288fcd3dde0..e84ab7c519ae9 100644 --- a/examples/linear_model/plot_sgd_separating_hyperplane.py +++ b/examples/linear_model/plot_sgd_separating_hyperplane.py @@ -9,10 +9,11 @@ """ -import numpy as np import matplotlib.pyplot as plt -from sklearn.linear_model import SGDClassifier +import numpy as np + from sklearn.datasets import make_blobs +from sklearn.linear_model import SGDClassifier # we create 50 separable points X, Y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60) diff --git a/examples/linear_model/plot_sgd_weighted_samples.py b/examples/linear_model/plot_sgd_weighted_samples.py index 2db52042b075f..4d605e99b4e49 100644 --- a/examples/linear_model/plot_sgd_weighted_samples.py +++ b/examples/linear_model/plot_sgd_weighted_samples.py @@ -8,8 +8,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import linear_model # we create 20 points diff --git a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py index c25f4a84d91e0..2f03768f50532 100644 --- a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py +++ b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py @@ -19,13 
+19,14 @@ """ # noqa: E501 -import numpy as np -import matplotlib.pyplot as plt import matplotlib -from sklearn.svm import OneClassSVM -from sklearn.linear_model import SGDOneClassSVM +import matplotlib.pyplot as plt +import numpy as np + from sklearn.kernel_approximation import Nystroem +from sklearn.linear_model import SGDOneClassSVM from sklearn.pipeline import make_pipeline +from sklearn.svm import OneClassSVM font = {"weight": "normal", "size": 15} diff --git a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py index 507dda5c76901..f62208aab154a 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py +++ b/examples/linear_model/plot_sparse_logistic_regression_20newsgroups.py @@ -29,9 +29,9 @@ import numpy as np from sklearn.datasets import fetch_20newsgroups_vectorized +from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split -from sklearn.exceptions import ConvergenceWarning warnings.filterwarnings("ignore", category=ConvergenceWarning, module="sklearn") t0 = timeit.default_timer() diff --git a/examples/linear_model/plot_sparse_logistic_regression_mnist.py b/examples/linear_model/plot_sparse_logistic_regression_mnist.py index 37327aeaa4cb7..e6746b8fb0896 100644 --- a/examples/linear_model/plot_sparse_logistic_regression_mnist.py +++ b/examples/linear_model/plot_sparse_logistic_regression_mnist.py @@ -21,6 +21,7 @@ # License: BSD 3 clause import time + import matplotlib.pyplot as plt import numpy as np diff --git a/examples/linear_model/plot_theilsen.py b/examples/linear_model/plot_theilsen.py index b380baf705a76..eb0ac4966841d 100644 --- a/examples/linear_model/plot_theilsen.py +++ b/examples/linear_model/plot_theilsen.py @@ -39,10 +39,11 @@ # License: BSD 3 clause import time -import numpy as np + import matplotlib.pyplot as plt -from sklearn.linear_model import LinearRegression, TheilSenRegressor -from sklearn.linear_model import RANSACRegressor +import numpy as np + +from sklearn.linear_model import LinearRegression, RANSACRegressor, TheilSenRegressor estimators = [ ("OLS", LinearRegression()), diff --git a/examples/linear_model/plot_tweedie_regression_insurance_claims.py b/examples/linear_model/plot_tweedie_regression_insurance_claims.py index 1d7a5c5ed179f..a1894eaa88ed2 100644 --- a/examples/linear_model/plot_tweedie_regression_insurance_claims.py +++ b/examples/linear_model/plot_tweedie_regression_insurance_claims.py @@ -46,14 +46,16 @@ from functools import partial -import numpy as np import matplotlib.pyplot as plt +import numpy as np import pandas as pd from sklearn.datasets import fetch_openml -from sklearn.metrics import mean_tweedie_deviance -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_squared_error +from sklearn.metrics import ( + mean_absolute_error, + mean_squared_error, + mean_tweedie_deviance, +) def load_mtpl2(n_samples=None): @@ -209,11 +211,14 @@ def score_estimator( # containing the number of claims (``ClaimNb``), with the freMTPL2sev table, # containing the claim amount (``ClaimAmount``) for the same policy ids # (``IDpol``). 
-from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import FunctionTransformer, OneHotEncoder -from sklearn.preprocessing import StandardScaler, KBinsDiscretizer from sklearn.compose import ColumnTransformer - +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import ( + FunctionTransformer, + KBinsDiscretizer, + OneHotEncoder, + StandardScaler, +) df = load_mtpl2() @@ -274,9 +279,8 @@ def score_estimator( # constant rate in a given time interval (``Exposure``, in units of years). # Here we model the frequency ``y = ClaimNb / Exposure``, which is still a # (scaled) Poisson distribution, and use ``Exposure`` as `sample_weight`. -from sklearn.model_selection import train_test_split from sklearn.linear_model import PoissonRegressor - +from sklearn.model_selection import train_test_split df_train, df_test, X_train, X_test = train_test_split(df, X, random_state=0) @@ -396,7 +400,6 @@ def score_estimator( # more than one claim. from sklearn.linear_model import GammaRegressor - mask_train = df_train["ClaimAmount"] > 0 mask_test = df_test["ClaimAmount"] > 0 @@ -451,9 +454,9 @@ def score_estimator( # %% # # We conclude that the claim amount is very challenging to predict. Still, the -# :class:`~sklearn.linear.GammaRegressor` is able to leverage some information -# from the input features to slightly improve upon the mean baseline in terms -# of D². +# :class:`~sklearn.linear_model.GammaRegressor` is able to leverage some +# information from the input features to slightly improve upon the mean +# baseline in terms of D². # # Note that the resulting model is the average claim amount per claim. As such, # it is conditional on having at least one claim, and cannot be used to predict @@ -540,7 +543,6 @@ def score_estimator( # regardless of `power`. from sklearn.linear_model import TweedieRegressor - glm_pure_premium = TweedieRegressor(power=1.9, alpha=0.1, solver="newton-cholesky") glm_pure_premium.fit( X_train, df_train["PurePremium"], sample_weight=df_train["Exposure"] diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py index 3773f11605241..139964cfe93d4 100644 --- a/examples/manifold/plot_compare_methods.py +++ b/examples/manifold/plot_compare_methods.py @@ -29,12 +29,12 @@ # We start by generating the S-curve dataset. import matplotlib.pyplot as plt -from matplotlib import ticker # unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +from matplotlib import ticker -from sklearn import manifold, datasets +from sklearn import datasets, manifold n_samples = 1500 S_points, S_color = datasets.make_s_curve(n_samples, random_state=0) @@ -182,7 +182,7 @@ def add_2d_scatter(ax, points, points_color, title=None): # Read more in the :ref:`User Guide `. spectral = manifold.SpectralEmbedding( - n_components=n_components, n_neighbors=n_neighbors + n_components=n_components, n_neighbors=n_neighbors, random_state=42 ) S_spectral = spectral.fit_transform(S_points) diff --git a/examples/manifold/plot_lle_digits.py b/examples/manifold/plot_lle_digits.py index 7d4b6610cee49..4424d700789ff 100644 --- a/examples/manifold/plot_lle_digits.py +++ b/examples/manifold/plot_lle_digits.py @@ -45,6 +45,7 @@ # scattered across it. 
import numpy as np from matplotlib import offsetbox + from sklearn.preprocessing import MinMaxScaler @@ -103,11 +104,11 @@ def plot_embedding(X, title): from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.ensemble import RandomTreesEmbedding from sklearn.manifold import ( + MDS, + TSNE, Isomap, LocallyLinearEmbedding, - MDS, SpectralEmbedding, - TSNE, ) from sklearn.neighbors import NeighborhoodComponentsAnalysis from sklearn.pipeline import make_pipeline diff --git a/examples/manifold/plot_manifold_sphere.py b/examples/manifold/plot_manifold_sphere.py index 46db3f9f60e6d..144d696904c37 100644 --- a/examples/manifold/plot_manifold_sphere.py +++ b/examples/manifold/plot_manifold_sphere.py @@ -29,14 +29,16 @@ # License: BSD 3 clause from time import time -import numpy as np + import matplotlib.pyplot as plt -from matplotlib.ticker import NullFormatter -from sklearn import manifold -from sklearn.utils import check_random_state # Unused but required import for doing 3d projections with matplotlib < 3.2 import mpl_toolkits.mplot3d # noqa: F401 +import numpy as np +from matplotlib.ticker import NullFormatter + +from sklearn import manifold +from sklearn.utils import check_random_state # Variables for manifold learning. n_neighbors = 10 @@ -76,7 +78,7 @@ t0 = time() trans_data = ( manifold.LocallyLinearEmbedding( - n_neighbors=n_neighbors, n_components=2, method=method + n_neighbors=n_neighbors, n_components=2, method=method, random_state=42 ) .fit_transform(sphere_data) .T @@ -110,7 +112,7 @@ # Perform Multi-dimensional scaling. t0 = time() -mds = manifold.MDS(2, max_iter=100, n_init=1, normalized_stress="auto") +mds = manifold.MDS(2, max_iter=100, n_init=1, normalized_stress="auto", random_state=42) trans_data = mds.fit_transform(sphere_data).T t1 = time() print("MDS: %.2g sec" % (t1 - t0)) @@ -124,7 +126,9 @@ # Perform Spectral Embedding. t0 = time() -se = manifold.SpectralEmbedding(n_components=2, n_neighbors=n_neighbors) +se = manifold.SpectralEmbedding( + n_components=2, n_neighbors=n_neighbors, random_state=42 +) trans_data = se.fit_transform(sphere_data).T t1 = time() print("Spectral Embedding: %.2g sec" % (t1 - t0)) diff --git a/examples/manifold/plot_mds.py b/examples/manifold/plot_mds.py index 51f9745a33f59..2bb56f1f4ed2a 100644 --- a/examples/manifold/plot_mds.py +++ b/examples/manifold/plot_mds.py @@ -14,13 +14,12 @@ # License: BSD import numpy as np - from matplotlib import pyplot as plt from matplotlib.collections import LineCollection from sklearn import manifold -from sklearn.metrics import euclidean_distances from sklearn.decomposition import PCA +from sklearn.metrics import euclidean_distances EPSILON = np.finfo(np.float32).eps n_samples = 20 diff --git a/examples/manifold/plot_swissroll.py b/examples/manifold/plot_swissroll.py index 4a71eb83cc972..fe17d9f80030f 100644 --- a/examples/manifold/plot_swissroll.py +++ b/examples/manifold/plot_swissroll.py @@ -15,8 +15,8 @@ # We start by generating the Swiss Roll dataset. 
import matplotlib.pyplot as plt -from sklearn import manifold, datasets +from sklearn import datasets, manifold sr_points, sr_color = datasets.make_swiss_roll(n_samples=1500, random_state=0) diff --git a/examples/manifold/plot_t_sne_perplexity.py b/examples/manifold/plot_t_sne_perplexity.py index 014114a8a37d7..314458427f593 100644 --- a/examples/manifold/plot_t_sne_perplexity.py +++ b/examples/manifold/plot_t_sne_perplexity.py @@ -27,12 +27,13 @@ # Author: Narine Kokhlikyan # License: BSD -import numpy as np -import matplotlib.pyplot as plt +from time import time +import matplotlib.pyplot as plt +import numpy as np from matplotlib.ticker import NullFormatter -from sklearn import manifold, datasets -from time import time + +from sklearn import datasets, manifold n_samples = 150 n_components = 2 diff --git a/examples/miscellaneous/plot_anomaly_comparison.py b/examples/miscellaneous/plot_anomaly_comparison.py index ef274bf98fbe5..7fb6b71e2a5c6 100644 --- a/examples/miscellaneous/plot_anomaly_comparison.py +++ b/examples/miscellaneous/plot_anomaly_comparison.py @@ -68,17 +68,17 @@ import time -import numpy as np import matplotlib import matplotlib.pyplot as plt +import numpy as np from sklearn import svm -from sklearn.datasets import make_moons, make_blobs from sklearn.covariance import EllipticEnvelope +from sklearn.datasets import make_blobs, make_moons from sklearn.ensemble import IsolationForest -from sklearn.neighbors import LocalOutlierFactor -from sklearn.linear_model import SGDOneClassSVM from sklearn.kernel_approximation import Nystroem +from sklearn.linear_model import SGDOneClassSVM +from sklearn.neighbors import LocalOutlierFactor from sklearn.pipeline import make_pipeline matplotlib.rcParams["contour.negative_linestyle"] = "solid" diff --git a/examples/miscellaneous/plot_display_object_visualization.py b/examples/miscellaneous/plot_display_object_visualization.py index f108beced7a00..059fb5963fe57 100644 --- a/examples/miscellaneous/plot_display_object_visualization.py +++ b/examples/miscellaneous/plot_display_object_visualization.py @@ -24,10 +24,10 @@ # data is split into a train and test dataset and a logistic regression is # fitted with the train dataset. from sklearn.datasets import fetch_openml -from sklearn.preprocessing import StandardScaler -from sklearn.pipeline import make_pipeline from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler X, y = fetch_openml(data_id=1464, return_X_y=True, parser="pandas") X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y) @@ -39,10 +39,9 @@ # Create :class:`ConfusionMatrixDisplay` ############################################################################## # With the fitted model, we compute the predictions of the model on the test -# dataset. These predictions are used to compute the confustion matrix which +# dataset. These predictions are used to compute the confusion matrix which # is plotted with the :class:`ConfusionMatrixDisplay` -from sklearn.metrics import confusion_matrix -from sklearn.metrics import ConfusionMatrixDisplay +from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix y_pred = clf.predict(X_test) cm = confusion_matrix(y_test, y_pred) @@ -56,8 +55,7 @@ # The roc curve requires either the probabilities or the non-thresholded # decision values from the estimator. 
Since the logistic regression provides # a decision function, we will use it to plot the roc curve: -from sklearn.metrics import roc_curve -from sklearn.metrics import RocCurveDisplay +from sklearn.metrics import RocCurveDisplay, roc_curve y_score = clf.decision_function(X_test) @@ -69,8 +67,7 @@ ############################################################################## # Similarly, the precision recall curve can be plotted using `y_score` from # the prevision sections. -from sklearn.metrics import precision_recall_curve -from sklearn.metrics import PrecisionRecallDisplay +from sklearn.metrics import PrecisionRecallDisplay, precision_recall_curve prec, recall, _ = precision_recall_curve(y_test, y_score, pos_label=clf.classes_[1]) pr_display = PrecisionRecallDisplay(precision=prec, recall=recall).plot() diff --git a/examples/miscellaneous/plot_estimator_representation.py b/examples/miscellaneous/plot_estimator_representation.py index 304bb055e6762..1c9e3745db0de 100644 --- a/examples/miscellaneous/plot_estimator_representation.py +++ b/examples/miscellaneous/plot_estimator_representation.py @@ -7,12 +7,11 @@ displayed. """ -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import OneHotEncoder, StandardScaler -from sklearn.impute import SimpleImputer from sklearn.compose import make_column_transformer +from sklearn.impute import SimpleImputer from sklearn.linear_model import LogisticRegression - +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler # %% # Compact text representation diff --git a/examples/miscellaneous/plot_isotonic_regression.py b/examples/miscellaneous/plot_isotonic_regression.py index 0240a8dec34b5..a1c1174c9e9de 100644 --- a/examples/miscellaneous/plot_isotonic_regression.py +++ b/examples/miscellaneous/plot_isotonic_regression.py @@ -23,12 +23,12 @@ # Alexandre Gramfort # License: BSD -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.collections import LineCollection -from sklearn.linear_model import LinearRegression from sklearn.isotonic import IsotonicRegression +from sklearn.linear_model import LinearRegression from sklearn.utils import check_random_state n = 100 diff --git a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py index 6fd9d3614804c..85161a6ee51bb 100644 --- a/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py +++ b/examples/miscellaneous/plot_johnson_lindenstrauss_bound.py @@ -15,13 +15,16 @@ import sys from time import time -import numpy as np + import matplotlib.pyplot as plt -from sklearn.random_projection import johnson_lindenstrauss_min_dim -from sklearn.random_projection import SparseRandomProjection -from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.datasets import load_digits +import numpy as np + +from sklearn.datasets import fetch_20newsgroups_vectorized, load_digits from sklearn.metrics.pairwise import euclidean_distances +from sklearn.random_projection import ( + SparseRandomProjection, + johnson_lindenstrauss_min_dim, +) # %% # Theoretical bounds diff --git a/examples/miscellaneous/plot_kernel_approximation.py b/examples/miscellaneous/plot_kernel_approximation.py index ffb5d3940a055..199739016efa8 100644 --- a/examples/miscellaneous/plot_kernel_approximation.py +++ b/examples/miscellaneous/plot_kernel_approximation.py @@ -39,14 +39,15 @@ # License: BSD 3 clause # Standard scientific Python imports +from time import time + 
import matplotlib.pyplot as plt import numpy as np -from time import time # Import datasets, classifiers and performance metrics -from sklearn import datasets, svm, pipeline -from sklearn.kernel_approximation import RBFSampler, Nystroem +from sklearn import datasets, pipeline, svm from sklearn.decomposition import PCA +from sklearn.kernel_approximation import Nystroem, RBFSampler # The digits dataset digits = datasets.load_digits(n_class=9) @@ -71,18 +72,24 @@ # Create a classifier: a support vector classifier kernel_svm = svm.SVC(gamma=0.2) -linear_svm = svm.LinearSVC(dual="auto") +linear_svm = svm.LinearSVC(dual="auto", random_state=42) # create pipeline from kernel approximation # and linear svm feature_map_fourier = RBFSampler(gamma=0.2, random_state=1) feature_map_nystroem = Nystroem(gamma=0.2, random_state=1) fourier_approx_svm = pipeline.Pipeline( - [("feature_map", feature_map_fourier), ("svm", svm.LinearSVC(dual="auto"))] + [ + ("feature_map", feature_map_fourier), + ("svm", svm.LinearSVC(dual="auto", random_state=42)), + ] ) nystroem_approx_svm = pipeline.Pipeline( - [("feature_map", feature_map_nystroem), ("svm", svm.LinearSVC(dual="auto"))] + [ + ("feature_map", feature_map_nystroem), + ("svm", svm.LinearSVC(dual="auto", random_state=42)), + ] ) # fit and predict using linear and kernel svm: @@ -191,7 +198,7 @@ # visualize the decision surface, projected down to the first # two principal components of the dataset -pca = PCA(n_components=8).fit(data_train) +pca = PCA(n_components=8, random_state=42).fit(data_train) X = pca.transform(data_train) diff --git a/examples/miscellaneous/plot_kernel_ridge_regression.py b/examples/miscellaneous/plot_kernel_ridge_regression.py index fa7cb15446473..6d2288936179a 100644 --- a/examples/miscellaneous/plot_kernel_ridge_regression.py +++ b/examples/miscellaneous/plot_kernel_ridge_regression.py @@ -40,9 +40,9 @@ # Construct the kernel-based regression models # -------------------------------------------- +from sklearn.kernel_ridge import KernelRidge from sklearn.model_selection import GridSearchCV from sklearn.svm import SVR -from sklearn.kernel_ridge import KernelRidge train_size = 100 diff --git a/examples/miscellaneous/plot_metadata_routing.py b/examples/miscellaneous/plot_metadata_routing.py index 81e3b6fc9a01d..ae1ec857b59b0 100644 --- a/examples/miscellaneous/plot_metadata_routing.py +++ b/examples/miscellaneous/plot_metadata_routing.py @@ -22,23 +22,29 @@ """ # %% -import numpy as np import warnings from pprint import pprint + +import numpy as np + from sklearn import set_config -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.base import RegressorMixin -from sklearn.base import MetaEstimatorMixin -from sklearn.base import TransformerMixin -from sklearn.base import clone +from sklearn.base import ( + BaseEstimator, + ClassifierMixin, + MetaEstimatorMixin, + RegressorMixin, + TransformerMixin, + clone, +) +from sklearn.linear_model import LinearRegression from sklearn.utils import metadata_routing -from sklearn.utils.metadata_routing import get_routing_for_object -from sklearn.utils.metadata_routing import MetadataRouter -from sklearn.utils.metadata_routing import MethodMapping -from sklearn.utils.metadata_routing import process_routing +from sklearn.utils.metadata_routing import ( + MetadataRouter, + MethodMapping, + get_routing_for_object, + process_routing, +) from sklearn.utils.validation import check_is_fitted -from sklearn.linear_model import LinearRegression n_samples, n_features = 100, 
4 rng = np.random.RandomState(42) @@ -362,7 +368,7 @@ def predict(self, X, **predict_params): # In ``get_metadata_routing``, we add ``self`` to the routing using # ``add_self_request`` to indicate this estimator is consuming # ``sample_weight`` as well as being a router; which also adds a -# ``$self_request`` key to the routing info as illustrated bellow. Now let's +# ``$self_request`` key to the routing info as illustrated below. Now let's # look at some examples: # %% @@ -402,8 +408,8 @@ def predict(self, X, **predict_params): # %% # - Alias only on the sub-estimator. This is useful if we don't want the -# meta-estimator to use the metadata, and we only want the metadata to be used -# by the sub-estimator. +# meta-estimator to use the metadata, and we only want the metadata to be used +# by the sub-estimator. est = RouterConsumerClassifier( estimator=ExampleClassifier().set_fit_request(sample_weight="aliased_sample_weight") ).set_fit_request(sample_weight=True) @@ -615,9 +621,10 @@ def predict(self, X): # want to have a scikit-learn compatible estimator, without depending on the # scikit-learn package. If the following conditions are met, you do NOT need to # modify your code at all: -# - your estimator inherits from :class:`~base.BaseEstimator` -# - the parameters consumed by your estimator's methods, e.g. ``fit``, are -# explicitly defined in the method's signature, as opposed to being -# ``*args`` or ``*kwargs``. -# - you do not route any metadata to the underlying objects, i.e. you're not a -# *router*. +# +# - your estimator inherits from :class:`~base.BaseEstimator` +# - the parameters consumed by your estimator's methods, e.g. ``fit``, are +# explicitly defined in the method's signature, as opposed to being +# ``*args`` or ``*kwargs``. +# - you do not route any metadata to the underlying objects, i.e. you're not a +# *router*. 
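As a complement to the compatibility conditions listed just above (not part of this PR): a minimal sketch of a hypothetical, consumer-only third-party estimator that satisfies all three conditions and therefore needs no routing-specific code at all.

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin


class WeightedMeanRegressor(RegressorMixin, BaseEstimator):
    """Hypothetical consumer-only estimator: predicts the (weighted) mean of y."""

    def fit(self, X, y, sample_weight=None):
        # `sample_weight` is declared explicitly (no *args/**kwargs) and is
        # consumed here directly; nothing is routed to sub-estimators.
        self.mean_ = np.average(y, weights=sample_weight)
        return self

    def predict(self, X):
        return np.full(shape=len(X), fill_value=self.mean_)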
diff --git a/examples/miscellaneous/plot_multilabel.py b/examples/miscellaneous/plot_multilabel.py index aded595258fea..b424c3253104a 100644 --- a/examples/miscellaneous/plot_multilabel.py +++ b/examples/miscellaneous/plot_multilabel.py @@ -32,14 +32,14 @@ # Authors: Vlad Niculae, Mathieu Blondel # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np +from sklearn.cross_decomposition import CCA from sklearn.datasets import make_multilabel_classification +from sklearn.decomposition import PCA from sklearn.multiclass import OneVsRestClassifier from sklearn.svm import SVC -from sklearn.decomposition import PCA -from sklearn.cross_decomposition import CCA def plot_hyperplane(clf, min_x, max_x, linestyle, label): diff --git a/examples/miscellaneous/plot_multioutput_face_completion.py b/examples/miscellaneous/plot_multioutput_face_completion.py index 31e73195747a5..62070bc05e488 100644 --- a/examples/miscellaneous/plot_multioutput_face_completion.py +++ b/examples/miscellaneous/plot_multioutput_face_completion.py @@ -12,16 +12,14 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import fetch_olivetti_faces -from sklearn.utils.validation import check_random_state - from sklearn.ensemble import ExtraTreesRegressor +from sklearn.linear_model import LinearRegression, RidgeCV from sklearn.neighbors import KNeighborsRegressor -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import RidgeCV +from sklearn.utils.validation import check_random_state # Load the faces datasets data, targets = fetch_olivetti_faces(return_X_y=True) diff --git a/examples/miscellaneous/plot_outlier_detection_bench.py b/examples/miscellaneous/plot_outlier_detection_bench.py index 781fa515f50e8..b4fae93131971 100644 --- a/examples/miscellaneous/plot_outlier_detection_bench.py +++ b/examples/miscellaneous/plot_outlier_detection_bench.py @@ -3,193 +3,451 @@ Evaluation of outlier detection estimators ========================================== -This example benchmarks outlier detection algorithms, :ref:`local_outlier_factor` -(LOF) and :ref:`isolation_forest` (IForest), using ROC curves on -classical anomaly detection datasets. The algorithm performance -is assessed in an outlier detection context: +This example compares two outlier detection algorithms, namely +:ref:`local_outlier_factor` (LOF) and :ref:`isolation_forest` (IForest), on +real-world datasets available in :class:`sklearn.datasets`. The goal is to show +that different algorithms perform well on different datasets. -1. The algorithms are trained on the whole dataset which is assumed to -contain outliers. +The algorithms are trained in an outlier detection context: -2. The ROC curve from :class:`~sklearn.metrics.RocCurveDisplay` is computed -on the same dataset using the knowledge of the labels. +1. The ROC curves are computed using knowledge of the ground-truth labels +and displayed using :class:`~sklearn.metrics.RocCurveDisplay`. +2. The performance is assessed in terms of the ROC-AUC. """ # Author: Pharuj Rajborirug +# Arturo Amor # License: BSD 3 clause -print(__doc__) +# %% +# Dataset preprocessing and model training +# ======================================== +# +# Different outlier detection models require different preprocessing. 
In the +# presence of categorical variables, +# :class:`~sklearn.preprocessing.OrdinalEncoder` is often a good strategy for +# tree-based models such as :class:`~sklearn.ensemble.IsolationForest`, whereas +# neighbors-based models such as :class:`~sklearn.neighbors.LocalOutlierFactor` +# would be impacted by the ordering induced by ordinal encoding. To avoid +# inducing an ordering, on should rather use +# :class:`~sklearn.preprocessing.OneHotEncoder`. +# +# Neighbors-based models may also require scaling of the numerical features (see +# for instance :ref:`neighbors_scaling`). In the presence of outliers, a good +# option is to use a :class:`~sklearn.preprocessing.RobustScaler`. + +from sklearn.compose import ColumnTransformer +from sklearn.ensemble import IsolationForest +from sklearn.neighbors import LocalOutlierFactor +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import ( + OneHotEncoder, + OrdinalEncoder, + RobustScaler, +) + + +def make_estimator(name, categorical_columns=None, iforest_kw=None, lof_kw=None): + """Create an outlier detection estimator based on its name.""" + if name == "LOF": + outlier_detector = LocalOutlierFactor(**(lof_kw or {})) + if categorical_columns is None: + preprocessor = RobustScaler() + else: + preprocessor = ColumnTransformer( + transformers=[("categorical", OneHotEncoder(), categorical_columns)], + remainder=RobustScaler(), + ) + else: # name == "IForest" + outlier_detector = IsolationForest(**(iforest_kw or {})) + if categorical_columns is None: + preprocessor = None + else: + ordinal_encoder = OrdinalEncoder( + handle_unknown="use_encoded_value", unknown_value=-1 + ) + preprocessor = ColumnTransformer( + transformers=[ + ("categorical", ordinal_encoder, categorical_columns), + ], + remainder="passthrough", + ) + + return make_pipeline(preprocessor, outlier_detector) + + +# %% +# The following `fit_predict` function returns the average outlier score of X. + +from time import perf_counter + + +def fit_predict(estimator, X): + tic = perf_counter() + if estimator[-1].__class__.__name__ == "LocalOutlierFactor": + estimator.fit(X) + y_pred = estimator[-1].negative_outlier_factor_ + else: # "IsolationForest" + y_pred = estimator.fit(X).decision_function(X) + toc = perf_counter() + print(f"Duration for {model_name}: {toc - tic:.2f} s") + return y_pred + # %% -# Define a data preprocessing function -# ------------------------------------ +# On the rest of the example we process one dataset per section. After loading +# the data, the targets are modified to consist of two classes: 0 representing +# inliers and 1 representing outliers. Due to computational constraints of the +# scikit-learn documentation, the sample size of some datasets is reduced using +# a stratified :class:`~sklearn.model_selection.train_test_split`. +# +# Furthermore, we set `n_neighbors` to match the expected number of anomalies +# `expected_n_anomalies = n_samples * expected_anomaly_fraction`. This is a good +# heuristic as long as the proportion of outliers is not very low, the reason +# being that `n_neighbors` should be at least greater than the number of samples +# in the less populated cluster (see +# :ref:`sphx_glr_auto_examples_neighbors_plot_lof_outlier_detection.py`). # -# The example uses real-world datasets available in -# :class:`sklearn.datasets` and the sample size of some datasets is reduced -# to speed up computation. After the data preprocessing, the datasets' targets -# will have two classes, 0 representing inliers and 1 representing outliers. 
-# The `preprocess_dataset` function returns data and target. +# KDDCup99 - SA dataset +# --------------------- +# +# The :ref:`kddcup99_dataset` was generated using a closed network and +# hand-injected attacks. The SA dataset is a subset of it obtained by simply +# selecting all the normal data and an anomaly proportion of around 3%. +# %% import numpy as np -from sklearn.datasets import fetch_kddcup99, fetch_covtype, fetch_openml -from sklearn.preprocessing import LabelBinarizer -import pandas as pd - -rng = np.random.RandomState(42) - - -def preprocess_dataset(dataset_name): - # loading and vectorization - print(f"Loading {dataset_name} data") - if dataset_name in ["http", "smtp", "SA", "SF"]: - dataset = fetch_kddcup99(subset=dataset_name, percent10=True, random_state=rng) - X = dataset.data - y = dataset.target - lb = LabelBinarizer() - - if dataset_name == "SF": - idx = rng.choice(X.shape[0], int(X.shape[0] * 0.1), replace=False) - X = X[idx] # reduce the sample size - y = y[idx] - x1 = lb.fit_transform(X[:, 1].astype(str)) - X = np.c_[X[:, :1], x1, X[:, 2:]] - elif dataset_name == "SA": - idx = rng.choice(X.shape[0], int(X.shape[0] * 0.1), replace=False) - X = X[idx] # reduce the sample size - y = y[idx] - x1 = lb.fit_transform(X[:, 1].astype(str)) - x2 = lb.fit_transform(X[:, 2].astype(str)) - x3 = lb.fit_transform(X[:, 3].astype(str)) - X = np.c_[X[:, :1], x1, x2, x3, X[:, 4:]] - y = (y != b"normal.").astype(int) - if dataset_name == "forestcover": - dataset = fetch_covtype() - X = dataset.data - y = dataset.target - idx = rng.choice(X.shape[0], int(X.shape[0] * 0.1), replace=False) - X = X[idx] # reduce the sample size - y = y[idx] - - # inliers are those with attribute 2 - # outliers are those with attribute 4 - s = (y == 2) + (y == 4) - X = X[s, :] - y = y[s] - y = (y != 2).astype(int) - if dataset_name in ["glass", "wdbc", "cardiotocography"]: - dataset = fetch_openml( - name=dataset_name, version=1, as_frame=False, parser="pandas" - ) - X = dataset.data - y = dataset.target - - if dataset_name == "glass": - s = y == "tableware" - y = s.astype(int) - if dataset_name == "wdbc": - s = y == "2" - y = s.astype(int) - X_mal, y_mal = X[s], y[s] - X_ben, y_ben = X[~s], y[~s] - - # downsampled to 39 points (9.8% outliers) - idx = rng.choice(y_mal.shape[0], 39, replace=False) - X_mal2 = X_mal[idx] - y_mal2 = y_mal[idx] - X = np.concatenate((X_ben, X_mal2), axis=0) - y = np.concatenate((y_ben, y_mal2), axis=0) - if dataset_name == "cardiotocography": - s = y == "3" - y = s.astype(int) - # 0 represents inliers, and 1 represents outliers - y = pd.Series(y, dtype="category") - return (X, y) - - -# %% -# Define an outlier prediction function -# ------------------------------------- -# There is no particular reason to choose algorithms -# :class:`~sklearn.neighbors.LocalOutlierFactor` and -# :class:`~sklearn.ensemble.IsolationForest`. The goal is to show that -# different algorithm performs well on different datasets. The following -# `compute_prediction` function returns average outlier score of X. 
+from sklearn.datasets import fetch_kddcup99 +from sklearn.model_selection import train_test_split -from sklearn.neighbors import LocalOutlierFactor -from sklearn.ensemble import IsolationForest +X, y = fetch_kddcup99( + subset="SA", percent10=True, random_state=42, return_X_y=True, as_frame=True +) +y = (y != b"normal.").astype(np.int32) +X, _, y, _ = train_test_split(X, y, train_size=0.1, stratify=y, random_state=42) +n_samples, anomaly_frac = X.shape[0], y.mean() +print(f"{n_samples} datapoints with {y.sum()} anomalies ({anomaly_frac:.02%})") -def compute_prediction(X, model_name): - print(f"Computing {model_name} prediction...") - if model_name == "LOF": - clf = LocalOutlierFactor(n_neighbors=20, contamination="auto") - clf.fit(X) - y_pred = clf.negative_outlier_factor_ - if model_name == "IForest": - clf = IsolationForest(random_state=rng, contamination="auto") - y_pred = clf.fit(X).decision_function(X) - return y_pred +# %% +# The SA dataset contains 41 features out of which 3 are categorical: +# "protocol_type", "service" and "flag". + +# %% +y_true = {} +y_pred = {"LOF": {}, "IForest": {}} +model_names = ["LOF", "IForest"] +cat_columns = ["protocol_type", "service", "flag"] +y_true["KDDCup99 - SA"] = y +for model_name in model_names: + model = make_estimator( + name=model_name, + categorical_columns=cat_columns, + lof_kw={"n_neighbors": int(n_samples * anomaly_frac)}, + iforest_kw={"random_state": 42}, + ) + y_pred[model_name]["KDDCup99 - SA"] = fit_predict(model, X) # %% -# Plot and interpret results -# -------------------------- +# Forest covertypes dataset +# ------------------------- # -# The algorithm performance relates to how good the true positive rate (TPR) -# is at low value of the false positive rate (FPR). The best algorithms -# have the curve on the top-left of the plot and the area under curve (AUC) -# close to 1. The diagonal dashed line represents a random classification -# of outliers and inliers. +# The :ref:`covtype_dataset` is a multiclass dataset where the target is the +# dominant species of tree in a given patch of forest. It contains 54 features, +# some of which ("Wilderness_Area" and "Soil_Type") are already binary encoded. +# Though originally meant as a classification task, one can regard inliers as +# samples encoded with label 2 and outliers as those with label 4. +# %% +from sklearn.datasets import fetch_covtype -import math +X, y = fetch_covtype(return_X_y=True, as_frame=True) +s = (y == 2) + (y == 4) +X = X.loc[s] +y = y.loc[s] +y = (y != 2).astype(np.int32) + +X, _, y, _ = train_test_split(X, y, train_size=0.05, stratify=y, random_state=42) +X_forestcover = X # save X for later use + +n_samples, anomaly_frac = X.shape[0], y.mean() +print(f"{n_samples} datapoints with {y.sum()} anomalies ({anomaly_frac:.02%})") + +# %% +y_true["forestcover"] = y +for model_name in model_names: + model = make_estimator( + name=model_name, + lof_kw={"n_neighbors": int(n_samples * anomaly_frac)}, + iforest_kw={"random_state": 42}, + ) + y_pred[model_name]["forestcover"] = fit_predict(model, X) + +# %% +# Ames Housing dataset +# -------------------- +# +# The `Ames housing dataset `_ is originally a +# regression dataset where the target are sales prices of houses in Ames, Iowa. +# Here we convert it into an outlier detection problem by regarding houses with +# price over 70 USD/sqft. To make the problem easier, we drop intermediate +# prices between 40 and 70 USD/sqft. 
+ +# %% import matplotlib.pyplot as plt -from sklearn.metrics import RocCurveDisplay -datasets_name = [ - "http", - "smtp", - "SA", - "SF", - "forestcover", - "glass", - "wdbc", - "cardiotocography", -] +from sklearn.datasets import fetch_openml -models_name = [ - "LOF", - "IForest", -] +X, y = fetch_openml( + name="ames_housing", version=1, return_X_y=True, as_frame=True, parser="pandas" +) +y = y.div(X["Lot_Area"]) + +# None values in pandas 1.5.1 were mapped to np.nan in pandas 2.0.1 +X["Misc_Feature"] = X["Misc_Feature"].cat.add_categories("NoInfo").fillna("NoInfo") +X["Mas_Vnr_Type"] = X["Mas_Vnr_Type"].cat.add_categories("NoInfo").fillna("NoInfo") + +X.drop(columns="Lot_Area", inplace=True) +mask = (y < 40) | (y > 70) +X = X.loc[mask] +y = y.loc[mask] +y.hist(bins=20, edgecolor="black") +plt.xlabel("House price in USD/sqft") +_ = plt.title("Distribution of house prices in Ames") + +# %% +y = (y > 70).astype(np.int32) + +n_samples, anomaly_frac = X.shape[0], y.mean() +print(f"{n_samples} datapoints with {y.sum()} anomalies ({anomaly_frac:.02%})") + +# %% +# The dataset contains 46 categorical features. In this case it is easier use a +# :class:`~sklearn.compose.make_column_selector` to find them instead of passing +# a list made by hand. + +# %% +from sklearn.compose import make_column_selector as selector + +categorical_columns_selector = selector(dtype_include="category") +cat_columns = categorical_columns_selector(X) + +y_true["ames_housing"] = y +for model_name in model_names: + model = make_estimator( + name=model_name, + categorical_columns=cat_columns, + lof_kw={"n_neighbors": int(n_samples * anomaly_frac)}, + iforest_kw={"random_state": 42}, + ) + y_pred[model_name]["ames_housing"] = fit_predict(model, X) + +# %% +# Cardiotocography dataset +# ------------------------ +# +# The `Cardiotocography dataset `_ is a multiclass +# dataset of fetal cardiotocograms, the classes being the fetal heart rate (FHR) +# pattern encoded with labels from 1 to 10. Here we set class 3 (the minority +# class) to represent the outliers. It contains 30 numerical features, some of +# which are binary encoded and some are continuous. + +# %% +X, y = fetch_openml( + name="cardiotocography", version=1, return_X_y=True, as_frame=False, parser="pandas" +) +X_cardiotocography = X # save X for later use +s = y == "3" +y = s.astype(np.int32) + +n_samples, anomaly_frac = X.shape[0], y.mean() +print(f"{n_samples} datapoints with {y.sum()} anomalies ({anomaly_frac:.02%})") + +# %% +y_true["cardiotocography"] = y +for model_name in model_names: + model = make_estimator( + name=model_name, + lof_kw={"n_neighbors": int(n_samples * anomaly_frac)}, + iforest_kw={"random_state": 42}, + ) + y_pred[model_name]["cardiotocography"] = fit_predict(model, X) + +# %% +# Plot and interpret results +# ========================== +# +# The algorithm performance relates to how good the true positive rate (TPR) is +# at low value of the false positive rate (FPR). The best algorithms have the +# curve on the top-left of the plot and the area under curve (AUC) close to 1. +# The diagonal dashed line represents a random classification of outliers and +# inliers. 
+ +# %% +import math + +from sklearn.metrics import RocCurveDisplay -# plotting parameters cols = 2 -linewidth = 1 pos_label = 0 # mean 0 belongs to positive class -rows = math.ceil(len(datasets_name) / cols) - -fig, axs = plt.subplots(rows, cols, figsize=(10, rows * 3), sharex=True, sharey=True) +datasets_names = y_true.keys() +rows = math.ceil(len(datasets_names) / cols) -for i, dataset_name in enumerate(datasets_name): - (X, y) = preprocess_dataset(dataset_name=dataset_name) +fig, axs = plt.subplots(nrows=rows, ncols=cols, squeeze=False, figsize=(10, rows * 4)) - for model_idx, model_name in enumerate(models_name): - y_pred = compute_prediction(X, model_name=model_name) +for ax, dataset_name in zip(axs.ravel(), datasets_names): + for model_idx, model_name in enumerate(model_names): display = RocCurveDisplay.from_predictions( - y, - y_pred, + y_true[dataset_name], + y_pred[model_name][dataset_name], pos_label=pos_label, name=model_name, - linewidth=linewidth, - ax=axs[i // cols, i % cols], - plot_chance_level=(model_idx == len(models_name) - 1), - chance_level_kw={ - "linewidth": linewidth, - "linestyle": ":", - }, + ax=ax, + plot_chance_level=(model_idx == len(model_names) - 1), + chance_level_kw={"linestyle": ":"}, ) - axs[i // cols, i % cols].set_title(dataset_name) -plt.tight_layout(pad=2.0) # spacing between subplots + ax.set_title(dataset_name) +_ = plt.tight_layout(pad=2.0) # spacing between subplots + +# %% +# We observe that once the number of neighbors is tuned, LOF and IForest perform +# similarly in terms of ROC AUC for the forestcover and cardiotocography +# datasets. The score for IForest is slightly better for the SA dataset and LOF +# performs considerably better on the Ames housing dataset than IForest. +# +# Ablation study +# ============== +# +# In this section we explore the impact of the hyperparameter `n_neighbors` and +# the choice of scaling the numerical variables on the LOF model. Here we use +# the :ref:`covtype_dataset` dataset as the binary encoded categories introduce +# a natural scale of euclidean distances between 0 and 1. We then want a scaling +# method to avoid granting a privilege to non-binary features and that is robust +# enough to outliers so that the task of finding them does not become too +# difficult. + +# %% +X = X_forestcover +y = y_true["forestcover"] + +n_samples = X.shape[0] +n_neighbors_list = (n_samples * np.array([0.2, 0.02, 0.01, 0.001])).astype(np.int32) +model = make_pipeline(RobustScaler(), LocalOutlierFactor()) + +linestyles = ["solid", "dashed", "dashdot", ":", (5, (10, 3))] + +fig, ax = plt.subplots() +for model_idx, (linestyle, n_neighbors) in enumerate(zip(linestyles, n_neighbors_list)): + model.set_params(localoutlierfactor__n_neighbors=n_neighbors) + model.fit(X) + y_pred = model[-1].negative_outlier_factor_ + display = RocCurveDisplay.from_predictions( + y, + y_pred, + pos_label=pos_label, + name=f"n_neighbors = {n_neighbors}", + ax=ax, + plot_chance_level=(model_idx == len(n_neighbors_list) - 1), + chance_level_kw={"linestyle": (0, (1, 10))}, + linestyle=linestyle, + linewidth=2, + ) +_ = ax.set_title("RobustScaler with varying n_neighbors\non forestcover dataset") + +# %% +# We observe that the number of neighbors has a big impact on the performance of +# the model. If one has access to (at least some) ground truth labels, it is +# then important to tune `n_neighbors` accordingly. A convenient way to do so is +# to explore values for `n_neighbors` of the order of magnitud of the expected +# contamination. 
+
# %% +from sklearn.preprocessing import MinMaxScaler, SplineTransformer, StandardScaler + +preprocessor_list = [ + None, + RobustScaler(), + StandardScaler(), + MinMaxScaler(), + SplineTransformer(), +] +expected_anomaly_fraction = 0.02 +lof = LocalOutlierFactor(n_neighbors=int(n_samples * expected_anomaly_fraction)) + +fig, ax = plt.subplots() +for model_idx, (linestyle, preprocessor) in enumerate( + zip(linestyles, preprocessor_list) +): + model = make_pipeline(preprocessor, lof) + model.fit(X) + y_pred = model[-1].negative_outlier_factor_ + display = RocCurveDisplay.from_predictions( + y, + y_pred, + pos_label=pos_label, + name=str(preprocessor).split("(")[0], + ax=ax, + plot_chance_level=(model_idx == len(preprocessor_list) - 1), + chance_level_kw={"linestyle": (0, (1, 10))}, + linestyle=linestyle, + linewidth=2, + ) +_ = ax.set_title("Fixed n_neighbors with varying preprocessing\non forestcover dataset") + +# %% +# On the one hand, :class:`~sklearn.preprocessing.RobustScaler` scales each +# feature independently by using the interquartile range (IQR) by default, which +# is the range between the 25th and 75th percentiles of the data. It centers the +# data by subtracting the median and then scales it by dividing by the IQR. The +# IQR is robust to outliers: the median and interquartile range are less +# affected by extreme values than the range, the mean and the standard +# deviation. Furthermore, :class:`~sklearn.preprocessing.RobustScaler` does not +# squash marginal outlier values, contrary to +# :class:`~sklearn.preprocessing.StandardScaler`. +# +# On the other hand, :class:`~sklearn.preprocessing.MinMaxScaler` scales each +# feature individually such that its range maps into the range between zero and +# one. If there are outliers in the data, they can skew it towards either the +# minimum or maximum values, leading to a completely different distribution of +# data with large marginal outliers: all non-outlier values can be collapsed +# almost together as a result. +# +# We also evaluated no preprocessing at all (by passing `None` to the pipeline), +# :class:`~sklearn.preprocessing.StandardScaler` and +# :class:`~sklearn.preprocessing.SplineTransformer`. Please refer to their +# respective documentation for more details.
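
Editor's aside, not part of the diff: a tiny toy example, independent of the datasets above, illustrating the squashing effect described in this paragraph:

import numpy as np
from sklearn.preprocessing import MinMaxScaler, RobustScaler

# One feature with a single extreme outlier.
x = np.array([[1.0], [2.0], [3.0], [4.0], [1000.0]])

print(MinMaxScaler().fit_transform(x).ravel())
# roughly [0, 0.001, 0.002, 0.003, 1]: the inliers are collapsed near zero
print(RobustScaler().fit_transform(x).ravel())
# roughly [-1, -0.5, 0, 0.5, 498.5]: the inliers keep their spread, the outlier stays marginal
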
+# +# Note that the optimal preprocessing depends on the dataset, as shown below: + +# %% +X = X_cardiotocography +y = y_true["cardiotocography"] + +n_samples, expected_anomaly_fraction = X.shape[0], 0.025 +lof = LocalOutlierFactor(n_neighbors=int(n_samples * expected_anomaly_fraction)) + +fig, ax = plt.subplots() +for model_idx, (linestyle, preprocessor) in enumerate( + zip(linestyles, preprocessor_list) +): + model = make_pipeline(preprocessor, lof) + model.fit(X) + y_pred = model[-1].negative_outlier_factor_ + display = RocCurveDisplay.from_predictions( + y, + y_pred, + pos_label=pos_label, + name=str(preprocessor).split("(")[0], + ax=ax, + plot_chance_level=(model_idx == len(preprocessor_list) - 1), + chance_level_kw={"linestyle": (0, (1, 10))}, + linestyle=linestyle, + linewidth=2, + ) +ax.set_title( + "Fixed n_neighbors with varying preprocessing\non cardiotocography dataset" +) plt.show() diff --git a/examples/miscellaneous/plot_partial_dependence_visualization_api.py b/examples/miscellaneous/plot_partial_dependence_visualization_api.py index 336d7c36d1661..38a984fa5b0cd 100644 --- a/examples/miscellaneous/plot_partial_dependence_visualization_api.py +++ b/examples/miscellaneous/plot_partial_dependence_visualization_api.py @@ -13,15 +13,15 @@ """ # noqa: E501 -import pandas as pd import matplotlib.pyplot as plt +import pandas as pd + from sklearn.datasets import load_diabetes +from sklearn.inspection import PartialDependenceDisplay from sklearn.neural_network import MLPRegressor -from sklearn.preprocessing import StandardScaler from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler from sklearn.tree import DecisionTreeRegressor -from sklearn.inspection import PartialDependenceDisplay - # %% # Train models on the diabetes dataset diff --git a/examples/miscellaneous/plot_pipeline_display.py b/examples/miscellaneous/plot_pipeline_display.py index f0fea8d2f3a27..9642bb56b903f 100755 --- a/examples/miscellaneous/plot_pipeline_display.py +++ b/examples/miscellaneous/plot_pipeline_display.py @@ -19,10 +19,10 @@ # :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual # representation. +from sklearn import set_config +from sklearn.linear_model import LogisticRegression from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler -from sklearn.linear_model import LogisticRegression -from sklearn import set_config steps = [ ("preprocessing", StandardScaler()), @@ -53,9 +53,9 @@ # :class:`~sklearn.linear_model.LogisticRegression`, and displays its visual # representation. -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import StandardScaler, PolynomialFeatures from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import PolynomialFeatures, StandardScaler steps = [ ("standard_scaler", StandardScaler()), @@ -73,9 +73,9 @@ # a classifier, :class:`~sklearn.svm.SVC`, and displays its visual # representation. +from sklearn.decomposition import PCA from sklearn.pipeline import Pipeline from sklearn.svm import SVC -from sklearn.decomposition import PCA steps = [("reduce_dim", PCA(n_components=4)), ("classifier", SVC(kernel="linear"))] pipe = Pipeline(steps) @@ -90,12 +90,12 @@ # representation. 
import numpy as np -from sklearn.pipeline import make_pipeline -from sklearn.pipeline import Pipeline -from sklearn.impute import SimpleImputer + from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder, StandardScaler +from sklearn.impute import SimpleImputer from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler numeric_preprocessor = Pipeline( steps=[ @@ -133,13 +133,13 @@ # representation. import numpy as np -from sklearn.pipeline import make_pipeline -from sklearn.pipeline import Pipeline -from sklearn.impute import SimpleImputer + from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder, StandardScaler from sklearn.ensemble import RandomForestClassifier +from sklearn.impute import SimpleImputer from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler numeric_preprocessor = Pipeline( steps=[ diff --git a/examples/miscellaneous/plot_roc_curve_visualization_api.py b/examples/miscellaneous/plot_roc_curve_visualization_api.py index b4e08493c77d4..7fc8df9724337 100644 --- a/examples/miscellaneous/plot_roc_curve_visualization_api.py +++ b/examples/miscellaneous/plot_roc_curve_visualization_api.py @@ -15,11 +15,12 @@ # First, we load the wine dataset and convert it to a binary classification # problem. Then, we train a support vector classifier on a training dataset. import matplotlib.pyplot as plt -from sklearn.svm import SVC + +from sklearn.datasets import load_wine from sklearn.ensemble import RandomForestClassifier from sklearn.metrics import RocCurveDisplay -from sklearn.datasets import load_wine from sklearn.model_selection import train_test_split +from sklearn.svm import SVC X, y = load_wine(return_X_y=True) y = y == 2 diff --git a/examples/miscellaneous/plot_set_output.py b/examples/miscellaneous/plot_set_output.py index a2088ae48adc3..725e04894614c 100644 --- a/examples/miscellaneous/plot_set_output.py +++ b/examples/miscellaneous/plot_set_output.py @@ -48,9 +48,9 @@ # %% # In a :class:`pipeline.Pipeline`, `set_output` configures all steps to output # DataFrames. -from sklearn.pipeline import make_pipeline -from sklearn.linear_model import LogisticRegression from sklearn.feature_selection import SelectPercentile +from sklearn.linear_model import LogisticRegression +from sklearn.pipeline import make_pipeline clf = make_pipeline( StandardScaler(), SelectPercentile(percentile=75), LogisticRegression() @@ -76,10 +76,10 @@ # %% # The `set_output` API can be configured globally by using :func:`set_config` and # setting `transform_output` to `"pandas"`. 
+from sklearn import set_config from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OneHotEncoder, StandardScaler from sklearn.impute import SimpleImputer -from sklearn import set_config +from sklearn.preprocessing import OneHotEncoder, StandardScaler set_config(transform_output="pandas") diff --git a/examples/mixture/plot_concentration_prior.py b/examples/mixture/plot_concentration_prior.py index a56ec6325068b..6561186adb119 100644 --- a/examples/mixture/plot_concentration_prior.py +++ b/examples/mixture/plot_concentration_prior.py @@ -32,10 +32,10 @@ # Author: Thierry Guillemot # License: BSD 3 clause -import numpy as np import matplotlib as mpl -import matplotlib.pyplot as plt import matplotlib.gridspec as gridspec +import matplotlib.pyplot as plt +import numpy as np from sklearn.mixture import BayesianGaussianMixture diff --git a/examples/mixture/plot_gmm.py b/examples/mixture/plot_gmm.py index efc89baa8159a..82e48a8d13eb0 100644 --- a/examples/mixture/plot_gmm.py +++ b/examples/mixture/plot_gmm.py @@ -26,10 +26,10 @@ import itertools +import matplotlib as mpl +import matplotlib.pyplot as plt import numpy as np from scipy import linalg -import matplotlib.pyplot as plt -import matplotlib as mpl from sklearn import mixture diff --git a/examples/mixture/plot_gmm_covariances.py b/examples/mixture/plot_gmm_covariances.py index aa0b78ab42a0b..9466e11749966 100644 --- a/examples/mixture/plot_gmm_covariances.py +++ b/examples/mixture/plot_gmm_covariances.py @@ -33,7 +33,6 @@ import matplotlib as mpl import matplotlib.pyplot as plt - import numpy as np from sklearn import datasets diff --git a/examples/mixture/plot_gmm_init.py b/examples/mixture/plot_gmm_init.py index 3b4beefe8c99a..aa0266c98ff7a 100644 --- a/examples/mixture/plot_gmm_init.py +++ b/examples/mixture/plot_gmm_init.py @@ -37,12 +37,14 @@ # Author: Gordon Walsh # Data generation code from Jake Vanderplas +from timeit import default_timer as timer + import matplotlib.pyplot as plt import numpy as np + +from sklearn.datasets._samples_generator import make_blobs from sklearn.mixture import GaussianMixture from sklearn.utils.extmath import row_norms -from sklearn.datasets._samples_generator import make_blobs -from timeit import default_timer as timer print(__doc__) diff --git a/examples/mixture/plot_gmm_pdf.py b/examples/mixture/plot_gmm_pdf.py index 70d58f22f8f41..062bdfd4d6d67 100644 --- a/examples/mixture/plot_gmm_pdf.py +++ b/examples/mixture/plot_gmm_pdf.py @@ -9,9 +9,10 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.colors import LogNorm + from sklearn import mixture n_samples = 300 diff --git a/examples/mixture/plot_gmm_sin.py b/examples/mixture/plot_gmm_sin.py index c8656a69fe9fb..34af17b8920bc 100644 --- a/examples/mixture/plot_gmm_sin.py +++ b/examples/mixture/plot_gmm_sin.py @@ -41,10 +41,10 @@ import itertools +import matplotlib as mpl +import matplotlib.pyplot as plt import numpy as np from scipy import linalg -import matplotlib.pyplot as plt -import matplotlib as mpl from sklearn import mixture diff --git a/examples/model_selection/plot_confusion_matrix.py b/examples/model_selection/plot_confusion_matrix.py index b891564db4025..278083a994e58 100644 --- a/examples/model_selection/plot_confusion_matrix.py +++ b/examples/model_selection/plot_confusion_matrix.py @@ -24,12 +24,12 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn import svm, datasets -from sklearn.model_selection import train_test_split +from 
sklearn import datasets, svm from sklearn.metrics import ConfusionMatrixDisplay +from sklearn.model_selection import train_test_split # import some data to play with iris = datasets.load_iris() diff --git a/examples/model_selection/plot_cv_indices.py b/examples/model_selection/plot_cv_indices.py index 8b70191e4abd1..e6c3580c787f0 100644 --- a/examples/model_selection/plot_cv_indices.py +++ b/examples/model_selection/plot_cv_indices.py @@ -12,19 +12,20 @@ """ +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.patches import Patch + from sklearn.model_selection import ( - TimeSeriesSplit, + GroupKFold, + GroupShuffleSplit, KFold, ShuffleSplit, + StratifiedGroupKFold, StratifiedKFold, - GroupShuffleSplit, - GroupKFold, StratifiedShuffleSplit, - StratifiedGroupKFold, + TimeSeriesSplit, ) -import numpy as np -import matplotlib.pyplot as plt -from matplotlib.patches import Patch rng = np.random.RandomState(1338) cmap_data = plt.cm.Paired diff --git a/examples/model_selection/plot_cv_predict.py b/examples/model_selection/plot_cv_predict.py index 7fd843c535c85..65517d85f3fd1 100644 --- a/examples/model_selection/plot_cv_predict.py +++ b/examples/model_selection/plot_cv_predict.py @@ -37,6 +37,7 @@ # residuals (i.e. the difference between the observed values and the predicted # values) vs. the predicted values. import matplotlib.pyplot as plt + from sklearn.metrics import PredictionErrorDisplay fig, axs = plt.subplots(ncols=2, figsize=(8, 4)) diff --git a/examples/model_selection/plot_det.py b/examples/model_selection/plot_det.py index 97dbe771e6407..7f7a5be32f976 100644 --- a/examples/model_selection/plot_det.py +++ b/examples/model_selection/plot_det.py @@ -82,6 +82,7 @@ # :func:`scipy.stats.norm`. import matplotlib.pyplot as plt + from sklearn.metrics import DetCurveDisplay, RocCurveDisplay fig, [ax_roc, ax_det] = plt.subplots(1, 2, figsize=(11, 5)) diff --git a/examples/model_selection/plot_grid_search_refit_callable.py b/examples/model_selection/plot_grid_search_refit_callable.py index 7a7dd8ea3e463..a8dab986a48d2 100644 --- a/examples/model_selection/plot_grid_search_refit_callable.py +++ b/examples/model_selection/plot_grid_search_refit_callable.py @@ -20,8 +20,8 @@ # Author: Wenhao Zhang -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import load_digits from sklearn.decomposition import PCA diff --git a/examples/model_selection/plot_grid_search_stats.py b/examples/model_selection/plot_grid_search_stats.py index 179d860b42128..fbeb485d8db44 100644 --- a/examples/model_selection/plot_grid_search_stats.py +++ b/examples/model_selection/plot_grid_search_stats.py @@ -16,6 +16,7 @@ import matplotlib.pyplot as plt import seaborn as sns + from sklearn.datasets import make_moons X, y = make_moons(noise=0.352, random_state=1, n_samples=100) diff --git a/examples/model_selection/plot_grid_search_text_feature_extraction.py b/examples/model_selection/plot_grid_search_text_feature_extraction.py index 9ad4296aad9b4..f82cd82b13112 100644 --- a/examples/model_selection/plot_grid_search_text_feature_extraction.py +++ b/examples/model_selection/plot_grid_search_text_feature_extraction.py @@ -25,7 +25,7 @@ # ------------ # We load two categories from the training set. 
You can adjust the number of # categories by adding their names to the list or setting `categories=None` when -# calling the dataset loader :func:`~sklearn.datasets.fetch20newsgroups` to get +# calling the dataset loader :func:`~sklearn.datasets.fetch_20newsgroups` to get # the 20 of them. from sklearn.datasets import fetch_20newsgroups @@ -105,6 +105,7 @@ # via the parameter `n_jobs`. from pprint import pprint + from sklearn.model_selection import RandomizedSearchCV random_search = RandomizedSearchCV( diff --git a/examples/model_selection/plot_learning_curve.py b/examples/model_selection/plot_learning_curve.py index 956c70aaabd82..450392679095f 100644 --- a/examples/model_selection/plot_learning_curve.py +++ b/examples/model_selection/plot_learning_curve.py @@ -38,6 +38,7 @@ # a cross-validation procedure. import matplotlib.pyplot as plt import numpy as np + from sklearn.model_selection import LearningCurveDisplay, ShuffleSplit fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10, 6), sharey=True) diff --git a/examples/model_selection/plot_likelihood_ratios.py b/examples/model_selection/plot_likelihood_ratios.py index e6ec94fc50cf9..9a3f29def9e98 100644 --- a/examples/model_selection/plot_likelihood_ratios.py +++ b/examples/model_selection/plot_likelihood_ratios.py @@ -55,8 +55,8 @@ class proportion than the target application. # ratio to evaluate the usefulness of this classifier as a disease diagnosis # tool: -from sklearn.metrics import class_likelihood_ratios from sklearn.linear_model import LogisticRegression +from sklearn.metrics import class_likelihood_ratios estimator = LogisticRegression().fit(X_train, y_train) y_pred = estimator.predict(X_test) @@ -166,10 +166,12 @@ def extract_score(cv_results): # label `1` corresponds to the positive class "disease", whereas the label `0` # stands for "no-disease". 
-import numpy as np +from collections import defaultdict + import matplotlib.pyplot as plt +import numpy as np + from sklearn.inspection import DecisionBoundaryDisplay -from collections import defaultdict populations = defaultdict(list) common_params = { diff --git a/examples/model_selection/plot_multi_metric_evaluation.py b/examples/model_selection/plot_multi_metric_evaluation.py index e47e67e086ccb..674bf8bc1b07c 100644 --- a/examples/model_selection/plot_multi_metric_evaluation.py +++ b/examples/model_selection/plot_multi_metric_evaluation.py @@ -23,9 +23,8 @@ from matplotlib import pyplot as plt from sklearn.datasets import make_hastie_10_2 +from sklearn.metrics import accuracy_score, make_scorer from sklearn.model_selection import GridSearchCV -from sklearn.metrics import make_scorer -from sklearn.metrics import accuracy_score from sklearn.tree import DecisionTreeClassifier # %% diff --git a/examples/model_selection/plot_nested_cross_validation_iris.py b/examples/model_selection/plot_nested_cross_validation_iris.py index b6f45255e8a09..7513a078b68ce 100644 --- a/examples/model_selection/plot_nested_cross_validation_iris.py +++ b/examples/model_selection/plot_nested_cross_validation_iris.py @@ -44,11 +44,12 @@ """ -from sklearn.datasets import load_iris +import numpy as np from matplotlib import pyplot as plt + +from sklearn.datasets import load_iris +from sklearn.model_selection import GridSearchCV, KFold, cross_val_score from sklearn.svm import SVC -from sklearn.model_selection import GridSearchCV, cross_val_score, KFold -import numpy as np # Number of random trials NUM_TRIALS = 30 diff --git a/examples/model_selection/plot_permutation_tests_for_classification.py b/examples/model_selection/plot_permutation_tests_for_classification.py index c9fcaebb549fe..a02f6d188f006 100644 --- a/examples/model_selection/plot_permutation_tests_for_classification.py +++ b/examples/model_selection/plot_permutation_tests_for_classification.py @@ -58,9 +58,8 @@ # the percentage of permutations for which the score obtained is greater # that the score obtained using the original data. +from sklearn.model_selection import StratifiedKFold, permutation_test_score from sklearn.svm import SVC -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import permutation_test_score clf = SVC(kernel="linear", random_state=7) cv = StratifiedKFold(2, shuffle=True, random_state=0) diff --git a/examples/model_selection/plot_precision_recall.py b/examples/model_selection/plot_precision_recall.py index d11d6e10cdff6..52d85691af439 100644 --- a/examples/model_selection/plot_precision_recall.py +++ b/examples/model_selection/plot_precision_recall.py @@ -100,6 +100,7 @@ # # We will use a Linear SVC classifier to differentiate two types of irises. import numpy as np + from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split @@ -198,8 +199,7 @@ # %% # The average precision score in multi-label settings # ................................................... -from sklearn.metrics import precision_recall_curve -from sklearn.metrics import average_precision_score +from sklearn.metrics import average_precision_score, precision_recall_curve # For each class precision = dict() @@ -232,9 +232,10 @@ # %% # Plot Precision-Recall curve for each class and iso-f1 curves # ............................................................ 
-import matplotlib.pyplot as plt from itertools import cycle +import matplotlib.pyplot as plt + # setup plot details colors = cycle(["navy", "turquoise", "darkorange", "cornflowerblue", "teal"]) diff --git a/examples/model_selection/plot_randomized_search.py b/examples/model_selection/plot_randomized_search.py index 9ffc26a5abc84..140b359ff1934 100644 --- a/examples/model_selection/plot_randomized_search.py +++ b/examples/model_selection/plot_randomized_search.py @@ -20,14 +20,14 @@ """ -import numpy as np - from time import time + +import numpy as np import scipy.stats as stats -from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from sklearn.datasets import load_digits from sklearn.linear_model import SGDClassifier +from sklearn.model_selection import GridSearchCV, RandomizedSearchCV # get some data X, y = load_digits(return_X_y=True, n_class=3) diff --git a/examples/model_selection/plot_roc.py b/examples/model_selection/plot_roc.py index 34346780def26..79cfde38bad0f 100644 --- a/examples/model_selection/plot_roc.py +++ b/examples/model_selection/plot_roc.py @@ -44,6 +44,7 @@ # Here we binarize the output and add noisy features to make the problem harder. import numpy as np + from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split @@ -118,6 +119,7 @@ # %% import matplotlib.pyplot as plt + from sklearn.metrics import RocCurveDisplay RocCurveDisplay.from_predictions( @@ -139,13 +141,13 @@ # ---------------------------------- # # Micro-averaging aggregates the contributions from all the classes (using -# :func:`np.ravel`) to compute the average metrics as follows: +# :func:`numpy.ravel`) to compute the average metrics as follows: # # :math:`TPR=\frac{\sum_{c}TP_c}{\sum_{c}(TP_c + FN_c)}` ; # # :math:`FPR=\frac{\sum_{c}FP_c}{\sum_{c}(FP_c + TN_c)}` . # -# We can briefly demo the effect of :func:`np.ravel`: +# We can briefly demo the effect of :func:`numpy.ravel`: print(f"y_score:\n{y_score[0:2,:]}") print() @@ -191,7 +193,7 @@ # :class:`~sklearn.metrics.roc_curve` and then the area under the curve with # :class:`~sklearn.metrics.auc` for the raveled true and predicted classes. -from sklearn.metrics import roc_curve, auc +from sklearn.metrics import auc, roc_curve # store the fpr, tpr, and roc_auc for all averaging strategies fpr, tpr, roc_auc = dict(), dict(), dict() diff --git a/examples/model_selection/plot_roc_crossval.py b/examples/model_selection/plot_roc_crossval.py index cf4c0496f54fb..a3663aa040b56 100644 --- a/examples/model_selection/plot_roc_crossval.py +++ b/examples/model_selection/plot_roc_crossval.py @@ -41,6 +41,7 @@ # (`class_id=0`). 
import numpy as np + from sklearn.datasets import load_iris iris = load_iris() @@ -66,8 +67,7 @@ import matplotlib.pyplot as plt from sklearn import svm -from sklearn.metrics import auc -from sklearn.metrics import RocCurveDisplay +from sklearn.metrics import RocCurveDisplay, auc from sklearn.model_selection import StratifiedKFold n_splits = 6 diff --git a/examples/model_selection/plot_successive_halving_heatmap.py b/examples/model_selection/plot_successive_halving_heatmap.py index ecdae48e64011..9b079e4b1351f 100644 --- a/examples/model_selection/plot_successive_halving_heatmap.py +++ b/examples/model_selection/plot_successive_halving_heatmap.py @@ -14,12 +14,10 @@ import numpy as np import pandas as pd -from sklearn.svm import SVC from sklearn import datasets -from sklearn.model_selection import GridSearchCV from sklearn.experimental import enable_halving_search_cv # noqa -from sklearn.model_selection import HalvingGridSearchCV - +from sklearn.model_selection import GridSearchCV, HalvingGridSearchCV +from sklearn.svm import SVC # %% # We first define the parameter space for an :class:`~sklearn.svm.SVC` diff --git a/examples/model_selection/plot_successive_halving_iterations.py b/examples/model_selection/plot_successive_halving_iterations.py index bd2d5635e376e..31805d308e269 100644 --- a/examples/model_selection/plot_successive_halving_iterations.py +++ b/examples/model_selection/plot_successive_halving_iterations.py @@ -10,16 +10,15 @@ """ -import pandas as pd -from sklearn import datasets import matplotlib.pyplot as plt -from scipy.stats import randint import numpy as np +import pandas as pd +from scipy.stats import randint +from sklearn import datasets +from sklearn.ensemble import RandomForestClassifier from sklearn.experimental import enable_halving_search_cv # noqa from sklearn.model_selection import HalvingRandomSearchCV -from sklearn.ensemble import RandomForestClassifier - # %% # We first define the parameter space and train a diff --git a/examples/model_selection/plot_train_error_vs_test_error.py b/examples/model_selection/plot_train_error_vs_test_error.py index 1aba6f4892cbe..af7e7d14cdac0 100644 --- a/examples/model_selection/plot_train_error_vs_test_error.py +++ b/examples/model_selection/plot_train_error_vs_test_error.py @@ -19,6 +19,7 @@ # Generate sample data # -------------------- import numpy as np + from sklearn import linear_model from sklearn.datasets import make_regression from sklearn.model_selection import train_test_split diff --git a/examples/model_selection/plot_underfitting_overfitting.py b/examples/model_selection/plot_underfitting_overfitting.py index ae8450b50cea9..412946fc9ca8b 100644 --- a/examples/model_selection/plot_underfitting_overfitting.py +++ b/examples/model_selection/plot_underfitting_overfitting.py @@ -21,12 +21,13 @@ """ -import numpy as np import matplotlib.pyplot as plt -from sklearn.pipeline import Pipeline -from sklearn.preprocessing import PolynomialFeatures +import numpy as np + from sklearn.linear_model import LinearRegression from sklearn.model_selection import cross_val_score +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import PolynomialFeatures def true_fun(X): diff --git a/examples/model_selection/plot_validation_curve.py b/examples/model_selection/plot_validation_curve.py index 48aa19dfbc556..947d8ac2b2fdb 100644 --- a/examples/model_selection/plot_validation_curve.py +++ b/examples/model_selection/plot_validation_curve.py @@ -17,8 +17,8 @@ import numpy as np from sklearn.datasets import load_digits -from 
sklearn.svm import SVC from sklearn.model_selection import ValidationCurveDisplay +from sklearn.svm import SVC X, y = load_digits(return_X_y=True) subset_mask = np.isin(y, [1, 2]) # binary classification: 1 vs 2 diff --git a/examples/multioutput/plot_classifier_chain_yeast.py b/examples/multioutput/plot_classifier_chain_yeast.py index e1f9feed43a97..1df4ee3b8346b 100644 --- a/examples/multioutput/plot_classifier_chain_yeast.py +++ b/examples/multioutput/plot_classifier_chain_yeast.py @@ -36,14 +36,15 @@ # Author: Adam Kleczewski # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import fetch_openml -from sklearn.multioutput import ClassifierChain +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import jaccard_score from sklearn.model_selection import train_test_split from sklearn.multiclass import OneVsRestClassifier -from sklearn.metrics import jaccard_score -from sklearn.linear_model import LogisticRegression +from sklearn.multioutput import ClassifierChain # Load a multi-label dataset from https://www.openml.org/d/40597 X, Y = fetch_openml("yeast", version=4, return_X_y=True, parser="pandas") diff --git a/examples/neighbors/approximate_nearest_neighbors.py b/examples/neighbors/approximate_nearest_neighbors.py index ee848cdc66428..faff31d7a85c9 100644 --- a/examples/neighbors/approximate_nearest_neighbors.py +++ b/examples/neighbors/approximate_nearest_neighbors.py @@ -40,6 +40,7 @@ import joblib import numpy as np from scipy.sparse import csr_matrix + from sklearn.base import BaseEstimator, TransformerMixin from sklearn.datasets import fetch_openml from sklearn.utils import shuffle diff --git a/examples/neighbors/plot_caching_nearest_neighbors.py b/examples/neighbors/plot_caching_nearest_neighbors.py index 00be6470c1591..10c0d315da7af 100644 --- a/examples/neighbors/plot_caching_nearest_neighbors.py +++ b/examples/neighbors/plot_caching_nearest_neighbors.py @@ -22,11 +22,12 @@ # # License: BSD 3 clause from tempfile import TemporaryDirectory + import matplotlib.pyplot as plt -from sklearn.neighbors import KNeighborsTransformer, KNeighborsClassifier -from sklearn.model_selection import GridSearchCV from sklearn.datasets import load_digits +from sklearn.model_selection import GridSearchCV +from sklearn.neighbors import KNeighborsClassifier, KNeighborsTransformer from sklearn.pipeline import Pipeline X, y = load_digits(return_X_y=True) diff --git a/examples/neighbors/plot_classification.py b/examples/neighbors/plot_classification.py index cc4f0864ba926..43c45558054cf 100644 --- a/examples/neighbors/plot_classification.py +++ b/examples/neighbors/plot_classification.py @@ -3,60 +3,92 @@ Nearest Neighbors Classification ================================ -Sample usage of Nearest Neighbors classification. -It will plot the decision boundaries for each class. - +This example shows how to use :class:`~sklearn.neighbors.KNeighborsClassifier`. +We train such a classifier on the iris dataset and observe the difference of the +decision boundary obtained with regards to the parameter `weights`. """ -import matplotlib.pyplot as plt -import seaborn as sns -from matplotlib.colors import ListedColormap -from sklearn import neighbors, datasets -from sklearn.inspection import DecisionBoundaryDisplay +# %% +# Load the data +# ------------- +# +# In this example, we use the iris dataset. We split the data into a train and test +# dataset. 
+from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split -n_neighbors = 15 +iris = load_iris(as_frame=True) +X = iris.data[["sepal length (cm)", "sepal width (cm)"]] +y = iris.target +X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=0) -# import some data to play with -iris = datasets.load_iris() +# %% +# K-nearest neighbors classifier +# ------------------------------ +# +# We want to use a k-nearest neighbors classifier considering a neighborhood of 11 data +# points. Since our k-nearest neighbors model uses euclidean distance to find the +# nearest neighbors, it is important to scale the data beforehand. Refer to +# the example entitled +# :ref:`sphx_glr_auto_examples_preprocessing_plot_scaling_importance.py` for more +# detailed information. +# +# Thus, we use a :class:`~sklearn.pipeline.Pipeline` to chain a scaler before using +# our classifier. +from sklearn.neighbors import KNeighborsClassifier +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler -# we only take the first two features. We could avoid this ugly -# slicing by using a two-dim dataset -X = iris.data[:, :2] -y = iris.target +clf = Pipeline( + steps=[("scaler", StandardScaler()), ("knn", KNeighborsClassifier(n_neighbors=11))] +) + +# %% +# Decision boundary +# ----------------- +# +# Now, we fit two classifiers with different values of the parameter +# `weights`. We plot the decision boundary of each classifier as well as the original +# dataset to observe the difference. +import matplotlib.pyplot as plt -# Create color maps -cmap_light = ListedColormap(["orange", "cyan", "cornflowerblue"]) -cmap_bold = ["darkorange", "c", "darkblue"] +from sklearn.inspection import DecisionBoundaryDisplay -for weights in ["uniform", "distance"]: - # we create an instance of Neighbours Classifier and fit the data. - clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights) - clf.fit(X, y) +_, axs = plt.subplots(ncols=2, figsize=(12, 5)) - _, ax = plt.subplots() - DecisionBoundaryDisplay.from_estimator( +for ax, weights in zip(axs, ("uniform", "distance")): + clf.set_params(knn__weights=weights).fit(X_train, y_train) + disp = DecisionBoundaryDisplay.from_estimator( clf, - X, - cmap=cmap_light, - ax=ax, + X_test, response_method="predict", plot_method="pcolormesh", xlabel=iris.feature_names[0], ylabel=iris.feature_names[1], shading="auto", + alpha=0.5, + ax=ax, ) - - # Plot also the training points - sns.scatterplot( - x=X[:, 0], - y=X[:, 1], - hue=iris.target_names[y], - palette=cmap_bold, - alpha=1.0, - edgecolor="black", + scatter = disp.ax_.scatter(X.iloc[:, 0], X.iloc[:, 1], c=y, edgecolors="k") + disp.ax_.legend( + scatter.legend_elements()[0], + iris.target_names, + loc="lower left", + title="Classes", ) - plt.title( - "3-Class classification (k = %i, weights = '%s')" % (n_neighbors, weights) + _ = disp.ax_.set_title( + f"3-Class classification\n(k={clf[-1].n_neighbors}, weights={weights!r})" ) plt.show() + +# %% +# Conclusion +# ---------- +# +# We observe that the parameter `weights` has an impact on the decision boundary. When +# `weights="uniform"`, all nearest neighbors have the same impact on the decision, +# whereas when `weights="distance"` the weight given to each neighbor is proportional +# to the inverse of the distance from that neighbor to the query point. +# +# In some cases, taking the distance into account might improve the model.
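
Editor's aside, not part of the diff: the qualitative difference discussed in the conclusion can also be quantified on the held-out split, reusing the pipeline `clf` and the train/test split defined in this example. A minimal sketch:

for weights in ("uniform", "distance"):
    # Refit the same scaler + k-NN pipeline with each weighting scheme and
    # compare the accuracy on the test split.
    clf.set_params(knn__weights=weights).fit(X_train, y_train)
    print(f"weights={weights!r}: test accuracy = {clf.score(X_test, y_test):.3f}")
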
diff --git a/examples/neighbors/plot_digits_kde_sampling.py b/examples/neighbors/plot_digits_kde_sampling.py index e580f9fa178bc..045058eab09cc 100644 --- a/examples/neighbors/plot_digits_kde_sampling.py +++ b/examples/neighbors/plot_digits_kde_sampling.py @@ -11,13 +11,13 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import load_digits -from sklearn.neighbors import KernelDensity from sklearn.decomposition import PCA from sklearn.model_selection import GridSearchCV +from sklearn.neighbors import KernelDensity # load the data digits = load_digits() diff --git a/examples/neighbors/plot_kde_1d.py b/examples/neighbors/plot_kde_1d.py index 8b139d4cc2335..fc5b1914f23de 100644 --- a/examples/neighbors/plot_kde_1d.py +++ b/examples/neighbors/plot_kde_1d.py @@ -30,9 +30,10 @@ # Author: Jake Vanderplas # -import numpy as np import matplotlib.pyplot as plt +import numpy as np from scipy.stats import norm + from sklearn.neighbors import KernelDensity # ---------------------------------------------------------------------- diff --git a/examples/neighbors/plot_lof_novelty_detection.py b/examples/neighbors/plot_lof_novelty_detection.py index 277134cc77673..91e40661c6dfe 100644 --- a/examples/neighbors/plot_lof_novelty_detection.py +++ b/examples/neighbors/plot_lof_novelty_detection.py @@ -25,9 +25,10 @@ """ -import numpy as np import matplotlib import matplotlib.pyplot as plt +import numpy as np + from sklearn.neighbors import LocalOutlierFactor np.random.seed(42) diff --git a/examples/neighbors/plot_nca_classification.py b/examples/neighbors/plot_nca_classification.py index a08bbe8be3756..f76770640ed03 100644 --- a/examples/neighbors/plot_nca_classification.py +++ b/examples/neighbors/plot_nca_classification.py @@ -19,13 +19,13 @@ import matplotlib.pyplot as plt from matplotlib.colors import ListedColormap + from sklearn import datasets +from sklearn.inspection import DecisionBoundaryDisplay from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis from sklearn.pipeline import Pipeline -from sklearn.inspection import DecisionBoundaryDisplay - +from sklearn.preprocessing import StandardScaler n_neighbors = 1 diff --git a/examples/neighbors/plot_nca_dim_reduction.py b/examples/neighbors/plot_nca_dim_reduction.py index d245e0223ccfa..82fd35616929e 100644 --- a/examples/neighbors/plot_nca_dim_reduction.py +++ b/examples/neighbors/plot_nca_dim_reduction.py @@ -30,12 +30,13 @@ # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import datasets -from sklearn.model_selection import train_test_split from sklearn.decomposition import PCA from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier, NeighborhoodComponentsAnalysis from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler diff --git a/examples/neighbors/plot_nca_illustration.py b/examples/neighbors/plot_nca_illustration.py index d722ffa5be033..e5fd2f9cb67bd 100644 --- a/examples/neighbors/plot_nca_illustration.py +++ b/examples/neighbors/plot_nca_illustration.py @@ -12,13 +12,14 @@ # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt -from sklearn.datasets import make_classification -from sklearn.neighbors import NeighborhoodComponentsAnalysis 
+import numpy as np from matplotlib import cm from scipy.special import logsumexp +from sklearn.datasets import make_classification +from sklearn.neighbors import NeighborhoodComponentsAnalysis + # %% # Original points # --------------- diff --git a/examples/neighbors/plot_nearest_centroid.py b/examples/neighbors/plot_nearest_centroid.py index 4eb0e0388a30b..c8f710d0a0377 100644 --- a/examples/neighbors/plot_nearest_centroid.py +++ b/examples/neighbors/plot_nearest_centroid.py @@ -8,13 +8,13 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.colors import ListedColormap + from sklearn import datasets -from sklearn.neighbors import NearestCentroid from sklearn.inspection import DecisionBoundaryDisplay - +from sklearn.neighbors import NearestCentroid # import some data to play with iris = datasets.load_iris() diff --git a/examples/neighbors/plot_regression.py b/examples/neighbors/plot_regression.py index 78b850d1a4e2c..d5ceba8a34860 100644 --- a/examples/neighbors/plot_regression.py +++ b/examples/neighbors/plot_regression.py @@ -18,8 +18,9 @@ # %% # Generate sample data # -------------------- -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import neighbors np.random.seed(0) diff --git a/examples/neighbors/plot_species_kde.py b/examples/neighbors/plot_species_kde.py index 35ea40158a45c..3783138dfcb76 100644 --- a/examples/neighbors/plot_species_kde.py +++ b/examples/neighbors/plot_species_kde.py @@ -40,8 +40,9 @@ # # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import fetch_species_distributions from sklearn.neighbors import KernelDensity diff --git a/examples/neural_networks/plot_mlp_alpha.py b/examples/neural_networks/plot_mlp_alpha.py index 443d41f4707bf..b53beef54c115 100644 --- a/examples/neural_networks/plot_mlp_alpha.py +++ b/examples/neural_networks/plot_mlp_alpha.py @@ -23,11 +23,12 @@ import numpy as np from matplotlib import pyplot as plt from matplotlib.colors import ListedColormap + +from sklearn.datasets import make_circles, make_classification, make_moons from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler -from sklearn.datasets import make_moons, make_circles, make_classification from sklearn.neural_network import MLPClassifier from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler h = 0.02 # step size in the mesh diff --git a/examples/neural_networks/plot_mlp_training_curves.py b/examples/neural_networks/plot_mlp_training_curves.py index 3fbddda879162..a9f03c2599a8e 100644 --- a/examples/neural_networks/plot_mlp_training_curves.py +++ b/examples/neural_networks/plot_mlp_training_curves.py @@ -18,10 +18,10 @@ import matplotlib.pyplot as plt -from sklearn.neural_network import MLPClassifier -from sklearn.preprocessing import MinMaxScaler from sklearn import datasets from sklearn.exceptions import ConvergenceWarning +from sklearn.neural_network import MLPClassifier +from sklearn.preprocessing import MinMaxScaler # different learning rate schedules and momentum parameters params = [ diff --git a/examples/neural_networks/plot_mnist_filters.py b/examples/neural_networks/plot_mnist_filters.py index 03f615786e830..43e6a171fb696 100644 --- a/examples/neural_networks/plot_mnist_filters.py +++ b/examples/neural_networks/plot_mnist_filters.py @@ -25,11 +25,13 @@ """ import warnings + import matplotlib.pyplot as plt + from sklearn.datasets import 
fetch_openml from sklearn.exceptions import ConvergenceWarning -from sklearn.neural_network import MLPClassifier from sklearn.model_selection import train_test_split +from sklearn.neural_network import MLPClassifier # Load data from https://www.openml.org/d/554 X, y = fetch_openml( diff --git a/examples/neural_networks/plot_rbm_logistic_classification.py b/examples/neural_networks/plot_rbm_logistic_classification.py index de939922d9514..3ba878d4ad191 100644 --- a/examples/neural_networks/plot_rbm_logistic_classification.py +++ b/examples/neural_networks/plot_rbm_logistic_classification.py @@ -23,13 +23,11 @@ # linear shifts of 1 pixel in each direction. import numpy as np - from scipy.ndimage import convolve from sklearn import datasets -from sklearn.preprocessing import minmax_scale - from sklearn.model_selection import train_test_split +from sklearn.preprocessing import minmax_scale def nudge_dataset(X, Y): diff --git a/examples/preprocessing/plot_all_scaling.py b/examples/preprocessing/plot_all_scaling.py index 2893f5cf01ccb..f53c50e33875a 100644 --- a/examples/preprocessing/plot_all_scaling.py +++ b/examples/preprocessing/plot_all_scaling.py @@ -45,22 +45,22 @@ # Thomas Unterthiner # License: BSD 3 clause -import numpy as np - import matplotlib as mpl -from matplotlib import pyplot as plt +import numpy as np from matplotlib import cm - -from sklearn.preprocessing import MinMaxScaler -from sklearn.preprocessing import minmax_scale -from sklearn.preprocessing import MaxAbsScaler -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import RobustScaler -from sklearn.preprocessing import Normalizer -from sklearn.preprocessing import QuantileTransformer -from sklearn.preprocessing import PowerTransformer +from matplotlib import pyplot as plt from sklearn.datasets import fetch_california_housing +from sklearn.preprocessing import ( + MaxAbsScaler, + MinMaxScaler, + Normalizer, + PowerTransformer, + QuantileTransformer, + RobustScaler, + StandardScaler, + minmax_scale, +) dataset = fetch_california_housing() X_full, y_full = dataset.data, dataset.target @@ -102,11 +102,15 @@ ), ( "Data after quantile transformation (uniform pdf)", - QuantileTransformer(output_distribution="uniform").fit_transform(X), + QuantileTransformer( + output_distribution="uniform", random_state=42 + ).fit_transform(X), ), ( "Data after quantile transformation (gaussian pdf)", - QuantileTransformer(output_distribution="normal").fit_transform(X), + QuantileTransformer( + output_distribution="normal", random_state=42 + ).fit_transform(X), ), ("Data after sample-wise L2 normalizing", Normalizer().fit_transform(X)), ] @@ -265,6 +269,8 @@ def make_plot(item_idx): make_plot(0) # %% +# .. _plot_all_scaling_standard_scaler_section: +# # StandardScaler # -------------- # @@ -285,6 +291,8 @@ def make_plot(item_idx): make_plot(1) # %% +# .. _plot_all_scaling_minmax_scaler_section: +# # MinMaxScaler # ------------ # @@ -301,6 +309,8 @@ def make_plot(item_idx): make_plot(2) # %% +# .. _plot_all_scaling_max_abs_scaler_section: +# # MaxAbsScaler # ------------ # @@ -318,6 +328,8 @@ def make_plot(item_idx): make_plot(3) # %% +# .. _plot_all_scaling_robust_scaler_section: +# # RobustScaler # ------------ # @@ -335,6 +347,8 @@ def make_plot(item_idx): make_plot(4) # %% +# .. _plot_all_scaling_power_transformer_section: +# # PowerTransformer # ---------------- # @@ -353,6 +367,8 @@ def make_plot(item_idx): make_plot(6) # %% +# .. 
_plot_all_scaling_quantile_transformer_section: +# # QuantileTransformer (uniform output) # ------------------------------------ # @@ -384,6 +400,8 @@ def make_plot(item_idx): make_plot(8) # %% +# .. _plot_all_scaling_normalizer_section: +# # Normalizer # ---------- # diff --git a/examples/preprocessing/plot_discretization.py b/examples/preprocessing/plot_discretization.py index ffb3f9403634d..002d606da0c9d 100644 --- a/examples/preprocessing/plot_discretization.py +++ b/examples/preprocessing/plot_discretization.py @@ -31,8 +31,8 @@ # Hanmin Qin # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.linear_model import LinearRegression from sklearn.preprocessing import KBinsDiscretizer diff --git a/examples/preprocessing/plot_discretization_classification.py b/examples/preprocessing/plot_discretization_classification.py index a35c56ea683d6..f3edcac0011d7 100644 --- a/examples/preprocessing/plot_discretization_classification.py +++ b/examples/preprocessing/plot_discretization_classification.py @@ -33,20 +33,19 @@ # # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np from matplotlib.colors import ListedColormap -from sklearn.model_selection import train_test_split -from sklearn.preprocessing import StandardScaler -from sklearn.datasets import make_moons, make_circles, make_classification + +from sklearn.datasets import make_circles, make_classification, make_moons +from sklearn.ensemble import GradientBoostingClassifier +from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import KBinsDiscretizer +from sklearn.preprocessing import KBinsDiscretizer, StandardScaler from sklearn.svm import SVC, LinearSVC -from sklearn.ensemble import GradientBoostingClassifier from sklearn.utils._testing import ignore_warnings -from sklearn.exceptions import ConvergenceWarning h = 0.02 # step size in the mesh @@ -75,7 +74,7 @@ def get_name(estimator): ( make_pipeline( StandardScaler(), - KBinsDiscretizer(encode="onehot"), + KBinsDiscretizer(encode="onehot", random_state=0), LogisticRegression(random_state=0), ), { @@ -86,7 +85,7 @@ def get_name(estimator): ( make_pipeline( StandardScaler(), - KBinsDiscretizer(encode="onehot"), + KBinsDiscretizer(encode="onehot", random_state=0), LinearSVC(random_state=0, dual="auto"), ), { diff --git a/examples/preprocessing/plot_discretization_strategies.py b/examples/preprocessing/plot_discretization_strategies.py index 91904246540dd..b4c2f3ca1858d 100644 --- a/examples/preprocessing/plot_discretization_strategies.py +++ b/examples/preprocessing/plot_discretization_strategies.py @@ -19,11 +19,11 @@ # Author: Tom Dupré la Tour # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np -from sklearn.preprocessing import KBinsDiscretizer from sklearn.datasets import make_blobs +from sklearn.preprocessing import KBinsDiscretizer strategies = ["uniform", "quantile", "kmeans"] diff --git a/examples/preprocessing/plot_map_data_to_normal.py b/examples/preprocessing/plot_map_data_to_normal.py index 42a61d84fa384..a521039098871 100644 --- a/examples/preprocessing/plot_map_data_to_normal.py +++ b/examples/preprocessing/plot_map_data_to_normal.py @@ -38,13 +38,11 @@ # Nicolas Hug # License: BSD 3 clause -import numpy as 
np import matplotlib.pyplot as plt +import numpy as np -from sklearn.preprocessing import PowerTransformer -from sklearn.preprocessing import QuantileTransformer from sklearn.model_selection import train_test_split - +from sklearn.preprocessing import PowerTransformer, QuantileTransformer N_SAMPLES = 1000 FONT_SIZE = 6 diff --git a/examples/preprocessing/plot_scaling_importance.py b/examples/preprocessing/plot_scaling_importance.py index 4e8f87b68b1d4..5b78fe0636d8c 100644 --- a/examples/preprocessing/plot_scaling_importance.py +++ b/examples/preprocessing/plot_scaling_importance.py @@ -52,6 +52,8 @@ scaled_X_train = scaler.fit_transform(X_train) # %% +# .. _neighbors_scaling: +# # Effect of rescaling on a k-neighbors models # =========================================== # @@ -65,10 +67,10 @@ # of features. import matplotlib.pyplot as plt + from sklearn.inspection import DecisionBoundaryDisplay from sklearn.neighbors import KNeighborsClassifier - X_plot = X[["proline", "hue"]] X_plot_scaled = scaler.fit_transform(X_plot) clf = KNeighborsClassifier(n_neighbors=20) @@ -122,6 +124,7 @@ def fit_and_plot_model(X_plot, y, clf, ax): # We can inspect the first principal components using all the original features: import pandas as pd + from sklearn.decomposition import PCA pca = PCA(n_components=2).fit(X_train) @@ -199,8 +202,9 @@ def fit_and_plot_model(X_plot, y, clf, ax): # non-scaling of the data: import numpy as np -from sklearn.pipeline import make_pipeline + from sklearn.linear_model import LogisticRegressionCV +from sklearn.pipeline import make_pipeline Cs = np.logspace(-5, 5, 20) @@ -218,8 +222,7 @@ def fit_and_plot_model(X_plot, y, clf, ax): # was not scaled before applying PCA. We now evaluate the effect of scaling on # the accuracy and the mean log-loss of the optimal models: -from sklearn.metrics import accuracy_score -from sklearn.metrics import log_loss +from sklearn.metrics import accuracy_score, log_loss y_pred = unscaled_clf.predict(X_test) y_pred_scaled = scaled_clf.predict(X_test) diff --git a/examples/preprocessing/plot_target_encoder.py b/examples/preprocessing/plot_target_encoder.py index a50f0199e5ba8..4513897cd3a90 100644 --- a/examples/preprocessing/plot_target_encoder.py +++ b/examples/preprocessing/plot_target_encoder.py @@ -12,7 +12,7 @@ .. note:: `fit(X, y).transform(X)` does not equal `fit_transform(X, y)` because a - cross-validation scheme is used in `fit_transform` for encoding. See the + cross fitting scheme is used in `fit_transform` for encoding. See the :ref:`User Guide `. for details. """ @@ -55,9 +55,7 @@ # strategies. 
First, we list out the encoders we will be using to preprocess # the categorical features: from sklearn.compose import ColumnTransformer -from sklearn.preprocessing import OrdinalEncoder -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import TargetEncoder +from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, TargetEncoder categorical_preprocessors = [ ("drop", "drop"), @@ -71,9 +69,9 @@ # %% # Next, we evaluate the models using cross validation and record the results: -from sklearn.pipeline import make_pipeline -from sklearn.model_selection import cross_validate from sklearn.ensemble import HistGradientBoostingRegressor +from sklearn.model_selection import cross_validate +from sklearn.pipeline import make_pipeline n_cv_folds = 3 max_iter = 20 diff --git a/examples/preprocessing/plot_target_encoder_cross_val.py b/examples/preprocessing/plot_target_encoder_cross_val.py index 455625cc47460..7244a1bf61cd6 100644 --- a/examples/preprocessing/plot_target_encoder_cross_val.py +++ b/examples/preprocessing/plot_target_encoder_cross_val.py @@ -1,29 +1,35 @@ """ -========================================== -Target Encoder's Internal Cross Validation -========================================== +======================================= +Target Encoder's Internal Cross fitting +======================================= .. currentmodule:: sklearn.preprocessing -The :class:`TargetEnocoder` replaces each category of a categorical feature with -the mean of the target variable for that category. This method is useful +The :class:`TargetEncoder` replaces each category of a categorical feature with +the shrunk mean of the target variable for that category. This method is useful in cases where there is a strong relationship between the categorical feature and the target. To prevent overfitting, :meth:`TargetEncoder.fit_transform` uses -interval cross validation to encode the training data to be used by a downstream -model. In this example, we demonstrate the importance of the cross validation -procedure to prevent overfitting. +an internal :term:`cross fitting` scheme to encode the training data to be used +by a downstream model. This scheme involves splitting the data into *k* folds +and encoding each fold using the encodings learnt using the other *k-1* folds. +In this example, we demonstrate the importance of the cross +fitting procedure to prevent overfitting. """ # %% # Create Synthetic Dataset # ======================== -# For this example, we build a dataset with three categorical features: an informative -# feature with medium cardinality, an uninformative feature with medium cardinality, -# and an uninformative feature with high cardinality. 
First, we generate the informative -# feature: -from sklearn.preprocessing import KBinsDiscretizer +# For this example, we build a dataset with three categorical features: +# +# * an informative feature with medium cardinality ("informative") +# * an uninformative feature with medium cardinality ("shuffled") +# * an uninformative feature with high cardinality ("near_unique") +# +# First, we generate the informative feature: import numpy as np +from sklearn.preprocessing import KBinsDiscretizer + n_samples = 50_000 rng = np.random.RandomState(42) @@ -32,12 +38,16 @@ n_categories = 100 kbins = KBinsDiscretizer( - n_bins=n_categories, encode="ordinal", strategy="uniform", random_state=rng + n_bins=n_categories, + encode="ordinal", + strategy="uniform", + random_state=rng, + subsample=None, ) X_informative = kbins.fit_transform((y + noise).reshape(-1, 1)) -# Remove the linear relationship between y and the bin index by permuting the values of -# X_informative +# Remove the linear relationship between y and the bin index by permuting the +# values of X_informative: permuted_categories = rng.permutation(n_categories) X_informative = permuted_categories[X_informative.astype(np.int32)] @@ -47,22 +57,23 @@ X_shuffled = rng.permutation(X_informative) # %% -# The uninformative feature with high cardinality is generated so that is independent of -# the target variable. We will show that target encoding without cross validation will -# cause catastrophic overfitting for the downstream regressor. These high cardinality -# features are basically unique identifiers for samples which should generally be -# removed from machine learning dataset. In this example, we generate them to show how -# :class:`TargetEncoder`'s default cross validation behavior mitigates the overfitting -# issue automatically. +# The uninformative feature with high cardinality is generated so that it is +# independent of the target variable. We will show that target encoding without +# :term:`cross fitting` will cause catastrophic overfitting for the downstream +# regressor. These high cardinality features are basically unique identifiers +# for samples which should generally be removed from machine learning datasets. +# In this example, we generate them to show how :class:`TargetEncoder`'s default +# :term:`cross fitting` behavior mitigates the overfitting issue automatically. X_near_unique_categories = rng.choice( int(0.9 * n_samples), size=n_samples, replace=True ).reshape(-1, 1) # %% # Finally, we assemble the dataset and perform a train test split: -from sklearn.model_selection import train_test_split import pandas as pd +from sklearn.model_selection import train_test_split + X = pd.DataFrame( np.concatenate( [X_informative, X_shuffled, X_near_unique_categories], @@ -77,11 +88,12 @@ # ========================== # In this section, we train a ridge regressor on the dataset with and without # encoding and explore the influence of target encoder with and without the -# interval cross validation. First, we see the Ridge model trained on the -# raw features will have low performance, because the order of the informative -# feature is not informative: -from sklearn.linear_model import Ridge +# internal :term:`cross fitting`. First, we see the Ridge model trained on the +# raw features will have low performance. 
This is because we permuted the order +# of the informative feature meaning `X_informative` is not informative when +# raw: import sklearn +from sklearn.linear_model import Ridge # Configure transformers to always output DataFrames sklearn.set_config(transform_output="pandas") @@ -94,67 +106,86 @@ # %% # Next, we create a pipeline with the target encoder and ridge model. The pipeline -# uses :meth:`TargetEncoder.fit_transform` which uses cross validation. We see that -# the model fits the data well and generalizes to the test set: +# uses :meth:`TargetEncoder.fit_transform` which uses :term:`cross fitting`. We +# see that the model fits the data well and generalizes to the test set: from sklearn.pipeline import make_pipeline from sklearn.preprocessing import TargetEncoder -model_with_cv = make_pipeline(TargetEncoder(random_state=0), ridge) -model_with_cv.fit(X_train, y_train) -print("Model with CV on training set: ", model_with_cv.score(X_train, y_train)) -print("Model with CV on test set: ", model_with_cv.score(X_test, y_test)) +model_with_cf = make_pipeline(TargetEncoder(random_state=0), ridge) +model_with_cf.fit(X_train, y_train) +print("Model with CF on train set: ", model_with_cf.score(X_train, y_train)) +print("Model with CF on test set: ", model_with_cf.score(X_test, y_test)) # %% # The coefficients of the linear model shows that most of the weight is on the # feature at column index 0, which is the informative feature -import pandas as pd import matplotlib.pyplot as plt +import pandas as pd plt.rcParams["figure.constrained_layout.use"] = True -coefs_cv = pd.Series( - model_with_cv[-1].coef_, index=model_with_cv[-1].feature_names_in_ +coefs_cf = pd.Series( + model_with_cf[-1].coef_, index=model_with_cf[-1].feature_names_in_ ).sort_values() -_ = coefs_cv.plot(kind="barh") +ax = coefs_cf.plot(kind="barh") +_ = ax.set( + title="Target encoded with cross fitting", + xlabel="Ridge coefficient", + ylabel="Feature", +) # %% -# While :meth:`TargetEncoder.fit_transform` uses an interval cross validation, -# :meth:`TargetEncoder.transform` itself does not perform any cross validation. -# It uses the aggregation of the complete training set to transform the categorical -# features. Thus, we can use :meth:`TargetEncoder.fit` followed by -# :meth:`TargetEncoder.transform` to disable the cross validation. This encoding -# is then passed to the ridge model. +# While :meth:`TargetEncoder.fit_transform` uses an internal +# :term:`cross fitting` scheme to learn encodings for the training set, +# :meth:`TargetEncoder.transform` itself does not. +# It uses the complete training set to learn encodings and to transform the +# categorical features. Thus, we can use :meth:`TargetEncoder.fit` followed by +# :meth:`TargetEncoder.transform` to disable the :term:`cross fitting`. This +# encoding is then passed to the ridge model. 
target_encoder = TargetEncoder(random_state=0) target_encoder.fit(X_train, y_train) -X_train_no_cv_encoding = target_encoder.transform(X_train) -X_test_no_cv_encoding = target_encoder.transform(X_test) +X_train_no_cf_encoding = target_encoder.transform(X_train) +X_test_no_cf_encoding = target_encoder.transform(X_test) -model_no_cv = ridge.fit(X_train_no_cv_encoding, y_train) +model_no_cf = ridge.fit(X_train_no_cf_encoding, y_train) # %% -# We evaluate the model on the non-cross validated encoding and see that it overfits: +# We evaluate the model that did not use :term:`cross fitting` when encoding and +# see that it overfits: print( - "Model without CV on training set: ", - model_no_cv.score(X_train_no_cv_encoding, y_train), + "Model without CF on training set: ", + model_no_cf.score(X_train_no_cf_encoding, y_train), ) print( - "Model without CV on test set: ", model_no_cv.score(X_test_no_cv_encoding, y_test) + "Model without CF on test set: ", + model_no_cf.score( + X_test_no_cf_encoding, + y_test, + ), ) # %% -# The ridge model overfits, because it assigns more weight to the extremely high -# cardinality feature relative to the informative feature. -coefs_no_cv = pd.Series( - model_no_cv.coef_, index=model_no_cv.feature_names_in_ +# The ridge model overfits because it assigns much more weight to the +# uninformative extremely high cardinality ("near_unique") and medium +# cardinality ("shuffled") features than when the model used +# :term:`cross fitting` to encode the features. +coefs_no_cf = pd.Series( + model_no_cf.coef_, index=model_no_cf.feature_names_in_ ).sort_values() -_ = coefs_no_cv.plot(kind="barh") +ax = coefs_no_cf.plot(kind="barh") +_ = ax.set( + title="Target encoded without cross fitting", + xlabel="Ridge coefficient", + ylabel="Feature", +) # %% # Conclusion # ========== -# This example demonstrates the importance of :class:`TargetEncoder`'s interval cross -# validation. It is important to use :meth:`TargetEncoder.fit_transform` to encode -# training data before passing it to a machine learning model. When a -# :class:`TargetEncoder` is a part of a :class:`~sklearn.pipeline.Pipeline` and the -# pipeline is fitted, the pipeline will correctly call -# :meth:`TargetEncoder.fit_transform` and pass the encoding along. +# This example demonstrates the importance of :class:`TargetEncoder`'s internal +# :term:`cross fitting`. It is important to use +# :meth:`TargetEncoder.fit_transform` to encode training data before passing it +# to a machine learning model. When a :class:`TargetEncoder` is a part of a +# :class:`~sklearn.pipeline.Pipeline` and the pipeline is fitted, the pipeline +# will correctly call :meth:`TargetEncoder.fit_transform` and use +# :term:`cross fitting` when encoding the training data. diff --git a/examples/release_highlights/plot_release_highlights_0_22_0.py b/examples/release_highlights/plot_release_highlights_0_22_0.py index 02b99df3491ee..2fc3c5782cf53 100644 --- a/examples/release_highlights/plot_release_highlights_0_22_0.py +++ b/examples/release_highlights/plot_release_highlights_0_22_0.py @@ -27,22 +27,22 @@ # A new plotting API is available for creating visualizations. This new API # allows for quickly adjusting the visuals of a plot without involving any # recomputation. It is also possible to add different plots to the same -# figure. The following example illustrates :class:`~metrics.plot_roc_curve`, +# figure. 
The following example illustrates `plot_roc_curve`, # but other plots utilities are supported like -# :class:`~inspection.plot_partial_dependence`, -# :class:`~metrics.plot_precision_recall_curve`, and -# :class:`~metrics.plot_confusion_matrix`. Read more about this new API in the +# `plot_partial_dependence`, +# `plot_precision_recall_curve`, and +# `plot_confusion_matrix`. Read more about this new API in the # :ref:`User Guide `. -from sklearn.model_selection import train_test_split -from sklearn.svm import SVC +import matplotlib.pyplot as plt + +from sklearn.datasets import make_classification +from sklearn.ensemble import RandomForestClassifier # from sklearn.metrics import plot_roc_curve from sklearn.metrics import RocCurveDisplay - -from sklearn.ensemble import RandomForestClassifier -from sklearn.datasets import make_classification -import matplotlib.pyplot as plt +from sklearn.model_selection import train_test_split +from sklearn.svm import SVC X, y = make_classification(random_state=0) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) @@ -79,12 +79,12 @@ # Read more in the :ref:`User Guide `. from sklearn.datasets import load_iris -from sklearn.svm import LinearSVC -from sklearn.linear_model import LogisticRegression -from sklearn.preprocessing import StandardScaler -from sklearn.pipeline import make_pipeline from sklearn.ensemble import StackingClassifier +from sklearn.linear_model import LogisticRegression from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.svm import LinearSVC X, y = load_iris(return_X_y=True) estimators = [ @@ -102,8 +102,9 @@ # The :func:`inspection.permutation_importance` can be used to get an # estimate of the importance of each feature, for any fitted estimator: -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import make_classification from sklearn.ensemble import RandomForestClassifier from sklearn.inspection import permutation_importance @@ -155,8 +156,9 @@ # See more details in the :ref:`User Guide `. from tempfile import TemporaryDirectory -from sklearn.neighbors import KNeighborsTransformer + from sklearn.manifold import Isomap +from sklearn.neighbors import KNeighborsTransformer from sklearn.pipeline import make_pipeline X, y = make_classification(random_state=0) @@ -185,7 +187,7 @@ # close if the features that neither is missing are close. # By default, a euclidean distance metric # that supports missing values, -# :func:`~metrics.nan_euclidean_distances`, is used to find the nearest +# :func:`~sklearn.metrics.pairwise.nan_euclidean_distances`, is used to find the nearest # neighbors. # # Read more in the :ref:`User Guide `. @@ -258,7 +260,7 @@ def test_sklearn_compatible_estimator(estimator, check): # %% # ROC AUC now supports multiclass classification # ---------------------------------------------- -# The :func:`roc_auc_score` function can also be used in multi-class +# The :func:`~sklearn.metrics.roc_auc_score` function can also be used in multi-class # classification. 
Two averaging strategies are currently supported: the # one-vs-one algorithm computes the average of the pairwise ROC AUC scores, and # the one-vs-rest algorithm computes the average of the ROC AUC scores for each @@ -272,8 +274,8 @@ def test_sklearn_compatible_estimator(estimator, check): from sklearn.datasets import make_classification -from sklearn.svm import SVC from sklearn.metrics import roc_auc_score +from sklearn.svm import SVC X, y = make_classification(n_classes=4, n_informative=16) clf = SVC(decision_function_shape="ovo", probability=True).fit(X, y) diff --git a/examples/release_highlights/plot_release_highlights_0_23_0.py b/examples/release_highlights/plot_release_highlights_0_23_0.py index 7c6836632e3f0..d7ae7465a590b 100644 --- a/examples/release_highlights/plot_release_highlights_0_23_0.py +++ b/examples/release_highlights/plot_release_highlights_0_23_0.py @@ -1,4 +1,4 @@ -# flake8: noqa +# ruff: noqa """ ======================================== Release Highlights for scikit-learn 0.23 diff --git a/examples/release_highlights/plot_release_highlights_0_24_0.py b/examples/release_highlights/plot_release_highlights_0_24_0.py index a55b4aabc7994..29082c1a078f4 100644 --- a/examples/release_highlights/plot_release_highlights_0_24_0.py +++ b/examples/release_highlights/plot_release_highlights_0_24_0.py @@ -1,4 +1,4 @@ -# flake8: noqa +# ruff: noqa """ ======================================== Release Highlights for scikit-learn 0.24 diff --git a/examples/release_highlights/plot_release_highlights_1_0_0.py b/examples/release_highlights/plot_release_highlights_1_0_0.py index 383612e611688..7ac09dd193c0f 100644 --- a/examples/release_highlights/plot_release_highlights_1_0_0.py +++ b/examples/release_highlights/plot_release_highlights_1_0_0.py @@ -1,4 +1,4 @@ -# flake8: noqa +# ruff: noqa """ ======================================= Release Highlights for scikit-learn 1.0 diff --git a/examples/release_highlights/plot_release_highlights_1_1_0.py b/examples/release_highlights/plot_release_highlights_1_1_0.py index 16887b5b1b493..b3058a7e0aa27 100644 --- a/examples/release_highlights/plot_release_highlights_1_1_0.py +++ b/examples/release_highlights/plot_release_highlights_1_1_0.py @@ -1,4 +1,4 @@ -# flake8: noqa +# ruff: noqa """ ======================================= Release Highlights for scikit-learn 1.1 @@ -24,7 +24,7 @@ # %% # Quantile loss in :class:`ensemble.HistGradientBoostingRegressor` # ---------------------------------------------------------------- -# :class:`ensemble.HistGradientBoostingRegressor` can model quantiles with +# :class:`~ensemble.HistGradientBoostingRegressor` can model quantiles with # `loss="quantile"` and the new parameter `quantile`. from sklearn.ensemble import HistGradientBoostingRegressor import numpy as np @@ -56,7 +56,7 @@ # `get_feature_names_out` Available in all Transformers # ----------------------------------------------------- # :term:`get_feature_names_out` is now available in all Transformers. 
This enables -# :class:`pipeline.Pipeline` to construct the output feature names for more complex +# :class:`~pipeline.Pipeline` to construct the output feature names for more complex # pipelines: from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder, StandardScaler @@ -101,12 +101,13 @@ # %% -# Grouping infrequent categories in :class:`OneHotEncoder` -# -------------------------------------------------------- -# :class:`OneHotEncoder` supports aggregating infrequent categories into a single -# output for each feature. The parameters to enable the gathering of infrequent -# categories are `min_frequency` and `max_categories`. See the -# :ref:`User Guide ` for more details. +# Grouping infrequent categories in :class:`~preprocessing.OneHotEncoder` +# ----------------------------------------------------------------------- +# :class:`~preprocessing.OneHotEncoder` supports aggregating infrequent +# categories into a single output for each feature. The parameters to enable +# the gathering of infrequent categories are `min_frequency` and +# `max_categories`. See the :ref:`User Guide ` +# for more details. from sklearn.preprocessing import OneHotEncoder import numpy as np @@ -165,14 +166,15 @@ # - :class:`linear_model.TweedieRegressor` # %% -# MiniBatchNMF: an online version of NMF -# -------------------------------------- -# The new class :class:`decomposition.MiniBatchNMF` implements a faster but less -# accurate version of non-negative matrix factorization (:class:`decomposition.NMF`). -# :class:`MiniBatchNMF` divides the data into mini-batches and optimizes the NMF model -# in an online manner by cycling over the mini-batches, making it better suited for -# large datasets. In particular, it implements `partial_fit`, which can be used for -# online learning when the data is not readily available from the start, or when the +# :class:`~decomposition.MiniBatchNMF`: an online version of NMF +# -------------------------------------------------------------- +# The new class :class:`~decomposition.MiniBatchNMF` implements a faster but +# less accurate version of non-negative matrix factorization +# (:class:`~decomposition.NMF`). :class:`~decomposition.MiniBatchNMF` divides the +# data into mini-batches and optimizes the NMF model in an online manner by +# cycling over the mini-batches, making it better suited for large datasets. In +# particular, it implements `partial_fit`, which can be used for online +# learning when the data is not readily available from the start, or when the # data does not fit into memory. import numpy as np from sklearn.decomposition import MiniBatchNMF @@ -198,13 +200,14 @@ ) # %% -# BisectingKMeans: divide and cluster -# ----------------------------------- -# The new class :class:`cluster.BisectingKMeans` is a variant of :class:`KMeans`, using -# divisive hierarchical clustering. Instead of creating all centroids at once, centroids -# are picked progressively based on a previous clustering: a cluster is split into two -# new clusters repeatedly until the target number of clusters is reached, giving a -# hierarchical structure to the clustering. +# :class:`~cluster.BisectingKMeans`: divide and cluster +# ----------------------------------------------------- +# The new class :class:`~cluster.BisectingKMeans` is a variant of +# :class:`~cluster.KMeans`, using divisive hierarchical clustering. 
Instead of +# creating all centroids at once, centroids are picked progressively based on a +# previous clustering: a cluster is split into two new clusters repeatedly +# until the target number of clusters is reached, giving a hierarchical +# structure to the clustering. from sklearn.datasets import make_blobs from sklearn.cluster import KMeans, BisectingKMeans import matplotlib.pyplot as plt diff --git a/examples/release_highlights/plot_release_highlights_1_2_0.py b/examples/release_highlights/plot_release_highlights_1_2_0.py index 8165c3bc4eed0..695e74cfcdd64 100644 --- a/examples/release_highlights/plot_release_highlights_1_2_0.py +++ b/examples/release_highlights/plot_release_highlights_1_2_0.py @@ -1,4 +1,4 @@ -# flake8: noqa +# ruff: noqa """ ======================================= Release Highlights for scikit-learn 1.2 diff --git a/examples/release_highlights/plot_release_highlights_1_3_0.py b/examples/release_highlights/plot_release_highlights_1_3_0.py new file mode 100644 index 0000000000000..5ce2617cd08aa --- /dev/null +++ b/examples/release_highlights/plot_release_highlights_1_3_0.py @@ -0,0 +1,156 @@ +# ruff: noqa +""" +======================================= +Release Highlights for scikit-learn 1.3 +======================================= + +.. currentmodule:: sklearn + +We are pleased to announce the release of scikit-learn 1.3! Many bug fixes +and improvements were added, as well as some new key features. We detail +below a few of the major features of this release. **For an exhaustive list of +all the changes**, please refer to the :ref:`release notes `. + +To install the latest version (with pip):: + + pip install --upgrade scikit-learn + +or with conda:: + + conda install -c conda-forge scikit-learn + +""" + +# %% +# Metadata Routing +# ---------------- +# We are in the process of introducing a new way to route metadata such as +# ``sample_weight`` throughout the codebase, which would affect how +# meta-estimators such as :class:`pipeline.Pipeline` and +# :class:`model_selection.GridSearchCV` route metadata. While the +# infrastructure for this feature is already included in this release, the work +# is ongoing and not all meta-estimators support this new feature. You can read +# more about this feature in the :ref:`Metadata Routing User Guide +# `. Note that this feature is still under development and +# not implemented for most meta-estimators. +# +# Third party developers can already start incorporating this into their +# meta-estimators. For more details, see +# :ref:`metadata routing developer guide +# `. + +# %% +# HDBSCAN: hierarchical density-based clustering +# ---------------------------------------------- +# Originally hosted in the scikit-learn-contrib repository, :class:`cluster.HDBSCAN` +# has been adpoted into scikit-learn. It's missing a few features from the original +# implementation which will be added in future releases. +# By performing a modified version of :class:`cluster.DBSCAN` over multiple epsilon +# values simultaneously, :class:`cluster.HDBSCAN` finds clusters of varying densities +# making it more robust to parameter selection than :class:`cluster.DBSCAN`. +# More details in the :ref:`User Guide `. 
+import numpy as np +from sklearn.cluster import HDBSCAN +from sklearn.datasets import load_digits +from sklearn.metrics import v_measure_score + +X, true_labels = load_digits(return_X_y=True) +print(f"number of digits: {len(np.unique(true_labels))}") + +hdbscan = HDBSCAN(min_cluster_size=15).fit(X) +non_noisy_labels = hdbscan.labels_[hdbscan.labels_ != -1] +print(f"number of clusters found: {len(np.unique(non_noisy_labels))}") + +print(v_measure_score(true_labels[hdbscan.labels_ != -1], non_noisy_labels)) + +# %% +# TargetEncoder: a new category encoding strategy +# ----------------------------------------------- +# Well suited for categorical features with high cardinality, +# :class:`preprocessing.TargetEncoder` encodes the categories based on a shrunk +# estimate of the average target values for observations belonging to that category. +# More details in the :ref:`User Guide `. +import numpy as np +from sklearn.preprocessing import TargetEncoder + +X = np.array([["cat"] * 30 + ["dog"] * 20 + ["snake"] * 38], dtype=object).T +y = [90.3] * 30 + [20.4] * 20 + [21.2] * 38 + +enc = TargetEncoder(random_state=0) +X_trans = enc.fit_transform(X, y) + +enc.encodings_ + +# %% +# Missing values support in decision trees +# ---------------------------------------- +# The classes :class:`tree.DecisionTreeClassifier` and +# :class:`tree.DecisionTreeRegressor` now support missing values. For each potential +# threshold on the non-missing data, the splitter will evaluate the split with all the +# missing values going to the left node or the right node. +# More details in the :ref:`User Guide `. +import numpy as np +from sklearn.tree import DecisionTreeClassifier + +X = np.array([0, 1, 6, np.nan]).reshape(-1, 1) +y = [0, 0, 1, 1] + +tree = DecisionTreeClassifier(random_state=0).fit(X, y) +tree.predict(X) + +# %% +# New display `model_selection.ValidationCurveDisplay` +# ---------------------------------------------------- +# :class:`model_selection.ValidationCurveDisplay` is now available to plot results +# from :func:`model_selection.validation_curve`. +from sklearn.datasets import make_classification +from sklearn.linear_model import LogisticRegression +from sklearn.model_selection import ValidationCurveDisplay + +X, y = make_classification(1000, 10, random_state=0) + +_ = ValidationCurveDisplay.from_estimator( + LogisticRegression(), + X, + y, + param_name="C", + param_range=np.geomspace(1e-5, 1e3, num=9), + score_type="both", + score_name="Accuracy", +) + +# %% +# Gamma loss for gradient boosting +# -------------------------------- +# The class :class:`ensemble.HistGradientBoostingRegressor` supports the +# Gamma deviance loss function via `loss="gamma"`. This loss function is useful for +# modeling strictly positive targets with a right-skewed distribution. 
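For reference, the quantity behind `loss="gamma"` is the Gamma deviance, which :func:`~sklearn.metrics.mean_gamma_deviance` averages over samples. Up to constant factors, the per-sample term can be written as:

.. math::
    d(y, \hat{y}) = 2 \left( \log\frac{\hat{y}}{y} + \frac{y}{\hat{y}} - 1 \right)

which is only defined for strictly positive :math:`y` and :math:`\hat{y}`, hence the restriction to strictly positive targets mentioned above.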
+import numpy as np +from sklearn.model_selection import cross_val_score +from sklearn.datasets import make_low_rank_matrix +from sklearn.ensemble import HistGradientBoostingRegressor + +n_samples, n_features = 500, 10 +rng = np.random.RandomState(0) +X = make_low_rank_matrix(n_samples, n_features, random_state=rng) +coef = rng.uniform(low=-10, high=20, size=n_features) +y = rng.gamma(shape=2, scale=np.exp(X @ coef) / 2) +gbdt = HistGradientBoostingRegressor(loss="gamma") +cross_val_score(gbdt, X, y).mean() + +# %% +# Grouping infrequent categories in :class:`preprocessing.OrdinalEncoder` +# ----------------------------------------------------------------------- +# Similarly to :class:`preprocessing.OneHotEncoder`, the class +# :class:`preprocessing.OrdinalEncoder` now supports aggregating infrequent categories +# into a single output for each feature. The parameters to enable the gathering of +# infrequent categories are `min_frequency` and `max_categories`. +# See the :ref:`User Guide ` for more details. +from sklearn.preprocessing import OrdinalEncoder +import numpy as np + +X = np.array( + [["dog"] * 5 + ["cat"] * 20 + ["rabbit"] * 10 + ["snake"] * 3], dtype=object +).T +enc = OrdinalEncoder(min_frequency=6).fit(X) +enc.infrequent_categories_ diff --git a/examples/semi_supervised/plot_label_propagation_digits.py b/examples/semi_supervised/plot_label_propagation_digits.py index f848e3b76e084..bfdff8e362e47 100644 --- a/examples/semi_supervised/plot_label_propagation_digits.py +++ b/examples/semi_supervised/plot_label_propagation_digits.py @@ -24,9 +24,10 @@ class will be very good. # --------------- # # We use the digits dataset. We only use a subset of randomly selected samples. -from sklearn import datasets import numpy as np +from sklearn import datasets + digits = datasets.load_digits() rng = np.random.RandomState(2) indices = np.arange(len(digits.data)) @@ -59,8 +60,8 @@ class will be very good. # # We fit a :class:`~sklearn.semi_supervised.LabelSpreading` and use it to predict # the unknown labels. 
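As a reminder of how the semi-supervised API is used here, a minimal self-contained sketch follows; the split point (first 50 samples kept labeled) is arbitrary and only for illustration, while the real example selects labeled samples at random. Unlabeled samples are marked with the label `-1`:

import numpy as np

from sklearn.datasets import load_digits
from sklearn.semi_supervised import LabelSpreading

X_digits, y_digits = load_digits(return_X_y=True)
y_partial = np.copy(y_digits)
y_partial[50:] = -1  # -1 marks a sample as unlabeled

lp = LabelSpreading(gamma=0.25, max_iter=20).fit(X_digits, y_partial)
# ``transduction_`` holds the labels inferred for every sample, including the
# ones that were marked as unlabeled during ``fit``.
inferred = lp.transduction_[50:]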
-from sklearn.semi_supervised import LabelSpreading from sklearn.metrics import classification_report +from sklearn.semi_supervised import LabelSpreading lp_model = LabelSpreading(gamma=0.25, max_iter=20) lp_model.fit(X, y_train) diff --git a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py index 3a1f533c8a281..45af1d7891b2e 100644 --- a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py +++ b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py @@ -23,13 +23,13 @@ # Authors: Clay Woolam # License: BSD -import numpy as np import matplotlib.pyplot as plt +import numpy as np from scipy import stats from sklearn import datasets -from sklearn.semi_supervised import LabelSpreading from sklearn.metrics import classification_report, confusion_matrix +from sklearn.semi_supervised import LabelSpreading digits = datasets.load_digits() rng = np.random.RandomState(0) @@ -79,7 +79,7 @@ # select up to 5 digit examples that the classifier is most uncertain about uncertainty_index = np.argsort(pred_entropies)[::-1] uncertainty_index = uncertainty_index[ - np.in1d(uncertainty_index, unlabeled_indices) + np.isin(uncertainty_index, unlabeled_indices) ][:5] # keep track of indices that we get labels for diff --git a/examples/semi_supervised/plot_label_propagation_structure.py b/examples/semi_supervised/plot_label_propagation_structure.py index 5de6e9f20a7e3..cfcd1c1bf5a54 100644 --- a/examples/semi_supervised/plot_label_propagation_structure.py +++ b/examples/semi_supervised/plot_label_propagation_structure.py @@ -22,6 +22,7 @@ # Here, all labels but two are tagged as unknown. import numpy as np + from sklearn.datasets import make_circles n_samples = 200 diff --git a/examples/semi_supervised/plot_self_training_varying_threshold.py b/examples/semi_supervised/plot_self_training_varying_threshold.py index 801e48b8411f5..2c7a485d06eb0 100644 --- a/examples/semi_supervised/plot_self_training_varying_threshold.py +++ b/examples/semi_supervised/plot_self_training_varying_threshold.py @@ -32,13 +32,14 @@ # Authors: Oliver Rausch # License: BSD -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import datasets -from sklearn.svm import SVC +from sklearn.metrics import accuracy_score from sklearn.model_selection import StratifiedKFold from sklearn.semi_supervised import SelfTrainingClassifier -from sklearn.metrics import accuracy_score +from sklearn.svm import SVC from sklearn.utils import shuffle n_splits = 3 diff --git a/examples/semi_supervised/plot_semi_supervised_newsgroups.py b/examples/semi_supervised/plot_semi_supervised_newsgroups.py index 609f5d10247c2..58c7f6e42f408 100644 --- a/examples/semi_supervised/plot_semi_supervised_newsgroups.py +++ b/examples/semi_supervised/plot_semi_supervised_newsgroups.py @@ -15,15 +15,13 @@ import numpy as np from sklearn.datasets import fetch_20newsgroups -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.feature_extraction.text import TfidfTransformer -from sklearn.preprocessing import FunctionTransformer +from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer from sklearn.linear_model import SGDClassifier +from sklearn.metrics import f1_score from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline -from sklearn.semi_supervised import SelfTrainingClassifier -from sklearn.semi_supervised import LabelSpreading -from 
sklearn.metrics import f1_score +from sklearn.preprocessing import FunctionTransformer +from sklearn.semi_supervised import LabelSpreading, SelfTrainingClassifier # Loading dataset containing first five categories data = fetch_20newsgroups( diff --git a/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py b/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py index 402cd41d6a0f2..766f7ea0a79c6 100644 --- a/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py +++ b/examples/semi_supervised/plot_semi_supervised_versus_svm_iris.py @@ -18,13 +18,12 @@ # Oliver Rausch # License: BSD -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import datasets +from sklearn.semi_supervised import LabelSpreading, SelfTrainingClassifier from sklearn.svm import SVC -from sklearn.semi_supervised import LabelSpreading -from sklearn.semi_supervised import SelfTrainingClassifier - iris = datasets.load_iris() diff --git a/examples/svm/plot_custom_kernel.py b/examples/svm/plot_custom_kernel.py index c2c3bc6e6ba28..cacd67ed056ac 100644 --- a/examples/svm/plot_custom_kernel.py +++ b/examples/svm/plot_custom_kernel.py @@ -8,9 +8,10 @@ """ -import numpy as np import matplotlib.pyplot as plt -from sklearn import svm, datasets +import numpy as np + +from sklearn import datasets, svm from sklearn.inspection import DecisionBoundaryDisplay # import some data to play with diff --git a/examples/svm/plot_iris_svc.py b/examples/svm/plot_iris_svc.py index 5bcc81dd91d04..61aba3cc06602 100644 --- a/examples/svm/plot_iris_svc.py +++ b/examples/svm/plot_iris_svc.py @@ -35,9 +35,9 @@ """ import matplotlib.pyplot as plt -from sklearn import svm, datasets -from sklearn.inspection import DecisionBoundaryDisplay +from sklearn import datasets, svm +from sklearn.inspection import DecisionBoundaryDisplay # import some data to play with iris = datasets.load_iris() diff --git a/examples/svm/plot_linearsvc_support_vectors.py b/examples/svm/plot_linearsvc_support_vectors.py index 638579f36f3c3..60e9a3e6f32f9 100644 --- a/examples/svm/plot_linearsvc_support_vectors.py +++ b/examples/svm/plot_linearsvc_support_vectors.py @@ -9,11 +9,12 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.datasets import make_blobs -from sklearn.svm import LinearSVC from sklearn.inspection import DecisionBoundaryDisplay +from sklearn.svm import LinearSVC X, y = make_blobs(n_samples=40, centers=2, random_state=0) diff --git a/examples/svm/plot_oneclass.py b/examples/svm/plot_oneclass.py index 082cbcd6de2be..d4348fa0ec435 100644 --- a/examples/svm/plot_oneclass.py +++ b/examples/svm/plot_oneclass.py @@ -11,9 +11,10 @@ """ -import numpy as np -import matplotlib.pyplot as plt import matplotlib.font_manager +import matplotlib.pyplot as plt +import numpy as np + from sklearn import svm xx, yy = np.meshgrid(np.linspace(-5, 5, 500), np.linspace(-5, 5, 500)) diff --git a/examples/svm/plot_rbf_parameters.py b/examples/svm/plot_rbf_parameters.py index fa4310134487a..ba0154b477b46 100644 --- a/examples/svm/plot_rbf_parameters.py +++ b/examples/svm/plot_rbf_parameters.py @@ -135,9 +135,8 @@ def __call__(self, value, clip=None): # 10 is often helpful. Using a basis of 2, a finer # tuning can be achieved but at a much higher cost. 
+from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit from sklearn.svm import SVC -from sklearn.model_selection import StratifiedShuffleSplit -from sklearn.model_selection import GridSearchCV C_range = np.logspace(-2, 10, 13) gamma_range = np.logspace(-9, 3, 13) diff --git a/examples/svm/plot_separating_hyperplane.py b/examples/svm/plot_separating_hyperplane.py index 45bacff6a2b97..23f464169f516 100644 --- a/examples/svm/plot_separating_hyperplane.py +++ b/examples/svm/plot_separating_hyperplane.py @@ -10,11 +10,11 @@ """ import matplotlib.pyplot as plt + from sklearn import svm from sklearn.datasets import make_blobs from sklearn.inspection import DecisionBoundaryDisplay - # we create 40 separable points X, y = make_blobs(n_samples=40, centers=2, random_state=6) diff --git a/examples/svm/plot_separating_hyperplane_unbalanced.py b/examples/svm/plot_separating_hyperplane_unbalanced.py index fe71420ffd0b3..6fd7de98f3fb6 100644 --- a/examples/svm/plot_separating_hyperplane_unbalanced.py +++ b/examples/svm/plot_separating_hyperplane_unbalanced.py @@ -26,6 +26,7 @@ """ import matplotlib.pyplot as plt + from sklearn import svm from sklearn.datasets import make_blobs from sklearn.inspection import DecisionBoundaryDisplay diff --git a/examples/svm/plot_svm_anova.py b/examples/svm/plot_svm_anova.py index 3652fae3e979a..3d5a934bf4884 100644 --- a/examples/svm/plot_svm_anova.py +++ b/examples/svm/plot_svm_anova.py @@ -14,6 +14,7 @@ # Load some data to play with # --------------------------- import numpy as np + from sklearn.datasets import load_iris X, y = load_iris(return_X_y=True) @@ -25,8 +26,8 @@ # %% # Create the pipeline # ------------------- -from sklearn.pipeline import Pipeline from sklearn.feature_selection import SelectPercentile, f_classif +from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC @@ -45,6 +46,7 @@ # Plot the cross-validation score as a function of percentile of features # ----------------------------------------------------------------------- import matplotlib.pyplot as plt + from sklearn.model_selection import cross_val_score score_means = list() diff --git a/examples/svm/plot_svm_kernels.py b/examples/svm/plot_svm_kernels.py index fac86e8a93c7a..b83d645ac056a 100644 --- a/examples/svm/plot_svm_kernels.py +++ b/examples/svm/plot_svm_kernels.py @@ -1,93 +1,273 @@ """ ========================================================= -SVM-Kernels +Plot classification boundaries with different SVM Kernels ========================================================= +This example shows how different kernels in a :class:`~sklearn.svm.SVC` (Support Vector +Classifier) influence the classification boundaries in a binary, two-dimensional +classification problem. -Three different types of SVM-Kernels are displayed below. -The polynomial and RBF are especially useful when the -data-points are not linearly separable. +SVCs aim to find a hyperplane that effectively separates the classes in their training +data by maximizing the margin between the outermost data points of each class. This is +achieved by finding the best weight vector :math:`w` that defines the decision boundary +hyperplane and minimizes the sum of hinge losses for misclassified samples, as measured +by the :func:`~sklearn.metrics.hinge_loss` function. By default, regularization is +applied with the parameter `C=1`, which allows for a certain degree of misclassification +tolerance. 
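As a side note, the hinge loss mentioned above can be computed directly from a fitted classifier's decision function. A minimal sketch on a synthetic toy dataset, unrelated to the 16 points used below:

from sklearn.datasets import make_blobs
from sklearn.metrics import hinge_loss
from sklearn.svm import SVC

X_toy, y_toy = make_blobs(n_samples=40, centers=2, random_state=0)
clf_toy = SVC(kernel="linear", C=1).fit(X_toy, y_toy)

# ``hinge_loss`` compares the labels with the signed distances to the decision
# boundary; samples outside the margin on the correct side contribute zero.
print(hinge_loss(y_toy, clf_toy.decision_function(X_toy)))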
+If the data is not linearly separable in the original feature space, a non-linear kernel +parameter can be set. Depending on the kernel, the process involves adding new features +or transforming existing features to enrich and potentially add meaning to the data. +When a kernel other than `"linear"` is set, the SVC applies the `kernel trick +`__, which +computes the similarity between pairs of data points using the kernel function without +explicitly transforming the entire dataset. The kernel trick surpasses the otherwise +necessary matrix transformation of the whole dataset by only considering the relations +between all pairs of data points. The kernel function maps two vectors (each pair of +observations) to their similarity using their dot product. +The hyperplane can then be calculated using the kernel function as if the dataset were +represented in a higher-dimensional space. Using a kernel function instead of an +explicit matrix transformation improves performance, as the kernel function has a time +complexity of :math:`O({n}^2)`, whereas matrix transformation scales according to the +specific transformation being applied. + +In this example, we compare the most common kernel types of Support Vector Machines: the +linear kernel (`"linear"`), the polynomial kernel (`"poly"`), the radial basis function +kernel (`"rbf"`) and the sigmoid kernel (`"sigmoid"`). """ # Code source: Gaël Varoquaux # License: BSD 3 clause -import numpy as np +# %% +# Creating a dataset +# ------------------ +# We create a two-dimensional classification dataset with 16 samples and two classes. We +# plot the samples with the colors matching their respective targets. import matplotlib.pyplot as plt +import numpy as np + +X = np.array( + [ + [0.4, -0.7], + [-1.5, -1.0], + [-1.4, -0.9], + [-1.3, -1.2], + [-1.1, -0.2], + [-1.2, -0.4], + [-0.5, 1.2], + [-1.5, 2.1], + [1.0, 1.0], + [1.3, 0.8], + [1.2, 0.5], + [0.2, -2.0], + [0.5, -2.4], + [0.2, -2.3], + [0.0, -2.7], + [1.3, 2.1], + ] +) + +y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1]) + +# Plotting settings +fig, ax = plt.subplots(figsize=(4, 3)) +x_min, x_max, y_min, y_max = -3, 3, -3, 3 +ax.set(xlim=(x_min, x_max), ylim=(y_min, y_max)) + +# Plot samples by color and add legend +scatter = ax.scatter(X[:, 0], X[:, 1], s=150, c=y, label=y, edgecolors="k") +ax.legend(*scatter.legend_elements(), loc="upper right", title="Classes") +ax.set_title("Samples in two-dimensional feature space") +_ = plt.show() + +# %% +# We can see that the samples are not clearly separable by a straight line. +# +# Training SVC model and plotting decision boundaries +# --------------------------------------------------- +# We define a function that fits a :class:`~sklearn.svm.SVC` classifier, +# allowing the `kernel` parameter as an input, and then plots the decision +# boundaries learned by the model using +# :class:`~sklearn.inspection.DecisionBoundaryDisplay`. +# +# Notice that for the sake of simplicity, the `C` parameter is set to its +# default value (`C=1`) in this example and the `gamma` parameter is set to +# `gamma=2` across all kernels, although it is automatically ignored for the +# linear kernel. In a real classification task, where performance matters, +# parameter tuning (by using :class:`~sklearn.model_selection.GridSearchCV` for +# instance) is highly recommended to capture different structures within the +# data. 
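Such a search could look like the following sketch; the grid values are arbitrary choices for illustration and the synthetic dataset only stands in for real data:

from sklearn.datasets import make_classification
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

X_tune, y_tune = make_classification(
    n_samples=200, n_features=2, n_redundant=0, random_state=0
)

param_grid = {
    "kernel": ["linear", "poly", "rbf", "sigmoid"],
    "C": [0.1, 1, 10],
    "gamma": [0.5, 1, 2],
}
search = GridSearchCV(SVC(), param_grid, cv=5).fit(X_tune, y_tune)
print(search.best_params_)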
+# +# Setting `response_method="predict"` in +# :class:`~sklearn.inspection.DecisionBoundaryDisplay` colors the areas based +# on their predicted class. Using `response_method="decision_function"` allows +# us to also plot the decision boundary and the margins to both sides of it. +# Finally the support vectors used during training (which always lay on the +# margins) are identified by means of the `support_vectors_` attribute of +# the trained SVCs, and plotted as well. from sklearn import svm +from sklearn.inspection import DecisionBoundaryDisplay + +def plot_training_data_with_decision_boundary(kernel): + # Train the SVC + clf = svm.SVC(kernel=kernel, gamma=2).fit(X, y) + + # Settings for plotting + _, ax = plt.subplots(figsize=(4, 3)) + x_min, x_max, y_min, y_max = -3, 3, -3, 3 + ax.set(xlim=(x_min, x_max), ylim=(y_min, y_max)) + + # Plot decision boundary and margins + common_params = {"estimator": clf, "X": X, "ax": ax} + DecisionBoundaryDisplay.from_estimator( + **common_params, + response_method="predict", + plot_method="pcolormesh", + alpha=0.3, + ) + DecisionBoundaryDisplay.from_estimator( + **common_params, + response_method="decision_function", + plot_method="contour", + levels=[-1, 0, 1], + colors=["k", "k", "k"], + linestyles=["--", "-", "--"], + ) -# Our dataset and targets -X = np.c_[ - (0.4, -0.7), - (-1.5, -1), - (-1.4, -0.9), - (-1.3, -1.2), - (-1.1, -0.2), - (-1.2, -0.4), - (-0.5, 1.2), - (-1.5, 2.1), - (1, 1), - # -- - (1.3, 0.8), - (1.2, 0.5), - (0.2, -2), - (0.5, -2.4), - (0.2, -2.3), - (0, -2.7), - (1.3, 2.1), -].T -Y = [0] * 8 + [1] * 8 - -# figure number -fignum = 1 - -# fit the model -for kernel in ("linear", "poly", "rbf"): - clf = svm.SVC(kernel=kernel, gamma=2) - clf.fit(X, Y) - - # plot the line, the points, and the nearest vectors to the plane - plt.figure(fignum, figsize=(4, 3)) - plt.clf() - - plt.scatter( + # Plot bigger circles around samples that serve as support vectors + ax.scatter( clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], - s=80, + s=250, facecolors="none", - zorder=10, edgecolors="k", ) - plt.scatter(X[:, 0], X[:, 1], c=Y, zorder=10, cmap=plt.cm.Paired, edgecolors="k") - - plt.axis("tight") - x_min = -3 - x_max = 3 - y_min = -3 - y_max = 3 - - XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j] - Z = clf.decision_function(np.c_[XX.ravel(), YY.ravel()]) - - # Put the result into a color plot - Z = Z.reshape(XX.shape) - plt.figure(fignum, figsize=(4, 3)) - plt.pcolormesh(XX, YY, Z > 0, cmap=plt.cm.Paired) - plt.contour( - XX, - YY, - Z, - colors=["k", "k", "k"], - linestyles=["--", "-", "--"], - levels=[-0.5, 0, 0.5], - ) + # Plot samples by color and add legend + ax.scatter(X[:, 0], X[:, 1], c=y, s=150, edgecolors="k") + ax.legend(*scatter.legend_elements(), loc="upper right", title="Classes") + ax.set_title(f" Decision boundaries of {kernel} kernel in SVC") + + _ = plt.show() + + +# %% +# Linear kernel +# ************* +# Linear kernel is the dot product of the input samples: +# +# .. math:: K(\mathbf{x}_1, \mathbf{x}_2) = \mathbf{x}_1^\top \mathbf{x}_2 +# +# It is then applied to any combination of two data points (samples) in the +# dataset. The dot product of the two points determines the +# :func:`~sklearn.metrics.pairwise.cosine_similarity` between both points. The +# higher the value, the more similar the points are. 
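A quick way to convince oneself of this is to compare :func:`~sklearn.metrics.pairwise.linear_kernel` with the plain matrix product of the samples; the random data below is only for illustration:

import numpy as np

from sklearn.metrics.pairwise import linear_kernel

rng = np.random.RandomState(0)
A = rng.randn(5, 2)

# For the linear kernel, the Gram matrix is simply the pairwise dot products.
assert np.allclose(linear_kernel(A, A), A @ A.T)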
+plot_training_data_with_decision_boundary("linear") + +# %% +# Training a :class:`~sklearn.svm.SVC` on a linear kernel results in an +# untransformed feature space, where the hyperplane and the margins are +# straight lines. Due to the lack of expressivity of the linear kernel, the +# trained classes do not perfectly capture the training data. +# +# Polynomial kernel +# ***************** +# The polynomial kernel changes the notion of similarity. The kernel function +# is defined as: +# +# .. math:: +# K(\mathbf{x}_1, \mathbf{x}_2) = (\gamma \cdot \ +# \mathbf{x}_1^\top\mathbf{x}_2 + r)^d +# +# where :math:`{d}` is the degree (`degree`) of the polynomial, :math:`{\gamma}` +# (`gamma`) controls the influence of each individual training sample on the +# decision boundary and :math:`{r}` is the bias term (`coef0`) that shifts the +# data up or down. Here, we use the default value for the degree of the +# polynomal in the kernel funcion (`degree=3`). When `coef0=0` (the default), +# the data is only transformed, but no additional dimension is added. Using a +# polynomial kernel is equivalent to creating +# :class:`~sklearn.preprocessing.PolynomialFeatures` and then fitting a +# :class:`~sklearn.svm.SVC` with a linear kernel on the transformed data, +# although this alternative approach would be computationally expensive for most +# datasets. +plot_training_data_with_decision_boundary("poly") + +# %% +# The polynomial kernel with `gamma=2`` adapts well to the training data, +# causing the margins on both sides of the hyperplane to bend accordingly. +# +# RBF kernel +# ********** +# The radial basis function (RBF) kernel, also known as the Gaussian kernel, is +# the default kernel for Support Vector Machines in scikit-learn. It measures +# similarity between two data points in infinite dimensions and then approaches +# classification by majority vote. The kernel function is defined as: +# +# .. math:: +# K(\mathbf{x}_1, \mathbf{x}_2) = \exp\left(-\gamma \cdot +# {\|\mathbf{x}_1 - \mathbf{x}_2\|^2}\right) +# +# where :math:`{\gamma}` (`gamma`) controls the influence of each individual +# training sample on the decision boundary. +# +# The larger the euclidean distance between two points +# :math:`\|\mathbf{x}_1 - \mathbf{x}_2\|^2` +# the closer the kernel function is to zero. This means that two points far away +# are more likely to be dissimilar. +plot_training_data_with_decision_boundary("rbf") + +# %% +# In the plot we can see how the decision boundaries tend to contract around +# data points that are close to each other. +# +# Sigmoid kernel +# ************** +# The sigmoid kernel function is defined as: +# +# .. math:: +# K(\mathbf{x}_1, \mathbf{x}_2) = \tanh(\gamma \cdot +# \mathbf{x}_1^\top\mathbf{x}_2 + r) +# +# where the kernel coefficient :math:`{\gamma}` (`gamma`) controls the influence +# of each individual training sample on the decision boundary and :math:`{r}` is +# the bias term (`coef0`) that shifts the data up or down. +# +# In the sigmoid kernel, the similarity between two data points is computed +# using the hyperbolic tangent function (:math:`\tanh`). The kernel function +# scales and possibly shifts the dot product of the two points +# (:math:`\mathbf{x}_1` and :math:`\mathbf{x}_2`). - plt.xlim(x_min, x_max) - plt.ylim(y_min, y_max) +plot_training_data_with_decision_boundary("sigmoid") - plt.xticks(()) - plt.yticks(()) - fignum = fignum + 1 -plt.show() +# %% +# We can see that the decision boundaries obtained with the sigmoid kernel +# appear curved and irregular. 
The decision boundary tries to separate the +# classes by fitting a sigmoid-shaped curve, resulting in a complex boundary +# that may not generalize well to unseen data. From this example it becomes +# obvious, that the sigmoid kernel has very specific use cases, when dealing +# with data that exhibits a sigmoidal shape. In this example, careful fine +# tuning might find more generalizable decision boundaries. Because of it's +# specificity, the sigmoid kernel is less commonly used in practice compared to +# other kernels. +# +# Conclusion +# ---------- +# In this example, we have visualized the decision boundaries trained with the +# provided dataset. The plots serve as an intuitive demonstration of how +# different kernels utilize the training data to determine the classification +# boundaries. +# +# The hyperplanes and margins, although computed indirectly, can be imagined as +# planes in the transformed feature space. However, in the plots, they are +# represented relative to the original feature space, resulting in curved +# decision boundaries for the polynomial, RBF, and sigmoid kernels. +# +# Please note that the plots do not evaluate the individual kernel's accuracy or +# quality. They are intended to provide a visual understanding of how the +# different kernels use the training data. +# +# For a comprehensive evaluation, fine-tuning of :class:`~sklearn.svm.SVC` +# parameters using techniques such as +# :class:`~sklearn.model_selection.GridSearchCV` is recommended to capture the +# underlying structures within the data. diff --git a/examples/svm/plot_svm_margin.py b/examples/svm/plot_svm_margin.py index f3717ecaa24ed..b8253264a4ad0 100644 --- a/examples/svm/plot_svm_margin.py +++ b/examples/svm/plot_svm_margin.py @@ -17,8 +17,9 @@ # Modified for documentation by Jaques Grobler # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import svm # we create 40 separable points diff --git a/examples/svm/plot_svm_nonlinear.py b/examples/svm/plot_svm_nonlinear.py index f88231b4b6af4..4990e509661a1 100644 --- a/examples/svm/plot_svm_nonlinear.py +++ b/examples/svm/plot_svm_nonlinear.py @@ -11,8 +11,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import svm xx, yy = np.meshgrid(np.linspace(-3, 3, 500), np.linspace(-3, 3, 500)) diff --git a/examples/svm/plot_svm_regression.py b/examples/svm/plot_svm_regression.py index 75a16b571c3ea..ab34528a37af6 100644 --- a/examples/svm/plot_svm_regression.py +++ b/examples/svm/plot_svm_regression.py @@ -7,9 +7,10 @@ """ +import matplotlib.pyplot as plt import numpy as np + from sklearn.svm import SVR -import matplotlib.pyplot as plt # %% # Generate sample data diff --git a/examples/svm/plot_svm_scale_c.py b/examples/svm/plot_svm_scale_c.py index 4ba025cffac8e..ea09f03ec7f95 100644 --- a/examples/svm/plot_svm_scale_c.py +++ b/examples/svm/plot_svm_scale_c.py @@ -3,9 +3,8 @@ Scaling the regularization parameter for SVCs ============================================== -The following example illustrates the effect of scaling the -regularization parameter when using :ref:`svm` for -:ref:`classification `. +The following example illustrates the effect of scaling the regularization +parameter when using :ref:`svm` for :ref:`classification `. For SVC classification, we are interested in a risk minimization for the equation: @@ -21,25 +20,18 @@ and our model parameters. 
- :math:`\Omega` is a `penalty` function of our model parameters -If we consider the loss function to be the individual error per -sample, then the data-fit term, or the sum of the error for each sample, will -increase as we add more samples. The penalization term, however, will not -increase. - -When using, for example, :ref:`cross validation `, to -set the amount of regularization with `C`, there will be a -different amount of samples between the main problem and the smaller problems -within the folds of the cross validation. - -Since our loss function is dependent on the amount of samples, the latter -will influence the selected value of `C`. -The question that arises is "How do we optimally adjust C to -account for the different amount of training samples?" - -In the remainder of this example, we will investigate the effect of scaling -the value of the regularization parameter `C` in regards to the number of -samples for both L1 and L2 penalty. We will generate some synthetic datasets -that are appropriate for each type of regularization. +If we consider the loss function to be the individual error per sample, then the +data-fit term, or the sum of the error for each sample, increases as we add more +samples. The penalization term, however, does not increase. + +When using, for example, :ref:`cross validation `, to set the +amount of regularization with `C`, there would be a different amount of samples +between the main problem and the smaller problems within the folds of the cross +validation. + +Since the loss function dependens on the amount of samples, the latter +influences the selected value of `C`. The question that arises is "How do we +optimally adjust C to account for the different amount of training samples?" """ # Author: Andreas Mueller @@ -47,18 +39,16 @@ # License: BSD 3 clause # %% -# L1-penalty case +# Data generation # --------------- -# In the L1 case, theory says that prediction consistency (i.e. that under -# given hypothesis, the estimator learned predicts as well as a model knowing -# the true distribution) is not possible because of the bias of the L1. It -# does say, however, that model consistency, in terms of finding the right set -# of non-zero parameters as well as their signs, can be achieved by scaling -# `C`. # -# We will demonstrate this effect by using a synthetic dataset. This -# dataset will be sparse, meaning that only a few features will be informative -# and useful for the model. +# In this example we investigate the effect of reparametrizing the regularization +# parameter `C` to account for the number of samples when using either L1 or L2 +# penalty. For such purpose we create a synthetic dataset with a large number of +# features, out of which only a few are informative. We therefore expect the +# regularization to shrink the coefficients towards zero (L2 penalty) or exactly +# zero (L1 penalty). + from sklearn.datasets import make_classification n_samples, n_features = 100, 300 @@ -67,26 +57,50 @@ ) # %% -# Now, we can define a linear SVC with the `l1` penalty. +# L1-penalty case +# --------------- +# In the L1 case, theory says that provided a strong regularization, the +# estimator cannot predict as well as a model knowing the true distribution +# (even in the limit where the sample size grows to infinity) as it may set some +# weights of otherwise predictive features to zero, which induces a bias. It does +# say, however, that it is possible to find the right set of non-zero parameters +# as well as their signs by tuning `C`. 
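The sparsity induced by the L1 penalty can also be observed directly by counting non-zero coefficients for different values of `C`; the dataset and the two values below are only illustrative:

import numpy as np

from sklearn.datasets import make_classification
from sklearn.svm import LinearSVC

X_l1, y_l1 = make_classification(
    n_samples=100, n_features=300, n_informative=5, random_state=1
)

for C in (0.01, 1.0):
    clf_l1 = LinearSVC(
        penalty="l1", loss="squared_hinge", dual=False, tol=1e-3, C=C
    ).fit(X_l1, y_l1)
    # Stronger regularization (smaller C) leaves fewer non-zero coefficients.
    print(C, np.count_nonzero(clf_l1.coef_))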
+# +# We define a linear SVC with the L1 penalty. + from sklearn.svm import LinearSVC model_l1 = LinearSVC(penalty="l1", loss="squared_hinge", dual=False, tol=1e-3) # %% -# We will compute the mean test score for different values of `C`. +# We compute the mean test score for different values of `C` via +# cross-validation. + import numpy as np import pandas as pd -from sklearn.model_selection import validation_curve, ShuffleSplit + +from sklearn.model_selection import ShuffleSplit, validation_curve Cs = np.logspace(-2.3, -1.3, 10) train_sizes = np.linspace(0.3, 0.7, 3) labels = [f"fraction: {train_size}" for train_size in train_sizes] +shuffle_params = { + "test_size": 0.3, + "n_splits": 150, + "random_state": 1, +} results = {"C": Cs} for label, train_size in zip(labels, train_sizes): - cv = ShuffleSplit(train_size=train_size, test_size=0.3, n_splits=50, random_state=1) + cv = ShuffleSplit(train_size=train_size, **shuffle_params) train_scores, test_scores = validation_curve( - model_l1, X, y, param_name="C", param_range=Cs, cv=cv + model_l1, + X, + y, + param_name="C", + param_range=Cs, + cv=cv, + n_jobs=2, ) results[label] = test_scores.mean(axis=1) results = pd.DataFrame(results) @@ -101,47 +115,59 @@ axes[0].set_ylabel("CV score") axes[0].set_title("No scaling") +for label in labels: + best_C = results.loc[results[label].idxmax(), "C"] + axes[0].axvline(x=best_C, linestyle="--", color="grey", alpha=0.7) + # plot results by scaling C for train_size_idx, label in enumerate(labels): + train_size = train_sizes[train_size_idx] results_scaled = results[[label]].assign( - C_scaled=Cs * float(n_samples * train_sizes[train_size_idx]) + C_scaled=Cs * float(n_samples * np.sqrt(train_size)) ) results_scaled.plot(x="C_scaled", ax=axes[1], logx=True, label=label) -axes[1].set_title("Scaling C by 1 / n_samples") + best_C_scaled = results_scaled["C_scaled"].loc[results[label].idxmax()] + axes[1].axvline(x=best_C_scaled, linestyle="--", color="grey", alpha=0.7) + +axes[1].set_title("Scaling C by sqrt(1 / n_samples)") _ = fig.suptitle("Effect of scaling C with L1 penalty") # %% -# Here, we observe that the cross-validation-error correlates best with the -# test-error, when scaling our `C` with the number of samples, `n`. +# In the region of small `C` (strong regularization) all the coefficients +# learned by the models are zero, leading to severe underfitting. Indeed, the +# accuracy in this region is at the chance level. # -# L2-penalty case -# --------------- -# We can repeat a similar experiment with the `l2` penalty. In this case, we -# don't need to use a sparse dataset. +# Using the default scale results in a somewhat stable optimal value of `C`, +# whereas the transition out of the underfitting region depends on the number of +# training samples. The reparametrization leads to even more stable results. # -# In this case, the theory says that in order to achieve prediction -# consistency, the penalty parameter should be kept constant as the number of -# samples grow. +# See e.g. theorem 3 of :arxiv:`On the prediction performance of the Lasso +# <1402.1700>` or :arxiv:`Simultaneous analysis of Lasso and Dantzig selector +# <0801.1095>` where the regularization parameter is always assumed to be +# proportional to 1 / sqrt(n_samples). # -# So we will repeat the same experiment by creating a linear SVC classifier -# with the `l2` penalty and check the test score via cross-validation and -# plot the results with and without scaling the parameter `C`. 
-rng = np.random.RandomState(1) -y = np.sign(0.5 - rng.rand(n_samples)) -X = rng.randn(n_samples, n_features // 5) + y[:, np.newaxis] -X += 5 * rng.randn(n_samples, n_features // 5) +# L2-penalty case +# --------------- +# We can do a similar experiment with the L2 penalty. In this case, the +# theory says that in order to achieve prediction consistency, the penalty +# parameter should be kept constant as the number of samples grow. -# %% model_l2 = LinearSVC(penalty="l2", loss="squared_hinge", dual=True) -Cs = np.logspace(-4.5, -2, 10) +Cs = np.logspace(-8, 4, 11) labels = [f"fraction: {train_size}" for train_size in train_sizes] results = {"C": Cs} for label, train_size in zip(labels, train_sizes): - cv = ShuffleSplit(train_size=train_size, test_size=0.3, n_splits=50, random_state=1) + cv = ShuffleSplit(train_size=train_size, **shuffle_params) train_scores, test_scores = validation_curve( - model_l2, X, y, param_name="C", param_range=Cs, cv=cv + model_l2, + X, + y, + param_name="C", + param_range=Cs, + cv=cv, + n_jobs=2, ) results[label] = test_scores.mean(axis=1) results = pd.DataFrame(results) @@ -156,17 +182,29 @@ axes[0].set_ylabel("CV score") axes[0].set_title("No scaling") +for label in labels: + best_C = results.loc[results[label].idxmax(), "C"] + axes[0].axvline(x=best_C, linestyle="--", color="grey", alpha=0.8) + # plot results by scaling C for train_size_idx, label in enumerate(labels): results_scaled = results[[label]].assign( - C_scaled=Cs * float(n_samples * train_sizes[train_size_idx]) + C_scaled=Cs * float(n_samples * np.sqrt(train_sizes[train_size_idx])) ) results_scaled.plot(x="C_scaled", ax=axes[1], logx=True, label=label) -axes[1].set_title("Scaling C by 1 / n_samples") + best_C_scaled = results_scaled["C_scaled"].loc[results[label].idxmax()] + axes[1].axvline(x=best_C_scaled, linestyle="--", color="grey", alpha=0.8) +axes[1].set_title("Scaling C by sqrt(1 / n_samples)") -_ = fig.suptitle("Effect of scaling C with L2 penalty") +fig.suptitle("Effect of scaling C with L2 penalty") +plt.show() # %% -# So or the L2 penalty case, the best result comes from the case where `C` is -# not scaled. -plt.show() +# For the L2 penalty case, the reparametrization seems to have a smaller impact +# on the stability of the optimal value for the regularization. The transition +# out of the overfitting region occurs in a more spread range and the accuracy +# does not seem to be degraded up to chance level. +# +# Try increasing the value to `n_splits=1_000` for better results in the L2 +# case, which is not shown here due to the limitations on the documentation +# builder. 
diff --git a/examples/svm/plot_svm_tie_breaking.py b/examples/svm/plot_svm_tie_breaking.py index 93148225b0bb3..848b81dee9c69 100644 --- a/examples/svm/plot_svm_tie_breaking.py +++ b/examples/svm/plot_svm_tie_breaking.py @@ -17,10 +17,11 @@ # Code source: Andreas Mueller, Adrin Jalali # License: BSD 3 clause -import numpy as np import matplotlib.pyplot as plt -from sklearn.svm import SVC +import numpy as np + from sklearn.datasets import make_blobs +from sklearn.svm import SVC X, y = make_blobs(random_state=27) diff --git a/examples/svm/plot_weighted_samples.py b/examples/svm/plot_weighted_samples.py index f346599300aba..c17742e091390 100644 --- a/examples/svm/plot_weighted_samples.py +++ b/examples/svm/plot_weighted_samples.py @@ -14,8 +14,9 @@ """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn import svm diff --git a/examples/text/plot_document_classification_20newsgroups.py b/examples/text/plot_document_classification_20newsgroups.py index ffee60087d0c3..04aad46c8451a 100644 --- a/examples/text/plot_document_classification_20newsgroups.py +++ b/examples/text/plot_document_classification_20newsgroups.py @@ -36,9 +36,10 @@ # the classification problem "too easy". This is achieved using simple # heuristics that are neither perfect nor standard, hence disabled by default. +from time import time + from sklearn.datasets import fetch_20newsgroups from sklearn.feature_extraction.text import TfidfVectorizer -from time import time categories = [ "alt.atheism", @@ -158,6 +159,7 @@ def load_dataset(verbose=False, remove=()): # in the classification errors. import matplotlib.pyplot as plt + from sklearn.metrics import ConfusionMatrixDisplay fig, ax = plt.subplots(figsize=(10, 5)) @@ -182,8 +184,8 @@ def load_dataset(verbose=False, remove=()): # We can gain a deeper understanding of how this classifier makes its decisions # by looking at the words with the highest average feature effects: -import pandas as pd import numpy as np +import pandas as pd def plot_feature_effects(): @@ -315,8 +317,8 @@ def plot_feature_effects(): # training time and testing time. For such purpose we define the following # benchmarking utilities: -from sklearn.utils.extmath import density from sklearn import metrics +from sklearn.utils.extmath import density def benchmark(clf, custom_name=False): @@ -358,17 +360,14 @@ def benchmark(clf, custom_name=False): # Notice that the most important hyperparameters values were tuned using a grid # search procedure not shown in this notebook for the sake of simplicity. See # the example script -# :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` +# :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` # noqa: E501 # for a demo on how such tuning can be done. 
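The body of the `benchmark` helper is elided by the diff context above; as a rough, self-contained sketch of what such a utility typically measures (training time, prediction time and test accuracy), here run on synthetic data rather than the vectorized newsgroups splits:

from time import time

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

X_bm, y_bm = make_classification(n_samples=2_000, n_features=50, random_state=0)
X_tr, X_te, y_tr, y_te = train_test_split(X_bm, y_bm, random_state=0)


def tiny_benchmark(clf):
    # Time the fit, time the predictions and report the test accuracy.
    t0 = time()
    clf.fit(X_tr, y_tr)
    train_time = time() - t0
    t0 = time()
    pred = clf.predict(X_te)
    test_time = time() - t0
    return train_time, test_time, accuracy_score(y_te, pred)


print(tiny_benchmark(LogisticRegression(max_iter=1_000)))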
-from sklearn.linear_model import LogisticRegression -from sklearn.svm import LinearSVC -from sklearn.linear_model import SGDClassifier -from sklearn.naive_bayes import ComplementNB -from sklearn.neighbors import KNeighborsClassifier -from sklearn.neighbors import NearestCentroid from sklearn.ensemble import RandomForestClassifier - +from sklearn.linear_model import LogisticRegression, SGDClassifier +from sklearn.naive_bayes import ComplementNB +from sklearn.neighbors import KNeighborsClassifier, NearestCentroid +from sklearn.svm import LinearSVC results = [] for clf, name in ( diff --git a/examples/text/plot_document_clustering.py b/examples/text/plot_document_clustering.py index 368cf7cea60ae..fa68b8bd312ea 100644 --- a/examples/text/plot_document_clustering.py +++ b/examples/text/plot_document_clustering.py @@ -46,6 +46,7 @@ # strip those features and have a more sensible clustering problem. import numpy as np + from sklearn.datasets import fetch_20newsgroups categories = [ @@ -104,9 +105,10 @@ # For more reference, see :ref:`clustering_evaluation`. from collections import defaultdict -from sklearn import metrics from time import time +from sklearn import metrics + evaluations = [] evaluations_std = [] @@ -277,7 +279,6 @@ def fit_and_evaluate(km, X, name=None, n_runs=5): from sklearn.pipeline import make_pipeline from sklearn.preprocessing import Normalizer - lsa = make_pipeline(TruncatedSVD(n_components=100), Normalizer(copy=False)) t0 = time() X_lsa = lsa.fit_transform(X_tfidf) @@ -353,8 +354,7 @@ def fit_and_evaluate(km, X, name=None, n_runs=5): # case we also add LSA to the pipeline to reduce the dimension and sparcity of # the hashed vector space. -from sklearn.feature_extraction.text import HashingVectorizer -from sklearn.feature_extraction.text import TfidfTransformer +from sklearn.feature_extraction.text import HashingVectorizer, TfidfTransformer lsa_vectorizer = make_pipeline( HashingVectorizer(stop_words="english", n_features=50_000), @@ -394,8 +394,8 @@ def fit_and_evaluate(km, X, name=None, n_runs=5): # Clustering evaluation summary # ============================== -import pandas as pd import matplotlib.pyplot as plt +import pandas as pd fig, (ax0, ax1) = plt.subplots(ncols=2, figsize=(16, 6), sharey=True) diff --git a/examples/text/plot_hashing_vs_dict_vectorizer.py b/examples/text/plot_hashing_vs_dict_vectorizer.py index 8200c646f69ee..ce2dcc2d13c41 100644 --- a/examples/text/plot_hashing_vs_dict_vectorizer.py +++ b/examples/text/plot_hashing_vs_dict_vectorizer.py @@ -118,6 +118,7 @@ def token_freqs(doc): # both of them receive dictionaries as input. from time import time + from sklearn.feature_extraction import DictVectorizer dict_count_vectorizers = defaultdict(list) diff --git a/examples/tree/plot_cost_complexity_pruning.py b/examples/tree/plot_cost_complexity_pruning.py index d21d163c9a1e3..b232389ea9ded 100644 --- a/examples/tree/plot_cost_complexity_pruning.py +++ b/examples/tree/plot_cost_complexity_pruning.py @@ -18,8 +18,9 @@ """ import matplotlib.pyplot as plt -from sklearn.model_selection import train_test_split + from sklearn.datasets import load_breast_cancer +from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier # %% diff --git a/examples/tree/plot_iris_dtc.py b/examples/tree/plot_iris_dtc.py index 14f6506b5810f..b3d834da5d067 100644 --- a/examples/tree/plot_iris_dtc.py +++ b/examples/tree/plot_iris_dtc.py @@ -23,13 +23,12 @@ # %% # Display the decision functions of trees trained on all pairs of features. 
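A standalone sketch of the hashed tf-idf plus LSA vectorization assembled in the document-clustering hunks above, fed into k-means; the toy corpus and the reduced `n_components=2` (the example itself uses 100 components on real data) are illustrative:

from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import HashingVectorizer, TfidfTransformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import Normalizer

# Hashed term counts -> tf-idf weighting -> LSA -> unit-norm rows.
lsa_vectorizer = make_pipeline(
    HashingVectorizer(stop_words="english", n_features=50_000),
    TfidfTransformer(),
    TruncatedSVD(n_components=2, random_state=0),
    Normalizer(copy=False),
)

docs = [
    "graphics card drivers and opengl rendering",
    "rendering pipelines for 3d graphics",
    "space shuttle launch window announced",
    "orbital mechanics of the space station",
]
X_lsa = lsa_vectorizer.fit_transform(docs)
labels = KMeans(n_clusters=2, n_init=5, random_state=0).fit_predict(X_lsa)
print(labels)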
-import numpy as np import matplotlib.pyplot as plt +import numpy as np from sklearn.datasets import load_iris -from sklearn.tree import DecisionTreeClassifier from sklearn.inspection import DecisionBoundaryDisplay - +from sklearn.tree import DecisionTreeClassifier # Parameters n_classes = 3 diff --git a/examples/tree/plot_tree_regression.py b/examples/tree/plot_tree_regression.py index 6ed28a5cbfa99..5a3da0b7b6d06 100644 --- a/examples/tree/plot_tree_regression.py +++ b/examples/tree/plot_tree_regression.py @@ -15,9 +15,10 @@ """ # Import the necessary modules and libraries +import matplotlib.pyplot as plt import numpy as np + from sklearn.tree import DecisionTreeRegressor -import matplotlib.pyplot as plt # Create a random dataset rng = np.random.RandomState(1) diff --git a/examples/tree/plot_tree_regression_multioutput.py b/examples/tree/plot_tree_regression_multioutput.py index a75652a6ddd56..b6d2800d2732d 100644 --- a/examples/tree/plot_tree_regression_multioutput.py +++ b/examples/tree/plot_tree_regression_multioutput.py @@ -15,8 +15,9 @@ details of the training data and learn from the noise, i.e. they overfit. """ -import numpy as np import matplotlib.pyplot as plt +import numpy as np + from sklearn.tree import DecisionTreeRegressor # Create a random dataset diff --git a/examples/tree/plot_unveil_tree_structure.py b/examples/tree/plot_unveil_tree_structure.py index 6313d0ccbb74f..9ce67132ef12a 100644 --- a/examples/tree/plot_unveil_tree_structure.py +++ b/examples/tree/plot_unveil_tree_structure.py @@ -19,10 +19,10 @@ import numpy as np from matplotlib import pyplot as plt -from sklearn.model_selection import train_test_split +from sklearn import tree from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split from sklearn.tree import DecisionTreeClassifier -from sklearn import tree ############################################################################## # Train tree classifier @@ -44,13 +44,15 @@ # # The decision classifier has an attribute called ``tree_`` which allows access # to low level attributes such as ``node_count``, the total number of nodes, -# and ``max_depth``, the maximal depth of the tree. It also stores the -# entire binary tree structure, represented as a number of parallel arrays. The -# i-th element of each array holds information about the node ``i``. Node 0 is -# the tree's root. Some of the arrays only apply to either leaves or split -# nodes. In this case the values of the nodes of the other type is arbitrary. -# For example, the arrays ``feature`` and ``threshold`` only apply to split -# nodes. The values for leaf nodes in these arrays are therefore arbitrary. +# and ``max_depth``, the maximal depth of the tree. The +# ``tree_.compute_node_depths()`` method computes the depth of each node in the +# tree. `tree_` also stores the entire binary tree structure, represented as a +# number of parallel arrays. The i-th element of each array holds information +# about the node ``i``. Node 0 is the tree's root. Some of the arrays only +# apply to either leaves or split nodes. In this case the values of the nodes +# of the other type is arbitrary. For example, the arrays ``feature`` and +# ``threshold`` only apply to split nodes. The values for leaf nodes in these +# arrays are therefore arbitrary. 
# # Among these arrays, we have: # @@ -63,6 +65,10 @@ # - ``n_node_samples[i]``: the number of training samples reaching node # ``i`` # - ``impurity[i]``: the impurity at node ``i`` +# - ``weighted_n_node_samples[i]``: the weighted number of training samples +# reaching node ``i`` +# - ``value[i, j, k]``: the summary of the training samples that reached node i for +# class j and output k. # # Using the arrays, we can traverse the tree structure to compute various # properties. Below, we will compute the depth of each node and whether or not @@ -73,6 +79,7 @@ children_right = clf.tree_.children_right feature = clf.tree_.feature threshold = clf.tree_.threshold +values = clf.tree_.value node_depth = np.zeros(shape=n_nodes, dtype=np.int64) is_leaves = np.zeros(shape=n_nodes, dtype=bool) @@ -100,13 +107,13 @@ for i in range(n_nodes): if is_leaves[i]: print( - "{space}node={node} is a leaf node.".format( - space=node_depth[i] * "\t", node=i + "{space}node={node} is a leaf node with value={value}.".format( + space=node_depth[i] * "\t", node=i, value=values[i] ) ) else: print( - "{space}node={node} is a split node: " + "{space}node={node} is a split node with value={value}: " "go to node {left} if X[:, {feature}] <= {threshold} " "else to node {right}.".format( space=node_depth[i] * "\t", @@ -115,9 +122,30 @@ feature=feature[i], threshold=threshold[i], right=children_right[i], + value=values[i], ) ) +# %% +# What is the values array used here? +# ----------------------------------- +# The `tree_.value` array is a 3D array of shape +# [``n_nodes``, ``n_classes``, ``n_outputs``] which provides the count of samples +# reaching a node for each class and for each output. Each node has a ``value`` +# array which is the number of weighted samples reaching this +# node for each output and class. +# +# For example, in the above tree built on the iris dataset, the root node has +# ``value = [37, 34, 41]``, indicating there are 37 samples +# of class 0, 34 samples of class 1, and 41 samples of class 2 at the root node. +# Traversing the tree, the samples are split and as a result, the ``value`` array +# reaching each node changes. The left child of the root node has ``value = [37, 0, 0]`` +# because all 37 samples in the left child node are from class 0. +# +# Note: In this example, `n_outputs=1`, but the tree classifier can also handle +# multi-output problems. The `value` array at each node would just be a 2D +# array instead. + ############################################################################## # We can compare the above output to the plot of the decision tree. diff --git a/maint_tools/check_pxd_in_installation.py b/maint_tools/check_pxd_in_installation.py index ac1a8f9627a95..996d45d64d42a 100644 --- a/maint_tools/check_pxd_in_installation.py +++ b/maint_tools/check_pxd_in_installation.py @@ -6,12 +6,11 @@ """ import os -import sys import pathlib +import subprocess +import sys import tempfile import textwrap -import subprocess - sklearn_dir = pathlib.Path(sys.argv[1]) pxd_files = list(sklearn_dir.glob("**/*.pxd")) diff --git a/maint_tools/sort_whats_new.py b/maint_tools/sort_whats_new.py index 178e33bc87e5f..7241059176b66 100755 --- a/maint_tools/sort_whats_new.py +++ b/maint_tools/sort_whats_new.py @@ -2,8 +2,8 @@ # Sorts what's new entries with per-module headings. # Pass what's new entries on stdin. 
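A minimal sketch of inspecting the `tree_` attributes that the unveil-tree-structure hunks above document, including the `compute_node_depths()` method and the `value` array; the dataset and the `max_leaf_nodes` setting are illustrative:

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = DecisionTreeClassifier(max_leaf_nodes=3, random_state=0).fit(X_train, y_train)

tree_ = clf.tree_
print("n_nodes:", tree_.node_count, "max_depth:", tree_.max_depth)
print("node depths:", tree_.compute_node_depths())

# Parallel arrays: entry i describes node i, node 0 being the root.
print("children_left: ", tree_.children_left)
print("children_right:", tree_.children_right)

# For each node, `value` holds the per-class (weighted) training sample
# counts, so the root entry is the class distribution of the training set.
print("value shape:", tree_.value.shape)
print("root value:", tree_.value[0])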
-import sys import re +import sys from collections import defaultdict LABEL_ORDER = ["MajorFeature", "Feature", "Efficiency", "Enhancement", "Fix", "API"] diff --git a/maint_tools/update_tracking_issue.py b/maint_tools/update_tracking_issue.py index 4ddc9d1bfe8e6..725802416fb6c 100644 --- a/maint_tools/update_tracking_issue.py +++ b/maint_tools/update_tracking_issue.py @@ -11,10 +11,10 @@ github account that does **not** have commit access to the public repo. """ -from pathlib import Path -import sys import argparse +import sys from datetime import datetime, timezone +from pathlib import Path import defusedxml.ElementTree as ET from github import Github diff --git a/pyproject.toml b/pyproject.toml index 7e39589216956..e662c6cd42e2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ requires = [ "setuptools", "wheel", - "Cython>=0.29.33", + "Cython>=0.29.33,<3.0", # use oldest-supported-numpy which provides the oldest numpy version with # wheels on PyPI @@ -39,12 +39,51 @@ exclude = ''' )/ ''' +[tool.ruff] +# all rules can be found here: https://beta.ruff.rs/docs/rules/ +select = ["E", "F", "W", "I"] + +# max line length for black +line-length = 88 +target-version = "py38" + +ignore=[ + # space before : (needed for how black formats slicing) + "E203", + # do not assign a lambda expression, use a def + "E731", + # do not use variables named 'l', 'O', or 'I' + "E741", +] + +exclude=[ + ".git", + "__pycache__", + "dist", + "sklearn/externals", + "doc/_build", + "doc/auto_examples", + "doc/tutorial", + "build", + "asv_benchmarks/env", + "asv_benchmarks/html", + "asv_benchmarks/results", + "asv_benchmarks/benchmarks/cache", +] + +[tool.ruff.per-file-ignores] +# It's fine not to put the import at the top of the file in the examples +# folder. +"examples/*"=["E402"] +"doc/conf.py"=["E402"] + + [tool.cython-lint] -# Ignore the same error codes as flake8 +# Ignore the same error codes as ruff # + E501 (line too long) because keeping it < 88 in cython # often makes code less readable. ignore = [ - # check ignored by default in flake8. Meaning unclear. + # multiple spaces/tab after comma 'E24', # space before : (needed for how black formats slicing) 'E203', diff --git a/setup.cfg b/setup.cfg index 19f2bebeb7280..94ed59f539cb7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -20,52 +20,15 @@ addopts = # correctly on the CI when running `pytest --pyargs sklearn` from the # source folder. -p sklearn.tests.random_seed - -rN filterwarnings = ignore:the matrix subclass:PendingDeprecationWarning -[flake8] -# max line length for black -max-line-length = 88 -target-version = ['py37'] -# Default flake8 3.5 ignored flags -ignore= - # check ignored by default in flake8. Meaning unclear. - E24, - # space before : (needed for how black formats slicing) - E203, - # do not assign a lambda expression, use a def - E731, - # do not use variables named 'l', 'O', or 'I' - E741, - # line break before binary operator - W503, - # line break after binary operator - W504 -exclude= - .git, - __pycache__, - dist, - sklearn/externals, - doc/_build, - doc/auto_examples, - doc/tutorial, - build, - asv_benchmarks/env, - asv_benchmarks/html, - asv_benchmarks/results, - asv_benchmarks/benchmarks/cache - -# It's fine not to put the import at the top of the file in the examples -# folder. 
-per-file-ignores = - examples/*: E402 - doc/conf.py: E402 - [mypy] ignore_missing_imports = True allow_redefinition = True +exclude= + sklearn/externals [check-manifest] # ignore files missing in VCS diff --git a/setup.py b/setup.py index 33d105a213a7c..d3f70c548316c 100755 --- a/setup.py +++ b/setup.py @@ -4,18 +4,17 @@ # 2010 Fabian Pedregosa # License: 3-clause BSD -import sys +import importlib import os -from os.path import join import platform import shutil +import sys +import traceback +from os.path import join from setuptools import Command, Extension, setup from setuptools.command.build_ext import build_ext -import traceback -import importlib - try: import builtins except ImportError: @@ -454,10 +453,10 @@ def configure_extension_modules(): if "sdist" in sys.argv or "--help" in sys.argv: return [] - from sklearn._build_utils import cythonize_extensions - from sklearn._build_utils import gen_from_templates import numpy + from sklearn._build_utils import cythonize_extensions, gen_from_templates + is_pypy = platform.python_implementation() == "PyPy" np_include = numpy.get_include() default_optimization_level = "O2" @@ -588,6 +587,7 @@ def setup_package(): "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ], @@ -602,6 +602,12 @@ def setup_package(): }, ) + # Overwrite the dependencies to not allow for NumPy >= 2.0 + metadata["install_requires"] = [ + f"{dep},<2.0" if dep.startswith("numpy") else dep + for dep in metadata["install_requires"] + ] + commands = [arg for arg in sys.argv[1:] if not arg.startswith("-")] if not all( command in ("egg_info", "dist_info", "clean", "check") for command in commands diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 47bb893bd00a0..48d907fa5ad23 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -12,13 +12,12 @@ See http://scikit-learn.org for complete documentation. """ -import sys import logging import os import random +import sys - -from ._config import get_config, set_config, config_context +from ._config import config_context, get_config, set_config logger = logging.getLogger(__name__) @@ -39,7 +38,7 @@ # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer. # 'X.Y.dev0' is the canonical version of 'X.Y.dev' # -__version__ = "1.3.dev0" +__version__ = "1.3.2" # On OSX, we can get a runtime error due to multiple OpenMP libraries loaded @@ -77,8 +76,10 @@ # It is necessary to do this prior to importing show_versions as the # later is linked to the OpenMP runtime to make it possible to introspect # it and importing it first would fail if the OpenMP dll cannot be found. - from . import _distributor_init # noqa: F401 - from . import __check_build # noqa: F401 + from . 
import ( + __check_build, # noqa: F401 + _distributor_init, # noqa: F401 + ) from .base import clone from .utils._show_versions import show_versions diff --git a/sklearn/_build_utils/__init__.py b/sklearn/_build_utils/__init__.py index f84dfa09a9f94..056215e162647 100644 --- a/sklearn/_build_utils/__init__.py +++ b/sklearn/_build_utils/__init__.py @@ -5,15 +5,15 @@ # license: BSD +import contextlib import os + import sklearn -import contextlib -from .pre_build_helpers import basic_check_build -from .openmp_helpers import check_openmp_support from .._min_dependencies import CYTHON_MIN_VERSION from ..externals._packaging.version import parse - +from .openmp_helpers import check_openmp_support +from .pre_build_helpers import basic_check_build DEFAULT_ROOT = "sklearn" diff --git a/sklearn/_build_utils/pre_build_helpers.py b/sklearn/_build_utils/pre_build_helpers.py index c1d50abd3ae0c..f3eb054bb037e 100644 --- a/sklearn/_build_utils/pre_build_helpers.py +++ b/sklearn/_build_utils/pre_build_helpers.py @@ -1,11 +1,11 @@ """Helpers to check build environment before actual build of scikit-learn""" +import glob import os +import subprocess import sys -import glob import tempfile import textwrap -import subprocess from setuptools.command.build_ext import customize_compiler, new_compiler diff --git a/sklearn/_config.py b/sklearn/_config.py index 43755071e54e9..91d149c81dc59 100644 --- a/sklearn/_config.py +++ b/sklearn/_config.py @@ -1,8 +1,8 @@ """Global configuration state and functions for management """ import os -from contextlib import contextmanager as contextmanager import threading +from contextlib import contextmanager as contextmanager _global_config = { "assume_finite": bool(os.environ.get("SKLEARN_ASSUME_FINITE", False)), @@ -59,7 +59,7 @@ def set_config( enable_metadata_routing=None, skip_parameter_validation=None, ): - """Set global scikit-learn configuration + """Set global scikit-learn configuration. .. versionadded:: 0.19 diff --git a/sklearn/_loss/__init__.py b/sklearn/_loss/__init__.py index 78b1eb8543c8d..ee15e693c16f6 100644 --- a/sklearn/_loss/__init__.py +++ b/sklearn/_loss/__init__.py @@ -4,19 +4,18 @@ """ from .loss import ( - HalfSquaredError, AbsoluteError, - PinballLoss, - HuberLoss, - HalfPoissonLoss, + HalfBinomialLoss, HalfGammaLoss, + HalfMultinomialLoss, + HalfPoissonLoss, + HalfSquaredError, HalfTweedieLoss, HalfTweedieLossIdentity, - HalfBinomialLoss, - HalfMultinomialLoss, + HuberLoss, + PinballLoss, ) - __all__ = [ "HalfSquaredError", "AbsoluteError", diff --git a/sklearn/_loss/link.py b/sklearn/_loss/link.py index 510ef80c641fc..9459844f6b89a 100644 --- a/sklearn/_loss/link.py +++ b/sklearn/_loss/link.py @@ -9,6 +9,7 @@ import numpy as np from scipy.special import expit, logit from scipy.stats import gmean + from ..utils.extmath import softmax diff --git a/sklearn/_loss/loss.py b/sklearn/_loss/loss.py index 037d933aa5491..f3b61da0915d5 100644 --- a/sklearn/_loss/loss.py +++ b/sklearn/_loss/loss.py @@ -16,31 +16,33 @@ # - Replace link module of GLMs. 
import numbers + import numpy as np from scipy.special import xlogy + +from ..utils import check_scalar +from ..utils.stats import _weighted_percentile from ._loss import ( - CyHalfSquaredError, CyAbsoluteError, - CyPinballLoss, - CyHuberLoss, - CyHalfPoissonLoss, + CyExponentialLoss, + CyHalfBinomialLoss, CyHalfGammaLoss, + CyHalfMultinomialLoss, + CyHalfPoissonLoss, + CyHalfSquaredError, CyHalfTweedieLoss, CyHalfTweedieLossIdentity, - CyHalfBinomialLoss, - CyHalfMultinomialLoss, - CyExponentialLoss, + CyHuberLoss, + CyPinballLoss, ) from .link import ( - Interval, + HalfLogitLink, IdentityLink, - LogLink, + Interval, LogitLink, - HalfLogitLink, + LogLink, MultinomialLogit, ) -from ..utils import check_scalar -from ..utils.stats import _weighted_percentile # Note: The shape of raw_prediction for multiclass classifications are diff --git a/sklearn/_loss/tests/test_link.py b/sklearn/_loss/tests/test_link.py index 8421fd3fd7a77..e5a665f8d48ac 100644 --- a/sklearn/_loss/tests/test_link.py +++ b/sklearn/_loss/tests/test_link.py @@ -1,16 +1,15 @@ import numpy as np -from numpy.testing import assert_allclose, assert_array_equal import pytest +from numpy.testing import assert_allclose, assert_array_equal from sklearn._loss.link import ( _LINKS, - _inclusive_low_high, HalfLogitLink, - MultinomialLogit, Interval, + MultinomialLogit, + _inclusive_low_high, ) - LINK_FUNCTIONS = list(_LINKS.values()) diff --git a/sklearn/_loss/tests/test_loss.py b/sklearn/_loss/tests/test_loss.py index dbfe5b3829dda..d279a2f06a182 100644 --- a/sklearn/_loss/tests/test_loss.py +++ b/sklearn/_loss/tests/test_loss.py @@ -1,22 +1,22 @@ import pickle import numpy as np -from numpy.testing import assert_allclose, assert_array_equal import pytest +from numpy.testing import assert_allclose, assert_array_equal from pytest import approx from scipy.optimize import ( + LinearConstraint, minimize, minimize_scalar, newton, - LinearConstraint, ) from scipy.special import logsumexp -from sklearn._loss.link import _inclusive_low_high, IdentityLink +from sklearn._loss.link import IdentityLink, _inclusive_low_high from sklearn._loss.loss import ( _LOSSES, - BaseLoss, AbsoluteError, + BaseLoss, HalfBinomialLoss, HalfGammaLoss, HalfMultinomialLoss, @@ -30,7 +30,6 @@ from sklearn.utils import assert_all_finite from sklearn.utils._testing import create_memmap_backed_data, skip_if_32bit - ALL_LOSSES = list(_LOSSES.values()) LOSS_INSTANCES = [loss() for loss in ALL_LOSSES] diff --git a/sklearn/_min_dependencies.py b/sklearn/_min_dependencies.py index b6dd0656987b5..a4829c42b505b 100644 --- a/sklearn/_min_dependencies.py +++ b/sklearn/_min_dependencies.py @@ -1,8 +1,7 @@ """All minimum dependencies for scikit-learn.""" -from collections import defaultdict -import platform import argparse - +import platform +from collections import defaultdict # scipy and cython should by in sync with pyproject.toml @@ -37,12 +36,13 @@ "memory_profiler": ("0.57.0", "benchmark, docs"), "pytest": (PYTEST_MIN_VERSION, "tests"), "pytest-cov": ("2.9.0", "tests"), - "flake8": ("3.8.2", "tests"), + "ruff": ("0.0.272", "tests"), "black": ("23.3.0", "tests"), - "mypy": ("0.961", "tests"), + "mypy": ("1.3", "tests"), "pyamg": ("4.0.0", "tests"), - "sphinx": ("4.0.1", "docs"), - "sphinx-gallery": ("0.7.0", "docs"), + "sphinx": ("6.0.0", "docs"), + "sphinx-copybutton": ("0.5.2", "docs"), + "sphinx-gallery": ("0.10.1", "docs"), "numpydoc": ("1.2.0", "docs, tests"), "Pillow": ("7.1.2", "docs"), "pooch": ("1.6.0", "docs, examples, tests"), @@ -51,7 +51,7 @@ "plotly": 
("5.14.0", "docs, examples"), # XXX: Pin conda-lock to the latest released version (needs manual update # from time to time) - "conda-lock": ("2.0.0", "maintenance"), + "conda-lock": ("2.1.1", "maintenance"), } diff --git a/sklearn/base.py b/sklearn/base.py index 13bbcab96aa61..893925c8104a0 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -5,33 +5,36 @@ import copy import functools -import warnings -from collections import defaultdict -import platform import inspect +import platform import re +import warnings +from collections import defaultdict import numpy as np from . import __version__ -from ._config import get_config, config_context +from ._config import config_context, get_config +from .exceptions import InconsistentVersionWarning from .utils import _IS_32BIT +from .utils._estimator_html_repr import estimator_html_repr +from .utils._metadata_requests import _MetadataRequester +from .utils._param_validation import validate_parameter_constraints from .utils._set_output import _SetOutputMixin from .utils._tags import ( _DEFAULT_TAGS, ) -from .exceptions import InconsistentVersionWarning -from .utils.validation import check_X_y -from .utils.validation import check_array -from .utils.validation import _check_y -from .utils.validation import _num_features -from .utils.validation import _check_feature_names_in -from .utils.validation import _generate_get_feature_names_out -from .utils.validation import _is_fitted, check_is_fitted -from .utils._metadata_requests import _MetadataRequester -from .utils.validation import _get_feature_names -from .utils._estimator_html_repr import estimator_html_repr -from .utils._param_validation import validate_parameter_constraints +from .utils.validation import ( + _check_feature_names_in, + _check_y, + _generate_get_feature_names_out, + _get_feature_names, + _is_fitted, + _num_features, + check_array, + check_is_fitted, + check_X_y, +) def clone(estimator, *, safe=True): @@ -77,8 +80,9 @@ def _clone_parametrized(estimator, *, safe=True): """Default implementation of clone. See :func:`sklearn.base.clone` for details.""" estimator_type = type(estimator) - # XXX: not handling dictionaries - if estimator_type in (list, tuple, set, frozenset): + if estimator_type is dict: + return {k: clone(v, safe=safe) for k, v in estimator.items()} + elif estimator_type in (list, tuple, set, frozenset): return estimator_type([clone(e, safe=safe) for e in estimator]) elif not hasattr(estimator, "get_params") or isinstance(estimator, type): if not safe: @@ -872,12 +876,12 @@ def get_submatrix(self, i, data): class TransformerMixin(_SetOutputMixin): """Mixin class for all transformers in scikit-learn. - If :term:`get_feature_names_out` is defined, then `BaseEstimator` will + If :term:`get_feature_names_out` is defined, then :class:`BaseEstimator` will automatically wrap `transform` and `fit_transform` to follow the `set_output` API. See the :ref:`developer_api_set_output` for details. - :class:`base.OneToOneFeatureMixin` and - :class:`base.ClassNamePrefixFeaturesOutMixin` are helpful mixins for + :class:`OneToOneFeatureMixin` and + :class:`ClassNamePrefixFeaturesOutMixin` are helpful mixins for defining :term:`get_feature_names_out`. """ @@ -919,7 +923,7 @@ class OneToOneFeatureMixin: """Provides `get_feature_names_out` for simple transformers. This mixin assumes there's a 1-to-1 correspondence between input features - and output features, such as :class:`~preprocessing.StandardScaler`. + and output features, such as :class:`~sklearn.preprocessing.StandardScaler`. 
""" def get_feature_names_out(self, input_features=None): @@ -950,8 +954,8 @@ class ClassNamePrefixFeaturesOutMixin: """Mixin class for transformers that generate their own names by prefixing. This mixin is useful when the transformer needs to generate its own feature - names out, such as :class:`~decomposition.PCA`. For example, if - :class:`~decomposition.PCA` outputs 3 features, then the generated feature + names out, such as :class:`~sklearn.decomposition.PCA`. For example, if + :class:`~sklearn.decomposition.PCA` outputs 3 features, then the generated feature names out are: `["pca0", "pca1", "pca2"]`. This mixin assumes that a `_n_features_out` attribute is defined when the @@ -969,7 +973,7 @@ def get_feature_names_out(self, input_features=None): Parameters ---------- input_features : array-like of str or None, default=None - Only used to validate feature names with the names seen in :meth:`fit`. + Only used to validate feature names with the names seen in `fit`. Returns ------- diff --git a/sklearn/calibration.py b/sklearn/calibration.py index e4869387f4166..1fc2eeb4f64fa 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -7,43 +7,51 @@ # # License: BSD 3 clause -from numbers import Integral, Real import warnings -from inspect import signature from functools import partial - +from inspect import signature from math import log -import numpy as np +from numbers import Integral, Real -from scipy.special import expit -from scipy.special import xlogy +import numpy as np from scipy.optimize import fmin_bfgs +from scipy.special import expit, xlogy + +from sklearn.utils import Bunch from .base import ( BaseEstimator, ClassifierMixin, - RegressorMixin, - clone, MetaEstimatorMixin, + RegressorMixin, _fit_context, + clone, ) -from .preprocessing import label_binarize, LabelEncoder +from .isotonic import IsotonicRegression +from .model_selection import check_cv, cross_val_predict +from .preprocessing import LabelEncoder, label_binarize +from .svm import LinearSVC from .utils import ( + _safe_indexing, column_or_1d, indexable, - _safe_indexing, ) - -from .utils.multiclass import check_classification_targets -from .utils.parallel import delayed, Parallel from .utils._param_validation import ( - StrOptions, HasMethods, Hidden, - validate_params, Interval, + StrOptions, + validate_params, ) from .utils._plotting import _BinaryClassifierCurveDisplayMixin +from .utils.metadata_routing import ( + MetadataRouter, + MethodMapping, + _routing_enabled, + process_routing, +) +from .utils.multiclass import check_classification_targets +from .utils.parallel import Parallel, delayed from .utils.validation import ( _check_fit_params, _check_pos_label_consistency, @@ -52,16 +60,6 @@ check_consistent_length, check_is_fitted, ) -from .isotonic import IsotonicRegression -from .svm import LinearSVC -from .model_selection import check_cv, cross_val_predict -from sklearn.utils import Bunch -from .utils.metadata_routing import ( - MetadataRouter, - MethodMapping, - process_routing, - _routing_enabled, -) class CalibratedClassifierCV(ClassifierMixin, MetaEstimatorMixin, BaseEstimator): @@ -534,7 +532,7 @@ def get_metadata_routing(self): Returns ------- routing : MetadataRouter - A :class:`~utils.metadata_routing.MetadataRouter` encapsulating + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating routing information. 
""" router = ( @@ -825,7 +823,11 @@ def predict_proba(self, X): return proba -def _sigmoid_calibration(predictions, y, sample_weight=None): +# The max_abs_prediction_threshold was approximated using +# logit(np.finfo(np.float64).eps) which is about -36 +def _sigmoid_calibration( + predictions, y, sample_weight=None, max_abs_prediction_threshold=30 +): """Probability Calibration with sigmoid method (Platt 2000) Parameters @@ -856,6 +858,20 @@ def _sigmoid_calibration(predictions, y, sample_weight=None): F = predictions # F follows Platt's notations + scale_constant = 1.0 + max_prediction = np.max(np.abs(F)) + + # If the predictions have large values we scale them in order to bring + # them within a suitable range. This has no effect on the final + # (prediction) result because linear models like Logisitic Regression + # without a penalty are invariant to multiplying the features by a + # constant. + if max_prediction >= max_abs_prediction_threshold: + scale_constant = max_prediction + # We rescale the features in a copy: inplace rescaling could confuse + # the caller and make the code harder to reason about. + F = F / scale_constant + # Bayesian priors (see Platt end of section 2.2): # It corresponds to the number of samples, taking into account the # `sample_weight`. @@ -892,7 +908,11 @@ def grad(AB): AB0 = np.array([0.0, log((prior0 + 1.0) / (prior1 + 1.0))]) AB_ = fmin_bfgs(objective, AB0, fprime=grad, disp=False) - return AB_[0], AB_[1] + + # The tuned multiplicative parameter is converted back to the original + # input feature scale. The offset parameter does not need rescaling since + # we did not rescale the outcome variable. + return AB_[0] / scale_constant, AB_[1] class _SigmoidCalibration(RegressorMixin, BaseEstimator): @@ -957,7 +977,8 @@ def predict(self, T): "pos_label": [Real, str, "boolean", None], "n_bins": [Interval(Integral, 1, None, closed="left")], "strategy": [StrOptions({"uniform", "quantile"})], - } + }, + prefer_skip_nested_validation=True, ) def calibration_curve( y_true, @@ -1187,7 +1208,7 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs): f"(Positive class: {self.pos_label})" if self.pos_label is not None else "" ) - line_kwargs = {} + line_kwargs = {"marker": "s", "linestyle": "-"} if name is not None: line_kwargs["label"] = name line_kwargs.update(**kwargs) @@ -1196,9 +1217,7 @@ def plot(self, *, ax=None, name=None, ref_line=True, **kwargs): existing_ref_line = ref_line_label in self.ax_.get_legend_handles_labels()[1] if ref_line and not existing_ref_line: self.ax_.plot([0, 1], [0, 1], "k:", label=ref_line_label) - self.line_ = self.ax_.plot(self.prob_pred, self.prob_true, "s-", **line_kwargs)[ - 0 - ] + self.line_ = self.ax_.plot(self.prob_pred, self.prob_true, **line_kwargs)[0] # We always have to show the legend for at least the reference line self.ax_.legend(loc="lower right") diff --git a/sklearn/cluster/__init__.py b/sklearn/cluster/__init__.py index 40b89ea0da8ba..f5d3104d816bf 100644 --- a/sklearn/cluster/__init__.py +++ b/sklearn/cluster/__init__.py @@ -3,27 +3,27 @@ algorithms. 
""" -from ._spectral import spectral_clustering, SpectralClustering -from ._mean_shift import mean_shift, MeanShift, estimate_bandwidth, get_bin_seeds -from ._affinity_propagation import affinity_propagation, AffinityPropagation +from ._affinity_propagation import AffinityPropagation, affinity_propagation from ._agglomerative import ( - ward_tree, AgglomerativeClustering, - linkage_tree, FeatureAgglomeration, + linkage_tree, + ward_tree, ) -from ._kmeans import k_means, KMeans, MiniBatchKMeans, kmeans_plusplus +from ._bicluster import SpectralBiclustering, SpectralCoclustering +from ._birch import Birch from ._bisect_k_means import BisectingKMeans -from ._dbscan import dbscan, DBSCAN +from ._dbscan import DBSCAN, dbscan +from ._hdbscan.hdbscan import HDBSCAN +from ._kmeans import KMeans, MiniBatchKMeans, k_means, kmeans_plusplus +from ._mean_shift import MeanShift, estimate_bandwidth, get_bin_seeds, mean_shift from ._optics import ( OPTICS, cluster_optics_dbscan, - compute_optics_graph, cluster_optics_xi, + compute_optics_graph, ) -from ._bicluster import SpectralBiclustering, SpectralCoclustering -from ._birch import Birch -from ._hdbscan.hdbscan import HDBSCAN +from ._spectral import SpectralClustering, spectral_clustering __all__ = [ "AffinityPropagation", diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py index 1ffc5f07e8c50..b3b5869687c22 100644 --- a/sklearn/cluster/_affinity_propagation.py +++ b/sklearn/cluster/_affinity_propagation.py @@ -5,20 +5,18 @@ # License: BSD 3 clause -from numbers import Integral, Real import warnings +from numbers import Integral, Real import numpy as np +from .._config import config_context +from ..base import BaseEstimator, ClusterMixin, _fit_context from ..exceptions import ConvergenceWarning -from ..base import BaseEstimator, ClusterMixin -from ..base import _fit_context +from ..metrics import euclidean_distances, pairwise_distances_argmin from ..utils import check_random_state from ..utils._param_validation import Interval, StrOptions, validate_params from ..utils.validation import check_is_fitted -from ..metrics import euclidean_distances -from ..metrics import pairwise_distances_argmin -from .._config import config_context def _equal_similarities_and_preferences(S, preference): @@ -187,7 +185,8 @@ def _affinity_propagation( { "S": ["array-like"], "return_n_iter": ["boolean"], - } + }, + prefer_skip_nested_validation=False, ) def affinity_propagation( S, diff --git a/sklearn/cluster/_agglomerative.py b/sklearn/cluster/_agglomerative.py index b7d08a45dcd80..9a2aff524a91e 100644 --- a/sklearn/cluster/_agglomerative.py +++ b/sklearn/cluster/_agglomerative.py @@ -15,22 +15,25 @@ from scipy import sparse from scipy.sparse.csgraph import connected_components -from ..base import BaseEstimator, ClusterMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context -from ..metrics.pairwise import paired_distances -from ..metrics.pairwise import _VALID_METRICS +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + ClusterMixin, + _fit_context, +) from ..metrics import DistanceMetric from ..metrics._dist_metrics import METRIC_MAPPING64 +from ..metrics.pairwise import _VALID_METRICS, paired_distances from ..utils import check_array from ..utils._fast_dict import IntFloatDict -from ..utils.graph import _fix_connected_components from ..utils._param_validation import ( + HasMethods, Hidden, Interval, StrOptions, - HasMethods, validate_params, ) +from ..utils.graph import 
_fix_connected_components from ..utils.validation import check_memory # mypy error: Module 'sklearn.cluster' has no attribute '_hierarchical_fast' @@ -87,11 +90,12 @@ def _fix_connectivity(X, connectivity, affinity): connectivity = connectivity + connectivity.T # Convert connectivity matrix to LIL - if not sparse.isspmatrix_lil(connectivity): - if not sparse.isspmatrix(connectivity): - connectivity = sparse.lil_matrix(connectivity) - else: - connectivity = connectivity.tolil() + if not sparse.issparse(connectivity): + connectivity = sparse.lil_matrix(connectivity) + + # `connectivity` is a sparse matrix at this point + if connectivity.format != "lil": + connectivity = connectivity.tolil() # Compute the number of nodes n_connected_components, labels = connected_components(connectivity) @@ -182,7 +186,8 @@ def _single_linkage_tree( "connectivity": ["array-like", "sparse matrix", None], "n_clusters": [Interval(Integral, 1, None, closed="left"), None], "return_distance": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def ward_tree(X, *, connectivity=None, n_clusters=None, return_distance=False): """Ward clustering based on a Feature matrix. diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py index 4133264626ebb..65280c06319d9 100644 --- a/sklearn/cluster/_bicluster.py +++ b/sklearn/cluster/_bicluster.py @@ -3,25 +3,19 @@ # License: BSD 3 clause from abc import ABCMeta, abstractmethod - -import numpy as np from numbers import Integral +import numpy as np from scipy.linalg import norm from scipy.sparse import dia_matrix, issparse from scipy.sparse.linalg import eigsh, svds -from . import KMeans, MiniBatchKMeans -from ..base import BaseEstimator, BiclusterMixin -from ..base import _fit_context -from ..utils import check_random_state -from ..utils import check_scalar - +from ..base import BaseEstimator, BiclusterMixin, _fit_context +from ..utils import check_random_state, check_scalar +from ..utils._param_validation import Interval, StrOptions from ..utils.extmath import make_nonnegative, randomized_svd, safe_sparse_dot - from ..utils.validation import assert_all_finite -from ..utils._param_validation import Interval, StrOptions - +from ._kmeans import KMeans, MiniBatchKMeans __all__ = ["SpectralCoclustering", "SpectralBiclustering"] diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py index e74630572a014..d62fb880ba8b2 100644 --- a/sklearn/cluster/_birch.py +++ b/sklearn/cluster/_birch.py @@ -4,26 +4,27 @@ # License: BSD 3 clause import warnings -import numpy as np +from math import sqrt from numbers import Integral, Real + +import numpy as np from scipy import sparse -from math import sqrt -from ..metrics import pairwise_distances_argmin -from ..metrics.pairwise import euclidean_distances +from .._config import config_context from ..base import ( - TransformerMixin, - ClusterMixin, BaseEstimator, ClassNamePrefixFeaturesOutMixin, + ClusterMixin, + TransformerMixin, _fit_context, ) -from ..utils.extmath import row_norms +from ..exceptions import ConvergenceWarning +from ..metrics import pairwise_distances_argmin +from ..metrics.pairwise import euclidean_distances from ..utils._param_validation import Interval +from ..utils.extmath import row_norms from ..utils.validation import check_is_fitted -from ..exceptions import ConvergenceWarning from . 
import AgglomerativeClustering -from .._config import config_context def _iterate_sparse_X(X): diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py index 959d78ae85009..b98d58f6cf12b 100644 --- a/sklearn/cluster/_bisect_k_means.py +++ b/sklearn/cluster/_bisect_k_means.py @@ -7,18 +7,17 @@ import scipy.sparse as sp from ..base import _fit_context -from ._kmeans import _BaseKMeans -from ._kmeans import _kmeans_single_elkan -from ._kmeans import _kmeans_single_lloyd -from ._kmeans import _labels_inertia_threadpool_limit -from ._k_means_common import _inertia_dense -from ._k_means_common import _inertia_sparse -from ..utils.extmath import row_norms from ..utils._openmp_helpers import _openmp_effective_n_threads -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_sample_weight -from ..utils.validation import check_random_state from ..utils._param_validation import StrOptions +from ..utils.extmath import row_norms +from ..utils.validation import _check_sample_weight, check_is_fitted, check_random_state +from ._k_means_common import _inertia_dense, _inertia_sparse +from ._kmeans import ( + _BaseKMeans, + _kmeans_single_elkan, + _kmeans_single_lloyd, + _labels_inertia_threadpool_limit, +) class _BisectingTree: @@ -258,7 +257,7 @@ def _inertia_per_cluster(self, X, centers, labels, sample_weight): X : {ndarray, csr_matrix} of shape (n_samples, n_features) The input samples. - centers : ndarray of shape (n_clusters, n_features) + centers : ndarray of shape (n_clusters=2, n_features) The cluster centers. labels : ndarray of shape (n_samples,) @@ -269,13 +268,14 @@ def _inertia_per_cluster(self, X, centers, labels, sample_weight): Returns ------- - inertia_per_cluster : ndarray of shape (n_clusters,) + inertia_per_cluster : ndarray of shape (n_clusters=2,) Sum of squared errors (inertia) for each cluster. """ + n_clusters = centers.shape[0] # = 2 since centers comes from a bisection _inertia = _inertia_sparse if sp.issparse(X) else _inertia_dense - inertia_per_cluster = np.empty(centers.shape[1]) - for label in range(centers.shape[0]): + inertia_per_cluster = np.empty(n_clusters) + for label in range(n_clusters): inertia_per_cluster[label] = _inertia( X, sample_weight, centers, labels, self._n_threads, single_label=label ) diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py index 3c753935ac046..30205f70ae157 100644 --- a/sklearn/cluster/_dbscan.py +++ b/sklearn/cluster/_dbscan.py @@ -14,12 +14,11 @@ import numpy as np from scipy import sparse +from ..base import BaseEstimator, ClusterMixin, _fit_context from ..metrics.pairwise import _VALID_METRICS -from ..base import BaseEstimator, ClusterMixin -from ..base import _fit_context -from ..utils.validation import _check_sample_weight -from ..utils._param_validation import Interval, StrOptions from ..neighbors import NearestNeighbors +from ..utils._param_validation import Interval, StrOptions +from ..utils.validation import _check_sample_weight from ._dbscan_inner import dbscan_inner @@ -135,8 +134,8 @@ def dbscan( Another way to reduce memory and computation time is to remove (near-)duplicate points and use ``sample_weight`` instead. - :func:`cluster.optics ` provides a similar - clustering with lower memory usage. + :class:`~sklearn.cluster.OPTICS` provides a similar clustering with lower + memory usage. References ---------- @@ -173,6 +172,9 @@ class DBSCAN(ClusterMixin, BaseEstimator): Finds core samples of high density and expands clusters from them. 
Good for data which contains clusters of similar density. + The worst case memory complexity of DBSCAN is :math:`O({n}^2)`, which can + occur when the `eps` param is large and `min_samples` is low. + Read more in the :ref:`User Guide `. Parameters @@ -185,8 +187,11 @@ class DBSCAN(ClusterMixin, BaseEstimator): and distance function. min_samples : int, default=5 - The number of samples (or total weight) in a neighborhood for a point - to be considered as a core point. This includes the point itself. + The number of samples (or total weight) in a neighborhood for a point to + be considered as a core point. This includes the point itself. If + `min_samples` is set to a higher value, DBSCAN will find denser clusters, + whereas if it is set to a lower value, the found clusters will be more + sparse. metric : str, or callable, default='euclidean' The metric to use when calculating distance between instances in a @@ -275,7 +280,7 @@ class DBSCAN(ClusterMixin, BaseEstimator): Another way to reduce memory and computation time is to remove (near-)duplicate points and use ``sample_weight`` instead. - :class:`cluster.OPTICS` provides a similar clustering with lower memory + :class:`~sklearn.cluster.OPTICS` provides a similar clustering with lower memory usage. References diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index 55baf247a2931..f84f18c1c18b3 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -6,12 +6,13 @@ # License: BSD 3 clause import warnings + import numpy as np +from scipy.sparse import issparse from ..base import TransformerMixin -from ..utils.validation import check_is_fitted from ..utils import metadata_routing -from scipy.sparse import issparse +from ..utils.validation import check_is_fitted ############################################################################### # Mixin class for feature agglomeration. diff --git a/sklearn/cluster/_hdbscan/_linkage.pyx b/sklearn/cluster/_hdbscan/_linkage.pyx index 03e91ac8d6833..ee8025c8027aa 100644 --- a/sklearn/cluster/_hdbscan/_linkage.pyx +++ b/sklearn/cluster/_hdbscan/_linkage.pyx @@ -90,7 +90,7 @@ cpdef cnp.ndarray[MST_edge_t, ndim=1, mode='c'] mst_from_mutual_reachability( mst = np.empty(n_samples - 1, dtype=MST_edge_dtype) current_labels = np.arange(n_samples, dtype=np.int64) current_node = 0 - min_reachability = np.full(n_samples, fill_value=np.infty, dtype=np.float64) + min_reachability = np.full(n_samples, fill_value=np.inf, dtype=np.float64) for i in range(0, n_samples - 1): label_filter = current_labels != current_node current_labels = current_labels[label_filter] @@ -156,7 +156,7 @@ cpdef cnp.ndarray[MST_edge_t, ndim=1, mode='c'] mst_from_data_matrix( mst = np.empty(n_samples - 1, dtype=MST_edge_dtype) in_tree = np.zeros(n_samples, dtype=np.uint8) - min_reachability = np.full(n_samples, fill_value=np.infty, dtype=np.float64) + min_reachability = np.full(n_samples, fill_value=np.inf, dtype=np.float64) current_sources = np.ones(n_samples, dtype=np.int64) current_node = 0 diff --git a/sklearn/cluster/_hdbscan/_tree.pyx b/sklearn/cluster/_hdbscan/_tree.pyx index c5bca48d5ebf9..df63ce4d6b0f4 100644 --- a/sklearn/cluster/_hdbscan/_tree.pyx +++ b/sklearn/cluster/_hdbscan/_tree.pyx @@ -133,7 +133,7 @@ cpdef cnp.ndarray[CONDENSED_t, ndim=1, mode='c'] _condense_tree( A single linkage hierarchy in scipy.cluster.hierarchy format. min_cluster_size : int, optional (default 10) - The minimum size of clusters to consider. 
Clusters smaler than this + The minimum size of clusters to consider. Clusters smaller than this are pruned from the tree. Returns diff --git a/sklearn/cluster/_hdbscan/hdbscan.py b/sklearn/cluster/_hdbscan/hdbscan.py index 3d8ec08ac3997..57de8962250b1 100644 --- a/sklearn/cluster/_hdbscan/hdbscan.py +++ b/sklearn/cluster/_hdbscan/hdbscan.py @@ -46,23 +46,22 @@ from ...metrics._dist_metrics import DistanceMetric from ...neighbors import BallTree, KDTree, NearestNeighbors from ...utils._param_validation import Interval, StrOptions -from ...utils.validation import _assert_all_finite, _allclose_dense_sparse -from ._reachability import mutual_reachability_graph +from ...utils.validation import _allclose_dense_sparse, _assert_all_finite from ._linkage import ( + MST_edge_dtype, make_single_linkage, - mst_from_mutual_reachability, mst_from_data_matrix, - MST_edge_dtype, + mst_from_mutual_reachability, ) -from ._tree import tree_to_labels, labelling_at_cut -from ._tree import HIERARCHY_dtype +from ._reachability import mutual_reachability_graph +from ._tree import HIERARCHY_dtype, labelling_at_cut, tree_to_labels -FAST_METRICS = set(KDTree.valid_metrics() + BallTree.valid_metrics()) +FAST_METRICS = set(KDTree.valid_metrics + BallTree.valid_metrics) # Encodings are arbitrary but must be strictly negative. # The current encodings are chosen as extensions to the -1 noise label. # Avoided enums so that the end user only deals with simple labels. -_OUTLIER_ENCODING = { +_OUTLIER_ENCODING: dict = { "infinite": { "label": -2, # The probability could also be 1, since infinite points are certainly @@ -99,8 +98,8 @@ def _brute_mst(mutual_reachability, min_samples): Returns ------- mst : ndarray of shape (n_samples - 1,), dtype=MST_edge_dtype - The MST representation of the mutual-reahability graph. The MST is - represented as a collecteion of edges. + The MST representation of the mutual-reachability graph. The MST is + represented as a collection of edges. """ if not issparse(mutual_reachability): return mst_from_mutual_reachability(mutual_reachability) @@ -140,8 +139,8 @@ def _process_mst(min_spanning_tree): Parameters ---------- min_spanning_tree : ndarray of shape (n_samples - 1,), dtype=MST_edge_dtype - The MST representation of the mutual-reahability graph. The MST is - represented as a collecteion of edges. + The MST representation of the mutual-reachability graph. The MST is + represented as a collection of edges. Returns ------- @@ -188,7 +187,7 @@ def _hdbscan_brute( feature array. - If metric is a string or callable, it must be one of - the options allowed by :func:`~sklearn.metrics.pairwise.pairwise_distances` + the options allowed by :func:`~sklearn.metrics.pairwise_distances` for its metric parameter. 
- If metric is "precomputed", X is assumed to be a distance matrix and @@ -655,7 +654,7 @@ def __init__( alpha=1.0, algorithm="auto", leaf_size=40, - n_jobs=4, + n_jobs=None, cluster_selection_method="eom", allow_single_cluster=False, store_centers=None, @@ -722,10 +721,10 @@ def fit(self, X, y=None): # Samples with missing data are denoted by the presence of # `np.nan` - missing_index = list(np.isnan(reduced_X).nonzero()[0]) + missing_index = np.isnan(reduced_X).nonzero()[0] # Outlier samples are denoted by the presence of `np.inf` - infinite_index = list(np.isinf(reduced_X).nonzero()[0]) + infinite_index = np.isinf(reduced_X).nonzero()[0] # Continue with only finite samples finite_index = _get_finite_row_indices(X) @@ -769,14 +768,12 @@ def fit(self, X, y=None): n_jobs=self.n_jobs, **self._metric_params, ) - if self.algorithm == "kdtree" and self.metric not in KDTree.valid_metrics(): + if self.algorithm == "kdtree" and self.metric not in KDTree.valid_metrics: raise ValueError( f"{self.metric} is not a valid metric for a KDTree-based algorithm." " Please select a different metric." ) - elif ( - self.algorithm == "balltree" and self.metric not in BallTree.valid_metrics() - ): + elif self.algorithm == "balltree" and self.metric not in BallTree.valid_metrics: raise ValueError( f"{self.metric} is not a valid metric for a BallTree-based algorithm." " Please select a different metric." @@ -806,7 +803,7 @@ def fit(self, X, y=None): # We can't do much with sparse matrices ... mst_func = _hdbscan_brute kwargs["copy"] = self.copy - elif self.metric in KDTree.valid_metrics(): + elif self.metric in KDTree.valid_metrics: # TODO: Benchmark KD vs Ball Tree efficiency mst_func = _hdbscan_prims kwargs["algo"] = "kd_tree" @@ -835,7 +832,7 @@ def fit(self, X, y=None): self._single_linkage_tree_, internal_to_raw, # There may be overlap for points w/ both `np.inf` and `np.nan` - non_finite=set(infinite_index + missing_index), + non_finite=set(np.hstack([infinite_index, missing_index])), ) new_labels = np.empty(self._raw_data.shape[0], dtype=np.int32) new_labels[finite_index] = self.labels_ @@ -903,7 +900,7 @@ def _weighted_cluster_center(self, X): self.medoids_ = np.empty((n_clusters, X.shape[1]), dtype=np.float64) # Need to handle iteratively seen each cluster may have a different - # number of samples, hence we can't create a homogenous 3D array. + # number of samples, hence we can't create a homogeneous 3D array. 
for idx in range(n_clusters): mask = self.labels_ == idx data = X[mask] diff --git a/sklearn/cluster/_hdbscan/tests/test_reachibility.py b/sklearn/cluster/_hdbscan/tests/test_reachibility.py index c8ba28d0af25b..53096dd7cbec7 100644 --- a/sklearn/cluster/_hdbscan/tests/test_reachibility.py +++ b/sklearn/cluster/_hdbscan/tests/test_reachibility.py @@ -1,13 +1,12 @@ import numpy as np import pytest +from sklearn.cluster._hdbscan._reachability import mutual_reachability_graph from sklearn.utils._testing import ( _convert_container, assert_allclose, ) -from sklearn.cluster._hdbscan._reachability import mutual_reachability_graph - def test_mutual_reachability_graph_error_sparse_format(): """Check that we raise an error if the sparse format is not CSR.""" @@ -46,7 +45,7 @@ def test_mutual_reachability_graph_equivalence_dense_sparse(): mr_graph_dense = mutual_reachability_graph(X_dense, min_samples=3) mr_graph_sparse = mutual_reachability_graph(X_sparse, min_samples=3) - assert_allclose(mr_graph_dense, mr_graph_sparse.A) + assert_allclose(mr_graph_dense, mr_graph_sparse.toarray()) @pytest.mark.parametrize("array_type", ["array", "sparse_csr"]) diff --git a/sklearn/cluster/_k_means_common.pyx b/sklearn/cluster/_k_means_common.pyx index 192a4bdec1088..151af55076b7b 100644 --- a/sklearn/cluster/_k_means_common.pyx +++ b/sklearn/cluster/_k_means_common.pyx @@ -4,10 +4,6 @@ # # License: BSD 3 clause -# TODO: We still need to use ndarrays instead of typed memoryviews when using -# fused types and when the array may be read-only (for instance when it's -# provided by the user). This is fixed in cython > 0.3. - import numpy as np from cython cimport floating from cython.parallel cimport prange diff --git a/sklearn/cluster/_k_means_elkan.pyx b/sklearn/cluster/_k_means_elkan.pyx index 60b2d080793db..0853d5f11d5e6 100644 --- a/sklearn/cluster/_k_means_elkan.pyx +++ b/sklearn/cluster/_k_means_elkan.pyx @@ -2,10 +2,6 @@ # # Licence: BSD 3 clause -# TODO: We still need to use ndarrays instead of typed memoryviews when using -# fused types and when the array may be read-only (for instance when it's -# provided by the user). This is fixed in cython > 0.3. - from cython cimport floating from cython.parallel import prange, parallel from libc.stdlib cimport calloc, free @@ -263,6 +259,14 @@ def elkan_iter_chunked_dense( int n_features = X.shape[1] int n_clusters = centers_new.shape[0] + if n_samples == 0: + # An empty array was passed, do nothing and return early (before + # attempting to compute n_chunks). This can typically happen when + # calling the prediction function of a bisecting k-means model with a + # large fraction of outiers. + return + + cdef: # hard-coded number of samples per chunk. Splitting in chunks is # necessary to get parallelism. Chunk size chosen to be same as lloyd's int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples @@ -498,6 +502,14 @@ def elkan_iter_chunked_sparse( int n_features = X.shape[1] int n_clusters = centers_new.shape[0] + if n_samples == 0: + # An empty array was passed, do nothing and return early (before + # attempting to compute n_chunks). This can typically happen when + # calling the prediction function of a bisecting k-means model with a + # large fraction of outiers. 
+ return + + cdef: floating[::1] X_data = X.data int[::1] X_indices = X.indices int[::1] X_indptr = X.indptr diff --git a/sklearn/cluster/_k_means_lloyd.pyx b/sklearn/cluster/_k_means_lloyd.pyx index 664ec0da2cea2..db7b4e259f434 100644 --- a/sklearn/cluster/_k_means_lloyd.pyx +++ b/sklearn/cluster/_k_means_lloyd.pyx @@ -1,9 +1,5 @@ # Licence: BSD 3 clause -# TODO: We still need to use ndarrays instead of typed memoryviews when using -# fused types and when the array may be read-only (for instance when it's -# provided by the user). This is fixed in cython > 0.3. - from cython cimport floating from cython.parallel import prange, parallel from libc.stdlib cimport malloc, calloc, free @@ -82,6 +78,14 @@ def lloyd_iter_chunked_dense( int n_features = X.shape[1] int n_clusters = centers_old.shape[0] + if n_samples == 0: + # An empty array was passed, do nothing and return early (before + # attempting to compute n_chunks). This can typically happen when + # calling the prediction function of a bisecting k-means model with a + # large fraction of outiers. + return + + cdef: # hard-coded number of samples per chunk. Appeared to be close to # optimal in all situations. int n_samples_chunk = CHUNK_SIZE if n_samples > CHUNK_SIZE else n_samples @@ -267,12 +271,19 @@ def lloyd_iter_chunked_sparse( the algorithm. This is useful especially when calling predict on a fitted model. """ - # print(X.indices.dtype) cdef: int n_samples = X.shape[0] int n_features = X.shape[1] int n_clusters = centers_old.shape[0] + if n_samples == 0: + # An empty array was passed, do nothing and return early (before + # attempting to compute n_chunks). This can typically happen when + # calling the prediction function of a bisecting k-means model with a + # large fraction of outiers. + return + + cdef: # Choose same as for dense. Does not have the same impact since with # sparse data the pairwise distances matrix is not precomputed. # However, splitting in chunks is necessary to get parallelism. diff --git a/sklearn/cluster/_k_means_minibatch.pyx b/sklearn/cluster/_k_means_minibatch.pyx index 503413a469e3e..22ca5255e3889 100644 --- a/sklearn/cluster/_k_means_minibatch.pyx +++ b/sklearn/cluster/_k_means_minibatch.pyx @@ -1,7 +1,3 @@ -# TODO: We still need to use ndarrays instead of typed memoryviews when using -# fused types and when the array may be read-only (for instance when it's -# provided by the user). This will be fixed in cython >= 0.3. 
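A sketch of the situation the new empty-chunk guards above are meant to cover: predicting with a fitted bisecting k-means model on query points that sit far from every center. Whether an internal chunk really ends up empty depends on the data, so the blobs and the synthetic outliers below are only illustrative:

import numpy as np

from sklearn.cluster import BisectingKMeans
from sklearn.datasets import make_blobs

X, _ = make_blobs(n_samples=300, centers=4, random_state=0)
bkm = BisectingKMeans(n_clusters=4, random_state=0).fit(X)

# While routing far-away points down the bisecting tree, one side of a split
# can receive an empty chunk; the early returns above handle that case.
outliers = np.full((10, 2), 1e6)
print(bkm.predict(np.vstack([X[:5], outliers])))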
- from cython cimport floating from cython.parallel cimport parallel, prange from libc.stdlib cimport malloc, free diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py index b36999885a14e..d1da355290073 100644 --- a/sklearn/cluster/_kmeans.py +++ b/sklearn/cluster/_kmeans.py @@ -11,50 +11,48 @@ # Robert Layton # License: BSD 3 clause +import warnings from abc import ABC, abstractmethod from numbers import Integral, Real -import warnings import numpy as np import scipy.sparse as sp from ..base import ( BaseEstimator, + ClassNamePrefixFeaturesOutMixin, ClusterMixin, TransformerMixin, - ClassNamePrefixFeaturesOutMixin, _fit_context, ) -from ..metrics.pairwise import euclidean_distances -from ..metrics.pairwise import _euclidean_distances +from ..exceptions import ConvergenceWarning +from ..metrics.pairwise import _euclidean_distances, euclidean_distances +from ..utils import check_array, check_random_state +from ..utils._openmp_helpers import _openmp_effective_n_threads +from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params from ..utils.extmath import row_norms, stable_cumsum -from ..utils.fixes import threadpool_limits -from ..utils.fixes import threadpool_info -from ..utils.sparsefuncs_fast import assign_rows_csr +from ..utils.fixes import threadpool_info, threadpool_limits from ..utils.sparsefuncs import mean_variance_axis -from ..utils import check_array -from ..utils import check_random_state -from ..utils.validation import check_is_fitted, _check_sample_weight -from ..utils.validation import _is_arraylike_not_scalar -from ..utils._param_validation import Hidden -from ..utils._param_validation import Interval -from ..utils._param_validation import StrOptions -from ..utils._param_validation import validate_params -from ..utils._openmp_helpers import _openmp_effective_n_threads -from ..exceptions import ConvergenceWarning -from ._k_means_common import CHUNK_SIZE -from ._k_means_common import _inertia_dense -from ._k_means_common import _inertia_sparse -from ._k_means_common import _is_same_clustering -from ._k_means_minibatch import _minibatch_update_dense -from ._k_means_minibatch import _minibatch_update_sparse -from ._k_means_lloyd import lloyd_iter_chunked_dense -from ._k_means_lloyd import lloyd_iter_chunked_sparse -from ._k_means_elkan import init_bounds_dense -from ._k_means_elkan import init_bounds_sparse -from ._k_means_elkan import elkan_iter_chunked_dense -from ._k_means_elkan import elkan_iter_chunked_sparse - +from ..utils.sparsefuncs_fast import assign_rows_csr +from ..utils.validation import ( + _check_sample_weight, + _is_arraylike_not_scalar, + check_is_fitted, +) +from ._k_means_common import ( + CHUNK_SIZE, + _inertia_dense, + _inertia_sparse, + _is_same_clustering, +) +from ._k_means_elkan import ( + elkan_iter_chunked_dense, + elkan_iter_chunked_sparse, + init_bounds_dense, + init_bounds_sparse, +) +from ._k_means_lloyd import lloyd_iter_chunked_dense, lloyd_iter_chunked_sparse +from ._k_means_minibatch import _minibatch_update_dense, _minibatch_update_sparse ############################################################################### # Initialization heuristic @@ -68,7 +66,8 @@ "x_squared_norms": ["array-like", None], "random_state": ["random_state"], "n_local_trials": [Interval(Integral, 1, None, closed="left"), None], - } + }, + prefer_skip_nested_validation=True, ) def kmeans_plusplus( X, @@ -295,24 +294,10 @@ def _tolerance(X, tol): @validate_params( { "X": ["array-like", "sparse matrix"], - "n_clusters": 
[Interval(Integral, 1, None, closed="left")], "sample_weight": ["array-like", None], - "init": [StrOptions({"k-means++", "random"}), callable, "array-like"], - "n_init": [ - StrOptions({"auto"}), - Hidden(StrOptions({"warn"})), - Interval(Integral, 1, None, closed="left"), - ], - "max_iter": [Interval(Integral, 1, None, closed="left")], - "verbose": [Interval(Integral, 0, None, closed="left"), bool], - "tol": [Interval(Real, 0, None, closed="left")], - "random_state": ["random_state"], - "copy_x": [bool], - "algorithm": [ - StrOptions({"lloyd", "elkan", "auto", "full"}, deprecated={"auto", "full"}) - ], "return_n_iter": [bool], - } + }, + prefer_skip_nested_validation=False, ) def k_means( X, @@ -369,7 +354,8 @@ def k_means( n_init consecutive runs in terms of inertia. When `n_init='auto'`, the number of runs depends on the value of init: - 10 if using `init='random'`, 1 if using `init='k-means++'`. + 10 if using `init='random'` or `init` is a callable; + 1 if using `init='k-means++'` or `init` is an array-like. .. versionadded:: 1.2 Added 'auto' option for `n_init`. @@ -899,10 +885,14 @@ def _check_params_vs_input(self, X, default_n_init=None): ) self._n_init = default_n_init if self._n_init == "auto": - if self.init == "k-means++": + if isinstance(self.init, str) and self.init == "k-means++": self._n_init = 1 - else: + elif isinstance(self.init, str) and self.init == "random": + self._n_init = default_n_init + elif callable(self.init): self._n_init = default_n_init + else: # array-like + self._n_init = 1 if _is_arraylike_not_scalar(self.init) and self._n_init != 1: warnings.warn( @@ -973,9 +963,9 @@ def _init_centroids( x_squared_norms, init, random_state, + sample_weight, init_size=None, n_centroids=None, - sample_weight=None, ): """Compute the initial centroids. @@ -996,6 +986,11 @@ def _init_centroids( Determines random number generation for centroid initialization. See :term:`Glossary `. + sample_weight : ndarray of shape (n_samples,) + The weights for each observation in X. `sample_weight` is not used + during initialization if `init` is a callable or a user provided + array. + init_size : int, default=None Number of samples to randomly sample for speeding up the initialization (sometimes at the expense of accuracy). @@ -1003,16 +998,12 @@ def _init_centroids( n_centroids : int, default=None Number of centroids to initialize. If left to 'None' the number of centroids will be equal to - number of clusters to form (self.n_clusters) - - sample_weight : ndarray of shape (n_samples,), default=None - The weights for each observation in X. If None, all observations - are assigned equal weight. `sample_weight` is not used during - initialization if `init` is a callable or a user provided array. + number of clusters to form (self.n_clusters). Returns ------- centers : ndarray of shape (n_clusters, n_features) + Initial centroids of clusters. """ n_samples = X.shape[0] n_clusters = self.n_clusters if n_centroids is None else n_centroids @@ -1233,22 +1224,25 @@ class KMeans(_BaseKMeans): (n_clusters, n_features), default='k-means++' Method for initialization: - 'k-means++' : selects initial cluster centroids using sampling based on - an empirical probability distribution of the points' contribution to the - overall inertia. This technique speeds up convergence. The algorithm - implemented is "greedy k-means++". It differs from the vanilla k-means++ - by making several trials at each sampling step and choosing the best centroid - among them. 
+ * 'k-means++' : selects initial cluster centroids using sampling \ + based on an empirical probability distribution of the points' \ + contribution to the overall inertia. This technique speeds up \ + convergence. The algorithm implemented is "greedy k-means++". It \ + differs from the vanilla k-means++ by making several trials at \ + each sampling step and choosing the best centroid among them. - 'random': choose `n_clusters` observations (rows) at random from data - for the initial centroids. + * 'random': choose `n_clusters` observations (rows) at random from \ + data for the initial centroids. - If an array is passed, it should be of shape (n_clusters, n_features) + * If an array is passed, it should be of shape (n_clusters, n_features)\ and gives the initial centers. - If a callable is passed, it should take arguments X, n_clusters and a + * If a callable is passed, it should take arguments X, n_clusters and a\ random state and return an initialization. + For an example of how to use the different `init` strategies, see the example + entitled :ref:`sphx_glr_auto_examples_cluster_plot_kmeans_digits.py`. + n_init : 'auto' or int, default=10 Number of times the k-means algorithm is run with different centroid seeds. The final results is the best output of `n_init` consecutive runs @@ -1256,7 +1250,8 @@ class KMeans(_BaseKMeans): high-dimensional problems (see :ref:`kmeans_sparse_high_dim`). When `n_init='auto'`, the number of runs depends on the value of init: - 10 if using `init='random'`, 1 if using `init='k-means++'`. + 10 if using `init='random'` or `init` is a callable; + 1 if using `init='k-means++'` or `init` is an array-like. .. versionadded:: 1.2 Added 'auto' option for `n_init`. @@ -1792,7 +1787,8 @@ class MiniBatchKMeans(_BaseKMeans): :ref:`kmeans_sparse_high_dim`). When `n_init='auto'`, the number of runs depends on the value of init: - 3 if using `init='random'`, 1 if using `init='k-means++'`. + 3 if using `init='random'` or `init` is a callable; + 1 if using `init='k-means++'` or `init` is an array-like. .. versionadded:: 1.2 Added 'auto' option for `n_init`.
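The hunks above change how `n_init='auto'` is resolved: a single initialization for `init='k-means++'` or an array-like of initial centers, and the full default (10 for KMeans, 3 for MiniBatchKMeans) for `init='random'` or a callable. The following minimal sketch is not part of the diff; it assumes scikit-learn built from this branch and reads the private `_n_init` attribute purely for illustration.

import numpy as np
from sklearn.cluster import KMeans

rng = np.random.RandomState(0)
X = rng.randn(100, 10)
centers = rng.randn(5, 10)

for init in ("k-means++", "random", centers):
    km = KMeans(n_clusters=5, init=init, n_init="auto").fit(X)
    name = init if isinstance(init, str) else "array-like"
    # effective number of runs: k-means++ -> 1, random -> 10, array-like -> 1
    print(name, km._n_init)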
diff --git a/sklearn/cluster/_mean_shift.py b/sklearn/cluster/_mean_shift.py index 6b0f227d011f9..a3ca7efba8743 100644 --- a/sklearn/cluster/_mean_shift.py +++ b/sklearn/cluster/_mean_shift.py @@ -14,20 +14,20 @@ # Gael Varoquaux # Martino Sorbaro -import numpy as np import warnings +from collections import defaultdict from numbers import Integral, Real -from collections import defaultdict +import numpy as np + +from .._config import config_context +from ..base import BaseEstimator, ClusterMixin, _fit_context +from ..metrics.pairwise import pairwise_distances_argmin +from ..neighbors import NearestNeighbors +from ..utils import check_array, check_random_state, gen_batches from ..utils._param_validation import Interval, validate_params +from ..utils.parallel import Parallel, delayed from ..utils.validation import check_is_fitted -from ..utils.parallel import delayed, Parallel -from ..utils import check_random_state, gen_batches, check_array -from ..base import BaseEstimator, ClusterMixin -from ..base import _fit_context -from ..neighbors import NearestNeighbors -from ..metrics.pairwise import pairwise_distances_argmin -from .._config import config_context @validate_params( @@ -37,7 +37,8 @@ "n_samples": [Interval(Integral, 1, None, closed="left"), None], "random_state": ["random_state"], "n_jobs": [Integral, None], - } + }, + prefer_skip_nested_validation=True, ) def estimate_bandwidth(X, *, quantile=0.3, n_samples=None, random_state=0, n_jobs=None): """Estimate the bandwidth to use with the mean-shift algorithm. @@ -120,7 +121,10 @@ def _mean_shift_single_seed(my_mean, X, nbrs, max_iter): return tuple(my_mean), len(points_within), completed_iterations -@validate_params({"X": ["array-like"]}) +@validate_params( + {"X": ["array-like"]}, + prefer_skip_nested_validation=False, +) def mean_shift( X, *, diff --git a/sklearn/cluster/_optics.py b/sklearn/cluster/_optics.py index ca1c74d6f44e7..c1665b28d0060 100755 --- a/sklearn/cluster/_optics.py +++ b/sklearn/cluster/_optics.py @@ -10,23 +10,26 @@ License: BSD 3 clause """ +import warnings from numbers import Integral, Real -import warnings import numpy as np +from scipy.sparse import SparseEfficiencyWarning, issparse +from ..base import BaseEstimator, ClusterMixin, _fit_context from ..exceptions import DataConversionWarning -from ..metrics.pairwise import PAIRWISE_BOOLEAN_FUNCTIONS -from ..metrics.pairwise import _VALID_METRICS +from ..metrics import pairwise_distances +from ..metrics.pairwise import _VALID_METRICS, PAIRWISE_BOOLEAN_FUNCTIONS +from ..neighbors import NearestNeighbors from ..utils import gen_batches, get_chunk_n_rows -from ..utils._param_validation import Interval, HasMethods, StrOptions, validate_params -from ..utils._param_validation import RealNotInt +from ..utils._param_validation import ( + HasMethods, + Interval, + RealNotInt, + StrOptions, + validate_params, +) from ..utils.validation import check_memory -from ..neighbors import NearestNeighbors -from ..base import BaseEstimator, ClusterMixin -from ..base import _fit_context -from ..metrics import pairwise_distances -from scipy.sparse import issparse, SparseEfficiencyWarning class OPTICS(ClusterMixin, BaseEstimator): @@ -135,8 +138,8 @@ class OPTICS(ClusterMixin, BaseEstimator): algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto' Algorithm used to compute the nearest neighbors: - - 'ball_tree' will use :class:`BallTree`. - - 'kd_tree' will use :class:`KDTree`. + - 'ball_tree' will use :class:`~sklearn.neighbors.BallTree`. 
+ - 'kd_tree' will use :class:`~sklearn.neighbors.KDTree`. - 'brute' will use a brute-force search. - 'auto' (default) will attempt to decide the most appropriate algorithm based on the values passed to :meth:`fit` method. @@ -145,10 +148,10 @@ class OPTICS(ClusterMixin, BaseEstimator): this parameter, using brute force. leaf_size : int, default=30 - Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can - affect the speed of the construction and query, as well as the memory - required to store the tree. The optimal value depends on the - nature of the problem. + Leaf size passed to :class:`~sklearn.neighbors.BallTree` or + :class:`~sklearn.neighbors.KDTree`. This can affect the speed of the + construction and query, as well as the memory required to store the + tree. The optimal value depends on the nature of the problem. memory : str or object with the joblib.Memory interface, default=None Used to cache the output of the computation of the tree. @@ -230,6 +233,9 @@ class OPTICS(ClusterMixin, BaseEstimator): >>> clustering = OPTICS(min_samples=2).fit(X) >>> clustering.labels_ array([0, 0, 0, 1, 1, 1]) + + For a more detailed example see + :ref:`sphx_glr_auto_examples_cluster_plot_optics.py`. """ _parameter_constraints: dict = { @@ -444,7 +450,8 @@ def _compute_core_distances_(X, neighbors, min_samples, working_memory): "algorithm": [StrOptions({"auto", "brute", "ball_tree", "kd_tree"})], "leaf_size": [Interval(Integral, 1, None, closed="left")], "n_jobs": [Integral, None], - } + }, + prefer_skip_nested_validation=False, # metric is not validated yet ) def compute_optics_graph( X, *, min_samples, max_eps, metric, p, metric_params, algorithm, leaf_size, n_jobs @@ -499,7 +506,7 @@ def compute_optics_graph( .. note:: `'kulsinski'` is deprecated from SciPy 1.9 and will be removed in SciPy 1.11. - p : int, default=2 + p : float, default=2 Parameter for the Minkowski metric from :class:`~sklearn.metrics.pairwise_distances`. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance @@ -511,20 +518,20 @@ def compute_optics_graph( algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto' Algorithm used to compute the nearest neighbors: - - 'ball_tree' will use :class:`BallTree`. - - 'kd_tree' will use :class:`KDTree`. + - 'ball_tree' will use :class:`~sklearn.neighbors.BallTree`. + - 'kd_tree' will use :class:`~sklearn.neighbors.KDTree`. - 'brute' will use a brute-force search. - 'auto' will attempt to decide the most appropriate algorithm - based on the values passed to :meth:`fit` method. (default) + based on the values passed to `fit` method. (default) Note: fitting on sparse input will override the setting of this parameter, using brute force. leaf_size : int, default=30 - Leaf size passed to :class:`BallTree` or :class:`KDTree`. This can - affect the speed of the construction and query, as well as the memory - required to store the tree. The optimal value depends on the - nature of the problem. + Leaf size passed to :class:`~sklearn.neighbors.BallTree` or + :class:`~sklearn.neighbors.KDTree`. This can affect the speed of the + construction and query, as well as the memory required to store the + tree. The optimal value depends on the nature of the problem. n_jobs : int, default=None The number of parallel jobs to run for neighbors search. 
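The docstring edits above only re-target the BallTree/KDTree cross-references; the behavior of `algorithm` and `leaf_size` is unchanged. As a small usage sketch (not part of the diff), these parameters are simply forwarded to the neighbors search backing OPTICS:

import numpy as np
from sklearn.cluster import OPTICS

X = np.array([[1, 2], [2, 5], [3, 6], [8, 7], [8, 8], [7, 3]], dtype=float)

# Force the KD-tree backed neighbors search with an explicit leaf size;
# 'auto' would instead pick a backend based on the data passed to fit.
clust = OPTICS(min_samples=2, algorithm="kd_tree", leaf_size=30).fit(X)
print(clust.labels_)        # cluster label per sample
print(clust.reachability_)  # reachability distances used for extraction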
@@ -683,7 +690,8 @@ def _set_reach_dist( "core_distances": [np.ndarray], "ordering": [np.ndarray], "eps": [Interval(Real, 0, None, closed="both")], - } + }, + prefer_skip_nested_validation=True, ) def cluster_optics_dbscan(*, reachability, core_distances, ordering, eps): """Perform DBSCAN extraction for an arbitrary epsilon. @@ -739,7 +747,8 @@ def cluster_optics_dbscan(*, reachability, core_distances, ordering, eps): ], "xi": [Interval(Real, 0, 1, closed="both")], "predecessor_correction": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def cluster_optics_xi( *, diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py index f72db4b7c1da3..975800fae6d89 100644 --- a/sklearn/cluster/_spectral.py +++ b/sklearn/cluster/_spectral.py @@ -6,21 +6,19 @@ # Andrew Knyazev # License: BSD 3 clause -from numbers import Integral, Real import warnings +from numbers import Integral, Real import numpy as np - from scipy.linalg import LinAlgError, qr, svd from scipy.sparse import csc_matrix -from ..base import BaseEstimator, ClusterMixin -from ..base import _fit_context -from ..utils._param_validation import Interval, StrOptions, validate_params -from ..utils import check_random_state, as_float_array -from ..metrics.pairwise import pairwise_kernels, KERNEL_PARAMS -from ..neighbors import kneighbors_graph, NearestNeighbors +from ..base import BaseEstimator, ClusterMixin, _fit_context from ..manifold import spectral_embedding +from ..metrics.pairwise import KERNEL_PARAMS, pairwise_kernels +from ..neighbors import NearestNeighbors, kneighbors_graph +from ..utils import as_float_array, check_random_state +from ..utils._param_validation import Interval, StrOptions, validate_params from ._kmeans import k_means @@ -191,7 +189,10 @@ def discretize( return labels -@validate_params({"affinity": ["array-like", "sparse matrix"]}) +@validate_params( + {"affinity": ["array-like", "sparse matrix"]}, + prefer_skip_nested_validation=False, +) def spectral_clustering( affinity, *, @@ -438,7 +439,7 @@ class SpectralClustering(ClusterMixin, BaseEstimator): of precomputed distances, and construct a binary affinity matrix from the ``n_neighbors`` nearest neighbors of each instance. - one of the kernels supported by - :func:`~sklearn.metrics.pairwise_kernels`. + :func:`~sklearn.metrics.pairwise.pairwise_kernels`. Only kernels that produce similarity scores (non-negative values that increase with similarity) should be used. This property is not checked @@ -449,7 +450,7 @@ class SpectralClustering(ClusterMixin, BaseEstimator): the nearest neighbors method. Ignored for ``affinity='rbf'``. eigen_tol : float, default="auto" - Stopping criterion for eigendecomposition of the Laplacian matrix. + Stopping criterion for eigen decomposition of the Laplacian matrix. 
If `eigen_tol="auto"` then the passed tolerance will depend on the `eigen_solver`: diff --git a/sklearn/cluster/tests/common.py b/sklearn/cluster/tests/common.py index 0f4bd9e14926d..b1fe047fe230a 100644 --- a/sklearn/cluster/tests/common.py +++ b/sklearn/cluster/tests/common.py @@ -5,7 +5,6 @@ import numpy as np - ############################################################################### # Generate sample data diff --git a/sklearn/cluster/tests/test_affinity_propagation.py b/sklearn/cluster/tests/test_affinity_propagation.py index 52007c375f667..136d2fe6fd781 100644 --- a/sklearn/cluster/tests/test_affinity_propagation.py +++ b/sklearn/cluster/tests/test_affinity_propagation.py @@ -3,20 +3,18 @@ """ -import numpy as np -import pytest import warnings +import numpy as np +import pytest from scipy.sparse import csr_matrix -from sklearn.exceptions import ConvergenceWarning, NotFittedError -from sklearn.utils._testing import assert_array_equal, assert_allclose - -from sklearn.cluster import AffinityPropagation +from sklearn.cluster import AffinityPropagation, affinity_propagation from sklearn.cluster._affinity_propagation import _equal_similarities_and_preferences -from sklearn.cluster import affinity_propagation from sklearn.datasets import make_blobs +from sklearn.exceptions import ConvergenceWarning, NotFittedError from sklearn.metrics import euclidean_distances +from sklearn.utils._testing import assert_allclose, assert_array_equal n_clusters = 3 centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10 diff --git a/sklearn/cluster/tests/test_bicluster.py b/sklearn/cluster/tests/test_bicluster.py index 0a68e97d6fb22..6d4a1067c4048 100644 --- a/sklearn/cluster/tests/test_bicluster.py +++ b/sklearn/cluster/tests/test_bicluster.py @@ -4,23 +4,21 @@ import pytest from scipy.sparse import csr_matrix, issparse -from sklearn.model_selection import ParameterGrid - -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal - from sklearn.base import BaseEstimator, BiclusterMixin - -from sklearn.cluster import SpectralCoclustering -from sklearn.cluster import SpectralBiclustering -from sklearn.cluster._bicluster import _scale_normalize -from sklearn.cluster._bicluster import _bistochastic_normalize -from sklearn.cluster._bicluster import _log_normalize - -from sklearn.metrics import consensus_score, v_measure_score - +from sklearn.cluster import SpectralBiclustering, SpectralCoclustering +from sklearn.cluster._bicluster import ( + _bistochastic_normalize, + _log_normalize, + _scale_normalize, +) from sklearn.datasets import make_biclusters, make_checkerboard +from sklearn.metrics import consensus_score, v_measure_score +from sklearn.model_selection import ParameterGrid +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) class MockBiclustering(BiclusterMixin, BaseEstimator): diff --git a/sklearn/cluster/tests/test_birch.py b/sklearn/cluster/tests/test_birch.py index c2f3c06d15ba7..7fb83f0803f02 100644 --- a/sklearn/cluster/tests/test_birch.py +++ b/sklearn/cluster/tests/test_birch.py @@ -2,19 +2,16 @@ Tests for the birch clustering algorithm. 
""" -from scipy import sparse import numpy as np import pytest +from scipy import sparse +from sklearn.cluster import AgglomerativeClustering, Birch from sklearn.cluster.tests.common import generate_clustered_data -from sklearn.cluster import Birch -from sklearn.cluster import AgglomerativeClustering from sklearn.datasets import make_blobs from sklearn.exceptions import ConvergenceWarning from sklearn.metrics import pairwise_distances_argmin, v_measure_score - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose +from sklearn.utils._testing import assert_allclose, assert_array_equal def test_n_samples_leaves_roots(global_random_seed, global_dtype): diff --git a/sklearn/cluster/tests/test_bisect_k_means.py b/sklearn/cluster/tests/test_bisect_k_means.py index c79cd0bcca3e8..24e8ee60abf9d 100644 --- a/sklearn/cluster/tests/test_bisect_k_means.py +++ b/sklearn/cluster/tests/test_bisect_k_means.py @@ -2,9 +2,9 @@ import pytest import scipy.sparse as sp -from sklearn.utils._testing import assert_array_equal, assert_allclose from sklearn.cluster import BisectingKMeans from sklearn.metrics import v_measure_score +from sklearn.utils._testing import assert_allclose, assert_array_equal @pytest.mark.parametrize("bisecting_strategy", ["biggest_inertia", "largest_cluster"]) @@ -132,3 +132,26 @@ def test_float32_float64_equivalence(is_sparse): assert_allclose(km32.cluster_centers_, km64.cluster_centers_) assert_array_equal(km32.labels_, km64.labels_) + + +@pytest.mark.parametrize("algorithm", ("lloyd", "elkan")) +def test_no_crash_on_empty_bisections(algorithm): + # Non-regression test for: + # https://github.com/scikit-learn/scikit-learn/issues/27081 + rng = np.random.RandomState(0) + X_train = rng.rand(3000, 10) + bkm = BisectingKMeans(n_clusters=10, algorithm=algorithm).fit(X_train) + + # predict on scaled data to trigger pathologic case + # where the inner mask leads to empty bisections. 
+ X_test = 50 * rng.rand(100, 10) + labels = bkm.predict(X_test) # should not crash with idiv by 0 + assert np.isin(np.unique(labels), np.arange(10)).all() + + +def test_one_feature(): + # Check that no error is raised when there is only one feature + # Non-regression test for: + # https://github.com/scikit-learn/scikit-learn/issues/27236 + X = np.random.normal(size=(128, 1)) + BisectingKMeans(bisecting_strategy="biggest_inertia", random_state=0).fit(X) diff --git a/sklearn/cluster/tests/test_dbscan.py b/sklearn/cluster/tests/test_dbscan.py index f36eb19caeb0f..972820c6cc137 100644 --- a/sklearn/cluster/tests/test_dbscan.py +++ b/sklearn/cluster/tests/test_dbscan.py @@ -3,23 +3,18 @@ """ import pickle - -import numpy as np - import warnings -from scipy.spatial import distance -from scipy import sparse - +import numpy as np import pytest +from scipy import sparse +from scipy.spatial import distance -from sklearn.utils._testing import assert_array_equal -from sklearn.neighbors import NearestNeighbors -from sklearn.cluster import DBSCAN -from sklearn.cluster import dbscan +from sklearn.cluster import DBSCAN, dbscan from sklearn.cluster.tests.common import generate_clustered_data from sklearn.metrics.pairwise import pairwise_distances - +from sklearn.neighbors import NearestNeighbors +from sklearn.utils._testing import assert_array_equal n_clusters = 3 X = generate_clustered_data(n_clusters=n_clusters) diff --git a/sklearn/cluster/tests/test_feature_agglomeration.py b/sklearn/cluster/tests/test_feature_agglomeration.py index 3db2862384c74..121e8f2cfe400 100644 --- a/sklearn/cluster/tests/test_feature_agglomeration.py +++ b/sklearn/cluster/tests/test_feature_agglomeration.py @@ -3,13 +3,14 @@ """ # Authors: Sergul Aydore 2017 import warnings -import numpy as np -from numpy.testing import assert_array_equal +import numpy as np import pytest +from numpy.testing import assert_array_equal + from sklearn.cluster import FeatureAgglomeration -from sklearn.utils._testing import assert_array_almost_equal from sklearn.datasets import make_blobs +from sklearn.utils._testing import assert_array_almost_equal def test_feature_agglomeration(): diff --git a/sklearn/cluster/tests/test_hdbscan.py b/sklearn/cluster/tests/test_hdbscan.py index b652a99aa221f..8f4629f573842 100644 --- a/sklearn/cluster/tests/test_hdbscan.py +++ b/sklearn/cluster/tests/test_hdbscan.py @@ -8,6 +8,12 @@ from scipy.spatial import distance from sklearn.cluster import HDBSCAN +from sklearn.cluster._hdbscan._tree import ( + CONDENSED_dtype, + _condense_tree, + _do_labelling, +) +from sklearn.cluster._hdbscan.hdbscan import _OUTLIER_ENCODING from sklearn.datasets import make_blobs from sklearn.metrics import fowlkes_mallows_score from sklearn.metrics.pairwise import _VALID_METRICS, euclidean_distances @@ -15,12 +21,6 @@ from sklearn.preprocessing import StandardScaler from sklearn.utils import shuffle from sklearn.utils._testing import assert_allclose, assert_array_equal -from sklearn.cluster._hdbscan.hdbscan import _OUTLIER_ENCODING -from sklearn.cluster._hdbscan._tree import ( - _do_labelling, - _condense_tree, - CONDENSED_dtype, -) n_clusters_true = 3 X, y = make_blobs(n_samples=200, random_state=10) @@ -165,7 +165,7 @@ def test_hdbscan_algorithms(algo, metric): metric_params=metric_params, ) - if metric not in ALGOS_TREES[algo].valid_metrics(): + if metric not in ALGOS_TREES[algo].valid_metrics: with pytest.raises(ValueError): hdb.fit(X) elif metric == "wminkowski": @@ -287,22 +287,37 @@ def test_hdbscan_precomputed_non_brute(tree): def 
test_hdbscan_sparse(): """ Tests that HDBSCAN works correctly when passing sparse feature data. + Evaluates correctness by comparing against the same data passed as a dense + array. """ - sparse_X = sparse.csr_matrix(X) - labels = HDBSCAN().fit(sparse_X).labels_ - n_clusters = len(set(labels) - OUTLIER_SET) + dense_labels = HDBSCAN().fit(X).labels_ + n_clusters = len(set(dense_labels) - OUTLIER_SET) assert n_clusters == 3 - sparse_X_nan = sparse_X.copy() - sparse_X_nan[0, 0] = np.nan - labels = HDBSCAN().fit(sparse_X_nan).labels_ - n_clusters = len(set(labels) - OUTLIER_SET) - assert n_clusters == 3 + _X_sparse = sparse.csr_matrix(X) + X_sparse = _X_sparse.copy() + sparse_labels = HDBSCAN().fit(X_sparse).labels_ + assert_array_equal(dense_labels, sparse_labels) + + # Compare that the sparse and dense non-precomputed routines return the same labels + # where the 0th observation contains the outlier. + for outlier_val, outlier_type in ((np.inf, "infinite"), (np.nan, "missing")): + X_dense = X.copy() + X_dense[0, 0] = outlier_val + dense_labels = HDBSCAN().fit(X_dense).labels_ + n_clusters = len(set(dense_labels) - OUTLIER_SET) + assert n_clusters == 3 + assert dense_labels[0] == _OUTLIER_ENCODING[outlier_type]["label"] + + X_sparse = _X_sparse.copy() + X_sparse[0, 0] = outlier_val + sparse_labels = HDBSCAN().fit(X_sparse).labels_ + assert_array_equal(dense_labels, sparse_labels) msg = "Sparse data matrices only support algorithm `brute`." with pytest.raises(ValueError, match=msg): - HDBSCAN(metric="euclidean", algorithm="balltree").fit(sparse_X) + HDBSCAN(metric="euclidean", algorithm="balltree").fit(X_sparse) @pytest.mark.parametrize("algorithm", ALGORITHMS) @@ -424,7 +439,7 @@ def test_hdbscan_tree_invalid_metric(): # The set of valid metrics for KDTree at the time of writing this test is a # strict subset of those supported in BallTree - metrics_not_kd = list(set(BallTree.valid_metrics()) - set(KDTree.valid_metrics())) + metrics_not_kd = list(set(BallTree.valid_metrics) - set(KDTree.valid_metrics)) if len(metrics_not_kd) > 0: with pytest.raises(ValueError, match=msg): HDBSCAN(algorithm="kdtree", metric=metrics_not_kd[0]).fit(X) diff --git a/sklearn/cluster/tests/test_hierarchical.py b/sklearn/cluster/tests/test_hierarchical.py index acaf3c27bedb1..95f28413d132d 100644 --- a/sklearn/cluster/tests/test_hierarchical.py +++ b/sklearn/cluster/tests/test_hierarchical.py @@ -6,48 +6,48 @@ # Matteo Visconti di Oleggio Castello 2014 # License: BSD 3 clause import itertools -from tempfile import mkdtemp import shutil -import pytest from functools import partial +from tempfile import mkdtemp import numpy as np +import pytest from scipy import sparse from scipy.cluster import hierarchy from scipy.sparse.csgraph import connected_components -from sklearn.metrics.cluster import adjusted_rand_score -from sklearn.metrics.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS -from sklearn.utils._testing import assert_almost_equal, create_memmap_backed_data -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings - -from sklearn.cluster import ward_tree -from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration +from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration, ward_tree from sklearn.cluster._agglomerative import ( - _hc_cut, _TREE_BUILDERS, - linkage_tree, _fix_connectivity, + _hc_cut, + linkage_tree, +) +from sklearn.cluster._hierarchical_fast import ( + average_merge, + max_merge, + mst_linkage_core, ) +from 
sklearn.datasets import make_circles, make_moons from sklearn.feature_extraction.image import grid_to_graph from sklearn.metrics import DistanceMetric +from sklearn.metrics.cluster import adjusted_rand_score, normalized_mutual_info_score from sklearn.metrics.pairwise import ( PAIRED_DISTANCES, cosine_distances, manhattan_distances, pairwise_distances, ) -from sklearn.metrics.cluster import normalized_mutual_info_score +from sklearn.metrics.tests.test_dist_metrics import METRICS_DEFAULT_PARAMS from sklearn.neighbors import kneighbors_graph -from sklearn.cluster._hierarchical_fast import ( - average_merge, - max_merge, - mst_linkage_core, -) from sklearn.utils._fast_dict import IntFloatDict -from sklearn.utils._testing import assert_array_equal -from sklearn.datasets import make_moons, make_circles +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + create_memmap_backed_data, + ignore_warnings, +) def test_linkage_misc(): diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py index c11d5dd3165c0..f1156a71a1ab2 100644 --- a/sklearn/cluster/tests/test_k_means.py +++ b/sklearn/cluster/tests/test_k_means.py @@ -2,37 +2,36 @@ import re import sys import warnings +from io import StringIO import numpy as np -from scipy import sparse as sp - import pytest +from scipy import sparse as sp -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils.fixes import threadpool_limits from sklearn.base import clone +from sklearn.cluster import KMeans, MiniBatchKMeans, k_means, kmeans_plusplus +from sklearn.cluster._k_means_common import ( + _euclidean_dense_dense_wrapper, + _euclidean_sparse_dense_wrapper, + _inertia_dense, + _inertia_sparse, + _is_same_clustering, + _relocate_empty_clusters_dense, + _relocate_empty_clusters_sparse, +) +from sklearn.cluster._kmeans import _labels_inertia, _mini_batch_step +from sklearn.datasets import make_blobs from sklearn.exceptions import ConvergenceWarning - -from sklearn.utils.extmath import row_norms -from sklearn.metrics import pairwise_distances -from sklearn.metrics import pairwise_distances_argmin -from sklearn.metrics.pairwise import euclidean_distances +from sklearn.metrics import pairwise_distances, pairwise_distances_argmin from sklearn.metrics.cluster import v_measure_score -from sklearn.cluster import KMeans, k_means, kmeans_plusplus -from sklearn.cluster import MiniBatchKMeans -from sklearn.cluster._kmeans import _labels_inertia -from sklearn.cluster._kmeans import _mini_batch_step -from sklearn.cluster._k_means_common import _relocate_empty_clusters_dense -from sklearn.cluster._k_means_common import _relocate_empty_clusters_sparse -from sklearn.cluster._k_means_common import _euclidean_dense_dense_wrapper -from sklearn.cluster._k_means_common import _euclidean_sparse_dense_wrapper -from sklearn.cluster._k_means_common import _inertia_dense -from sklearn.cluster._k_means_common import _inertia_sparse -from sklearn.cluster._k_means_common import _is_same_clustering -from sklearn.utils._testing import create_memmap_backed_data -from sklearn.datasets import make_blobs -from io import StringIO +from sklearn.metrics.pairwise import euclidean_distances +from sklearn.utils._testing import ( + assert_allclose, + assert_array_equal, + create_memmap_backed_data, +) +from sklearn.utils.extmath import row_norms +from sklearn.utils.fixes import threadpool_limits # TODO(1.4): Remove msg = ( @@ -349,6 +348,37 
@@ def test_minibatch_kmeans_partial_fit_init(init): _check_fitted_model(km) +@pytest.mark.parametrize( + "init, expected_n_init", + [ + ("k-means++", 1), + ("random", "default"), + ( + lambda X, n_clusters, random_state: random_state.uniform( + size=(n_clusters, X.shape[1]) + ), + "default", + ), + ("array-like", 1), + ], +) +@pytest.mark.parametrize("Estimator", [KMeans, MiniBatchKMeans]) +def test_kmeans_init_auto_with_initial_centroids(Estimator, init, expected_n_init): + """Check that `n_init="auto"` chooses the right number of initializations. + Non-regression test for #26657: + https://github.com/scikit-learn/scikit-learn/pull/26657 + """ + n_sample, n_features, n_clusters = 100, 10, 5 + X = np.random.randn(n_sample, n_features) + if init == "array-like": + init = np.random.randn(n_clusters, n_features) + if expected_n_init == "default": + expected_n_init = 3 if Estimator is MiniBatchKMeans else 10 + + kmeans = Estimator(n_clusters=n_clusters, init=init, n_init="auto").fit(X) + assert kmeans._n_init == expected_n_init + + @pytest.mark.parametrize("Estimator", [KMeans, MiniBatchKMeans]) def test_fortran_aligned_data(Estimator, global_random_seed): # Check that KMeans works with fortran-aligned data. diff --git a/sklearn/cluster/tests/test_mean_shift.py b/sklearn/cluster/tests/test_mean_shift.py index db13e4d18650f..265c72d0c4ce1 100644 --- a/sklearn/cluster/tests/test_mean_shift.py +++ b/sklearn/cluster/tests/test_mean_shift.py @@ -3,20 +3,15 @@ """ -import numpy as np import warnings -import pytest -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose +import numpy as np +import pytest -from sklearn.cluster import MeanShift -from sklearn.cluster import mean_shift -from sklearn.cluster import estimate_bandwidth -from sklearn.cluster import get_bin_seeds +from sklearn.cluster import MeanShift, estimate_bandwidth, get_bin_seeds, mean_shift from sklearn.datasets import make_blobs from sklearn.metrics import v_measure_score - +from sklearn.utils._testing import assert_allclose, assert_array_equal n_clusters = 3 centers = np.array([[1, 1], [-1, -1], [1, -1]]) + 10 diff --git a/sklearn/cluster/tests/test_optics.py b/sklearn/cluster/tests/test_optics.py index 0acf818912c0f..d7bf4034ab98a 100644 --- a/sklearn/cluster/tests/test_optics.py +++ b/sklearn/cluster/tests/test_optics.py @@ -1,24 +1,21 @@ # Authors: Shane Grigsby # Adrin Jalali # License: BSD 3 clause +import warnings + import numpy as np import pytest from scipy import sparse -import warnings -from sklearn.datasets import make_blobs -from sklearn.cluster import OPTICS +from sklearn.cluster import DBSCAN, OPTICS from sklearn.cluster._optics import _extend_region, _extract_xi_labels -from sklearn.exceptions import DataConversionWarning +from sklearn.cluster.tests.common import generate_clustered_data +from sklearn.datasets import make_blobs +from sklearn.exceptions import DataConversionWarning, EfficiencyWarning from sklearn.metrics.cluster import contingency_matrix from sklearn.metrics.pairwise import pairwise_distances -from sklearn.cluster import DBSCAN from sklearn.utils import shuffle -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.exceptions import EfficiencyWarning -from sklearn.cluster.tests.common import generate_clustered_data - +from sklearn.utils._testing import assert_allclose, assert_array_equal rng = np.random.RandomState(0) n_points_per_cluster = 10 diff --git 
a/sklearn/cluster/tests/test_spectral.py b/sklearn/cluster/tests/test_spectral.py index d301f06e92075..3813a45c13719 100644 --- a/sklearn/cluster/tests/test_spectral.py +++ b/sklearn/cluster/tests/test_spectral.py @@ -1,24 +1,21 @@ """Testing for Spectral Clustering methods""" +import pickle import re import numpy as np +import pytest from scipy import sparse from scipy.linalg import LinAlgError -import pytest - -import pickle - -from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_equal - from sklearn.cluster import SpectralClustering, spectral_clustering -from sklearn.cluster._spectral import discretize, cluster_qr +from sklearn.cluster._spectral import cluster_qr, discretize +from sklearn.datasets import make_blobs from sklearn.feature_extraction import img_to_graph from sklearn.metrics import adjusted_rand_score from sklearn.metrics.pairwise import kernel_metrics, rbf_kernel from sklearn.neighbors import NearestNeighbors -from sklearn.datasets import make_blobs +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_array_equal try: from pyamg import smoothed_aggregation_solver # noqa @@ -227,6 +224,10 @@ def test_discretize(n_samples): @pytest.mark.filterwarnings( "ignore:scipy.linalg.pinv2 is deprecated:DeprecationWarning:pyamg.*" ) +# TODO: Remove when pyamg removes the use of np.find_common_type +@pytest.mark.filterwarnings( + "ignore:np.find_common_type is deprecated:DeprecationWarning:pyamg.*" +) def test_spectral_clustering_with_arpack_amg_solvers(): # Test that spectral_clustering is the same for arpack and amg solver # Based on toy example from plot_segmentation_toy.py diff --git a/sklearn/compose/__init__.py b/sklearn/compose/__init__.py index 8be8d17040e82..7b137cdf9e07f 100644 --- a/sklearn/compose/__init__.py +++ b/sklearn/compose/__init__.py @@ -7,12 +7,11 @@ from ._column_transformer import ( ColumnTransformer, - make_column_transformer, make_column_selector, + make_column_transformer, ) from ._target import TransformedTargetRegressor - __all__ = [ "ColumnTransformer", "make_column_transformer", diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 14349662cfee9..1b5373dbbc316 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -6,29 +6,28 @@ # Author: Andreas Mueller # Joris Van den Bossche # License: BSD -from numbers import Integral, Real -from itertools import chain from collections import Counter +from itertools import chain +from numbers import Integral, Real import numpy as np from scipy import sparse -from ..base import clone, TransformerMixin -from ..base import _fit_context -from ..utils._estimator_html_repr import _VisualBlock -from ..pipeline import _fit_transform_one, _transform_one, _name_estimators +from ..base import TransformerMixin, _fit_context, clone +from ..pipeline import _fit_transform_one, _name_estimators, _transform_one from ..preprocessing import FunctionTransformer -from ..utils import Bunch -from ..utils import _safe_indexing -from ..utils import _get_column_indices -from ..utils._param_validation import HasMethods, Interval, StrOptions, Hidden +from ..utils import Bunch, _get_column_indices, _safe_indexing, check_pandas_support +from ..utils._estimator_html_repr import _VisualBlock +from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions from ..utils._set_output import _get_output_config, _safe_set_output -from ..utils import check_pandas_support from 
..utils.metaestimators import _BaseComposition -from ..utils.validation import check_array, check_is_fitted, _check_feature_names_in -from ..utils.validation import _num_samples -from ..utils.parallel import delayed, Parallel - +from ..utils.parallel import Parallel, delayed +from ..utils.validation import ( + _check_feature_names_in, + _num_samples, + check_array, + check_is_fitted, +) __all__ = ["ColumnTransformer", "make_column_transformer", "make_column_selector"] @@ -118,10 +117,12 @@ class ColumnTransformer(TransformerMixin, _BaseComposition): printed as it is completed. verbose_feature_names_out : bool, default=True - If True, :meth:`get_feature_names_out` will prefix all feature names - with the name of the transformer that generated that feature. - If False, :meth:`get_feature_names_out` will not prefix any feature - names and will error if feature names are not unique. + If True, :meth:`ColumnTransformer.get_feature_names_out` will prefix + all feature names with the name of the transformer that generated that + feature. + If False, :meth:`ColumnTransformer.get_feature_names_out` will not + prefix any feature names and will error if feature names are not + unique. .. versionadded:: 1.0 @@ -163,6 +164,12 @@ class ColumnTransformer(TransformerMixin, _BaseComposition): .. versionadded:: 0.24 + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + See Also -------- make_column_transformer : Convenience function for @@ -213,6 +220,9 @@ class ColumnTransformer(TransformerMixin, _BaseComposition): ... [("text_preprocess", FeatureHasher(input_type="string"), "documents"), ... ("num_preprocess", MinMaxScaler(), ["width"])]) >>> X_trans = ct.fit_transform(X) # doctest: +SKIP + + For a more detailed example of usage, see + :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`. """ _required_parameters = ["transformers"] @@ -1024,10 +1034,12 @@ def make_column_transformer( printed as it is completed. verbose_feature_names_out : bool, default=True - If True, :meth:`get_feature_names_out` will prefix all feature names - with the name of the transformer that generated that feature. - If False, :meth:`get_feature_names_out` will not prefix any feature - names and will error if feature names are not unique. + If True, :meth:`ColumnTransformer.get_feature_names_out` will prefix + all feature names with the name of the transformer that generated that + feature. + If False, :meth:`ColumnTransformer.get_feature_names_out` will not + prefix any feature names and will error if feature names are not + unique. .. versionadded:: 1.0 @@ -1075,6 +1087,11 @@ class make_column_selector: columns name with a regex. When using multiple selection criteria, **all** criteria must match for a column to be selected. + For an example of how to use :func:`make_column_selector` within a + :class:`ColumnTransformer` to select columns based on data type (i.e. + `dtype`), refer to + :ref:`sphx_glr_auto_examples_compose_plot_column_transformer_mixed_types.py`. 
+ Parameters ---------- pattern : str, default=None diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py index e926ed7abe324..79f8d503a24f9 100644 --- a/sklearn/compose/_target.py +++ b/sklearn/compose/_target.py @@ -6,14 +6,13 @@ import numpy as np -from ..base import BaseEstimator, RegressorMixin, clone -from ..base import _fit_context -from ..utils.validation import check_is_fitted -from ..utils._tags import _safe_tags -from ..utils import check_array, _safe_indexing -from ..utils._param_validation import HasMethods -from ..preprocessing import FunctionTransformer +from ..base import BaseEstimator, RegressorMixin, _fit_context, clone from ..exceptions import NotFittedError +from ..preprocessing import FunctionTransformer +from ..utils import _safe_indexing, check_array +from ..utils._param_validation import HasMethods +from ..utils._tags import _safe_tags +from ..utils.validation import check_is_fitted __all__ = ["TransformedTargetRegressor"] @@ -109,9 +108,6 @@ class TransformedTargetRegressor(RegressorMixin, BaseEstimator): to be used by scikit-learn transformers. At the time of prediction, the output will be reshaped to a have the same number of dimensions as `y`. - See :ref:`examples/compose/plot_transformed_target.py - `. - Examples -------- >>> import numpy as np @@ -127,6 +123,9 @@ class TransformedTargetRegressor(RegressorMixin, BaseEstimator): 1.0 >>> tt.regressor_.coef_ array([2.]) + + For a more detailed example use case refer to + :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py`. """ _parameter_constraints: dict = { diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index cb9ddc0b4f344..36be45465c536 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ b/sklearn/compose/tests/test_column_transformer.py @@ -1,28 +1,33 @@ """ Test the ColumnTransformer. 
""" -import re import pickle +import re import numpy as np -from scipy import sparse import pytest - from numpy.testing import assert_allclose -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_almost_equal +from scipy import sparse from sklearn.base import BaseEstimator, TransformerMixin from sklearn.compose import ( ColumnTransformer, - make_column_transformer, make_column_selector, + make_column_transformer, ) from sklearn.exceptions import NotFittedError -from sklearn.preprocessing import FunctionTransformer -from sklearn.preprocessing import StandardScaler, Normalizer, OneHotEncoder from sklearn.feature_selection import VarianceThreshold +from sklearn.preprocessing import ( + FunctionTransformer, + Normalizer, + OneHotEncoder, + StandardScaler, +) +from sklearn.utils._testing import ( + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_equal, +) class Trans(TransformerMixin, BaseEstimator): @@ -252,18 +257,32 @@ def test_column_transformer_dataframe(): # ensure pandas object is passed through class TransAssert(BaseEstimator): + def __init__(self, expected_type_transform): + self.expected_type_transform = expected_type_transform + def fit(self, X, y=None): return self def transform(self, X, y=None): - assert isinstance(X, (pd.DataFrame, pd.Series)) + assert isinstance(X, self.expected_type_transform) if isinstance(X, pd.Series): X = X.to_frame() return X - ct = ColumnTransformer([("trans", TransAssert(), "first")], remainder="drop") + ct = ColumnTransformer( + [("trans", TransAssert(expected_type_transform=pd.Series), "first")], + remainder="drop", + ) ct.fit_transform(X_df) - ct = ColumnTransformer([("trans", TransAssert(), ["first", "second"])]) + ct = ColumnTransformer( + [ + ( + "trans", + TransAssert(expected_type_transform=pd.DataFrame), + ["first", "second"], + ) + ] + ) ct.fit_transform(X_df) # integer column spec + integer column names -> still use positional diff --git a/sklearn/compose/tests/test_target.py b/sklearn/compose/tests/test_target.py index f0d63c00c2772..53242b7e0277b 100644 --- a/sklearn/compose/tests/test_target.py +++ b/sklearn/compose/tests/test_target.py @@ -1,25 +1,14 @@ import numpy as np import pytest -from sklearn.base import clone -from sklearn.base import BaseEstimator -from sklearn.base import TransformerMixin - -from sklearn.dummy import DummyRegressor - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_no_warnings - -from sklearn.preprocessing import FunctionTransformer -from sklearn.preprocessing import StandardScaler - -from sklearn.pipeline import Pipeline - -from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit - from sklearn import datasets - +from sklearn.base import BaseEstimator, TransformerMixin, clone from sklearn.compose import TransformedTargetRegressor +from sklearn.dummy import DummyRegressor +from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import FunctionTransformer, StandardScaler +from sklearn.utils._testing import assert_allclose, assert_no_warnings friedman = datasets.make_friedman1(random_state=0) diff --git a/sklearn/conftest.py b/sklearn/conftest.py index 5d5f80d2e22d5..9eed56011a505 100644 --- a/sklearn/conftest.py +++ b/sklearn/conftest.py @@ -1,29 +1,29 @@ -from os import environ -from functools import wraps import platform import sys 
from contextlib import suppress +from functools import wraps +from os import environ from unittest import SkipTest import joblib -import pytest import numpy as np -from threadpoolctl import threadpool_limits +import pytest from _pytest.doctest import DoctestItem +from threadpoolctl import threadpool_limits -from sklearn.utils import _IS_32BIT from sklearn._min_dependencies import PYTEST_MIN_VERSION -from sklearn.utils.fixes import sp_version -from sklearn.utils.fixes import parse_version -from sklearn.datasets import fetch_20newsgroups -from sklearn.datasets import fetch_20newsgroups_vectorized -from sklearn.datasets import fetch_california_housing -from sklearn.datasets import fetch_covtype -from sklearn.datasets import fetch_kddcup99 -from sklearn.datasets import fetch_olivetti_faces -from sklearn.datasets import fetch_rcv1 +from sklearn.datasets import ( + fetch_20newsgroups, + fetch_20newsgroups_vectorized, + fetch_california_housing, + fetch_covtype, + fetch_kddcup99, + fetch_olivetti_faces, + fetch_rcv1, +) from sklearn.tests import random_seed - +from sklearn.utils import _IS_32BIT +from sklearn.utils.fixes import np_base_version, parse_version, sp_version if parse_version(pytest.__version__) < parse_version(PYTEST_MIN_VERSION): raise ImportError( @@ -178,6 +178,10 @@ def pytest_collection_modifyitems(config, items): ) skip_doctests = True + if np_base_version >= parse_version("2"): + reason = "Due to NEP 51 numpy scalar repr has changed in numpy 2" + skip_doctests = True + # Normally doctest has the entire module's scope. Here we set globs to an empty dict # to remove the module's scope: # https://docs.python.org/3/library/doctest.html#what-s-the-execution-context diff --git a/sklearn/covariance/__init__.py b/sklearn/covariance/__init__.py index 011fde3647145..8fcf8c68444e5 100644 --- a/sklearn/covariance/__init__.py +++ b/sklearn/covariance/__init__.py @@ -6,24 +6,23 @@ Models. """ +from ._elliptic_envelope import EllipticEnvelope from ._empirical_covariance import ( - empirical_covariance, EmpiricalCovariance, + empirical_covariance, log_likelihood, ) +from ._graph_lasso import GraphicalLasso, GraphicalLassoCV, graphical_lasso +from ._robust_covariance import MinCovDet, fast_mcd from ._shrunk_covariance import ( - shrunk_covariance, + OAS, + LedoitWolf, ShrunkCovariance, ledoit_wolf, ledoit_wolf_shrinkage, - LedoitWolf, oas, - OAS, + shrunk_covariance, ) -from ._robust_covariance import fast_mcd, MinCovDet -from ._graph_lasso import graphical_lasso, GraphicalLasso, GraphicalLassoCV -from ._elliptic_envelope import EllipticEnvelope - __all__ = [ "EllipticEnvelope", diff --git a/sklearn/covariance/_elliptic_envelope.py b/sklearn/covariance/_elliptic_envelope.py index c99f200592580..fe109dddd5303 100644 --- a/sklearn/covariance/_elliptic_envelope.py +++ b/sklearn/covariance/_elliptic_envelope.py @@ -2,14 +2,15 @@ # # License: BSD 3 clause -import numpy as np from numbers import Real -from . 
import MinCovDet + +import numpy as np + +from ..base import OutlierMixin, _fit_context +from ..metrics import accuracy_score from ..utils._param_validation import Interval from ..utils.validation import check_is_fitted -from ..metrics import accuracy_score -from ..base import OutlierMixin -from ..base import _fit_context +from ._robust_covariance import MinCovDet class EllipticEnvelope(OutlierMixin, MinCovDet): diff --git a/sklearn/covariance/_empirical_covariance.py b/sklearn/covariance/_empirical_covariance.py index 8083bfd2e1aa1..7484c58be896d 100644 --- a/sklearn/covariance/_empirical_covariance.py +++ b/sklearn/covariance/_empirical_covariance.py @@ -11,16 +11,16 @@ # avoid division truncation import warnings + import numpy as np from scipy import linalg from .. import config_context -from ..base import BaseEstimator -from ..base import _fit_context +from ..base import BaseEstimator, _fit_context +from ..metrics.pairwise import pairwise_distances from ..utils import check_array from ..utils._param_validation import validate_params from ..utils.extmath import fast_logdet -from ..metrics.pairwise import pairwise_distances def log_likelihood(emp_cov, precision): @@ -54,7 +54,8 @@ def log_likelihood(emp_cov, precision): { "X": ["array-like"], "assume_centered": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def empirical_covariance(X, *, assume_centered=False): """Compute the Maximum likelihood covariance estimator. diff --git a/sklearn/covariance/_graph_lasso.py b/sklearn/covariance/_graph_lasso.py index 8575cc4f75801..3b715a5d53bcc 100644 --- a/sklearn/covariance/_graph_lasso.py +++ b/sklearn/covariance/_graph_lasso.py @@ -5,32 +5,30 @@ # Author: Gael Varoquaux # License: BSD 3 clause # Copyright: INRIA -import warnings import operator import sys import time - +import warnings from numbers import Integral, Real + import numpy as np from scipy import linalg -from . import empirical_covariance, EmpiricalCovariance, log_likelihood - from ..base import _fit_context from ..exceptions import ConvergenceWarning -from ..utils.validation import ( - _is_arraylike_not_scalar, - check_random_state, - check_scalar, -) -from ..utils.parallel import delayed, Parallel -from ..utils._param_validation import Interval, StrOptions -from ..utils._param_validation import validate_params # mypy error: Module 'sklearn.linear_model' has no attribute '_cd_fast' from ..linear_model import _cd_fast as cd_fast # type: ignore from ..linear_model import lars_path_gram from ..model_selection import check_cv, cross_val_score +from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils.parallel import Parallel, delayed +from ..utils.validation import ( + _is_arraylike_not_scalar, + check_random_state, + check_scalar, +) +from . import EmpiricalCovariance, empirical_covariance, log_likelihood # Helper functions to compute the objective and dual objective functions @@ -219,7 +217,8 @@ def alpha_max(emp_cov): "cov_init": ["array-like", None], "return_costs": ["boolean"], "return_n_iter": ["boolean"], - } + }, + prefer_skip_nested_validation=False, ) def graphical_lasso( emp_cov, @@ -738,7 +737,7 @@ class GraphicalLassoCV(BaseGraphicalLasso): - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. - For integer/None inputs :class:`KFold` is used. + For integer/None inputs :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. 
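The GraphicalLassoCV docstring now points at :class:`~sklearn.model_selection.KFold` explicitly; the handling of integer `cv` values itself is unchanged. A hedged sketch of that equivalence, on synthetic data chosen only for illustration:

import numpy as np
from sklearn.covariance import GraphicalLassoCV
from sklearn.model_selection import KFold

rng = np.random.RandomState(0)
X = rng.multivariate_normal(mean=np.zeros(5), cov=np.eye(5), size=200)

# An integer cv is expanded to an unshuffled KFold with that many splits,
# so both models should select the same regularization strength.
cv_int = GraphicalLassoCV(cv=5).fit(X)
cv_kfold = GraphicalLassoCV(cv=KFold(n_splits=5)).fit(X)
print(cv_int.alpha_, cv_kfold.alpha_)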
diff --git a/sklearn/covariance/_robust_covariance.py b/sklearn/covariance/_robust_covariance.py index c723bba7a097b..a6b32e50a6c1f 100644 --- a/sklearn/covariance/_robust_covariance.py +++ b/sklearn/covariance/_robust_covariance.py @@ -10,15 +10,16 @@ import warnings from numbers import Integral, Real + import numpy as np from scipy import linalg from scipy.stats import chi2 -from . import empirical_covariance, EmpiricalCovariance from ..base import _fit_context -from ..utils.extmath import fast_logdet -from ..utils import check_random_state, check_array +from ..utils import check_array, check_random_state from ..utils._param_validation import Interval +from ..utils.extmath import fast_logdet +from ._empirical_covariance import EmpiricalCovariance, empirical_covariance # Minimum Covariance Determinant diff --git a/sklearn/covariance/_shrunk_covariance.py b/sklearn/covariance/_shrunk_covariance.py index 21d2e034b45d7..5a568192dd3c3 100644 --- a/sklearn/covariance/_shrunk_covariance.py +++ b/sklearn/covariance/_shrunk_covariance.py @@ -14,13 +14,14 @@ # avoid division truncation import warnings -from numbers import Real, Integral +from numbers import Integral, Real + import numpy as np -from . import empirical_covariance, EmpiricalCovariance from ..base import _fit_context from ..utils import check_array from ..utils._param_validation import Interval, validate_params +from . import EmpiricalCovariance, empirical_covariance def _ledoit_wolf(X, *, assume_centered, block_size): @@ -104,7 +105,8 @@ def _oas(X, *, assume_centered=False): { "emp_cov": ["array-like"], "shrinkage": [Interval(Real, 0, 1, closed="both")], - } + }, + prefer_skip_nested_validation=True, ) def shrunk_covariance(emp_cov, shrinkage=0.1): """Calculate a covariance matrix shrunk on the diagonal. @@ -278,7 +280,8 @@ def fit(self, X, y=None): "X": ["array-like"], "assume_centered": ["boolean"], "block_size": [Interval(Integral, 1, None, closed="left")], - } + }, + prefer_skip_nested_validation=True, ) def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000): """Estimate the shrunk Ledoit-Wolf covariance matrix. @@ -375,7 +378,10 @@ def ledoit_wolf_shrinkage(X, assume_centered=False, block_size=1000): return shrinkage -@validate_params({"X": ["array-like"]}) +@validate_params( + {"X": ["array-like"]}, + prefer_skip_nested_validation=False, +) def ledoit_wolf(X, *, assume_centered=False, block_size=1000): """Estimate the shrunk Ledoit-Wolf covariance matrix. @@ -568,7 +574,10 @@ def fit(self, X, y=None): # OAS estimator -@validate_params({"X": ["array-like"]}) +@validate_params( + {"X": ["array-like"]}, + prefer_skip_nested_validation=False, +) def oas(X, *, assume_centered=False): """Estimate covariance with the Oracle Approximating Shrinkage as proposed in [1]_. 
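The `validate_params` decorators added in the hunks above only tighten input validation for the functional shrinkage estimators; their signatures and return values are unchanged. A brief sketch of calling them directly (synthetic data, illustration only):

import numpy as np
from sklearn.covariance import (
    empirical_covariance,
    ledoit_wolf,
    oas,
    shrunk_covariance,
)

rng = np.random.RandomState(0)
X = rng.randn(60, 4)

# Both helpers return the shrunk covariance and the shrinkage they selected.
lw_cov, lw_shrinkage = ledoit_wolf(X)
oas_cov, oas_shrinkage = oas(X)

# shrunk_covariance applies a fixed shrinkage to a precomputed covariance.
fixed = shrunk_covariance(empirical_covariance(X), shrinkage=0.1)
print(lw_shrinkage, oas_shrinkage, fixed.shape)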
diff --git a/sklearn/covariance/tests/test_covariance.py b/sklearn/covariance/tests/test_covariance.py index bbd3a4757a835..0866c209a10c3 100644 --- a/sklearn/covariance/tests/test_covariance.py +++ b/sklearn/covariance/tests/test_covariance.py @@ -7,24 +7,25 @@ import numpy as np import pytest -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal - from sklearn import datasets from sklearn.covariance import ( - empirical_covariance, + OAS, EmpiricalCovariance, - ShrunkCovariance, - shrunk_covariance, LedoitWolf, + ShrunkCovariance, + empirical_covariance, ledoit_wolf, ledoit_wolf_shrinkage, - OAS, oas, + shrunk_covariance, ) from sklearn.covariance._shrunk_covariance import _ledoit_wolf +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) from .._shrunk_covariance import _oas diff --git a/sklearn/covariance/tests/test_elliptic_envelope.py b/sklearn/covariance/tests/test_elliptic_envelope.py index 122d4c8bfb4cc..ca85717fb3782 100644 --- a/sklearn/covariance/tests/test_elliptic_envelope.py +++ b/sklearn/covariance/tests/test_elliptic_envelope.py @@ -6,10 +6,12 @@ import pytest from sklearn.covariance import EllipticEnvelope -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal from sklearn.exceptions import NotFittedError +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) def test_elliptic_envelope(global_random_seed): diff --git a/sklearn/covariance/tests/test_graphical_lasso.py b/sklearn/covariance/tests/test_graphical_lasso.py index 44a60f3e05103..aaee1919e8dcc 100644 --- a/sklearn/covariance/tests/test_graphical_lasso.py +++ b/sklearn/covariance/tests/test_graphical_lasso.py @@ -1,29 +1,35 @@ """ Test the graphical_lasso module. """ import sys -import pytest +from io import StringIO import numpy as np -from scipy import linalg - +import pytest from numpy.testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_less -from sklearn.utils._testing import _convert_container +from scipy import linalg +from sklearn import datasets from sklearn.covariance import ( - graphical_lasso, GraphicalLasso, GraphicalLassoCV, empirical_covariance, + graphical_lasso, ) from sklearn.datasets import make_sparse_spd_matrix -from io import StringIO from sklearn.utils import check_random_state -from sklearn import datasets +from sklearn.utils._testing import ( + _convert_container, + assert_array_almost_equal, + assert_array_less, +) + +def test_graphical_lassos(random_state=1): + """Test the graphical lasso solvers. -def test_graphical_lasso(random_state=0): + This checks is unstable for some random seeds where the covariance found with "cd" + and "lars" solvers are different (4 cases / 100 tries). 
+ """ # Sample data from a sparse multivariate normal dim = 20 n_samples = 100 @@ -45,10 +51,11 @@ def test_graphical_lasso(random_state=0): costs, dual_gap = np.array(costs).T # Check that the costs always decrease (doesn't hold if alpha == 0) if not alpha == 0: - assert_array_less(np.diff(costs), 0) + # use 1e-12 since the cost can be exactly 0 + assert_array_less(np.diff(costs), 1e-12) # Check that the 2 approaches give similar results - assert_array_almost_equal(covs["cd"], covs["lars"], decimal=4) - assert_array_almost_equal(icovs["cd"], icovs["lars"], decimal=4) + assert_allclose(covs["cd"], covs["lars"], atol=1e-4) + assert_allclose(icovs["cd"], icovs["lars"], atol=1e-4) # Smoke test the estimator model = GraphicalLasso(alpha=0.25).fit(X) diff --git a/sklearn/covariance/tests/test_robust_covariance.py b/sklearn/covariance/tests/test_robust_covariance.py index 213f3d7e8f04b..44dcdbbbf8249 100644 --- a/sklearn/covariance/tests/test_robust_covariance.py +++ b/sklearn/covariance/tests/test_robust_covariance.py @@ -9,11 +9,9 @@ import numpy as np import pytest -from sklearn.utils._testing import assert_array_almost_equal - from sklearn import datasets -from sklearn.covariance import empirical_covariance, MinCovDet -from sklearn.covariance import fast_mcd +from sklearn.covariance import MinCovDet, empirical_covariance, fast_mcd +from sklearn.utils._testing import assert_array_almost_equal X = datasets.load_iris().data X_1d = X[:, 0] diff --git a/sklearn/cross_decomposition/__init__.py b/sklearn/cross_decomposition/__init__.py index ec2f5fb3049af..47b78783caf9c 100644 --- a/sklearn/cross_decomposition/__init__.py +++ b/sklearn/cross_decomposition/__init__.py @@ -1,3 +1,3 @@ -from ._pls import PLSCanonical, PLSRegression, PLSSVD, CCA +from ._pls import CCA, PLSSVD, PLSCanonical, PLSRegression __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"] diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index da395d8f060fb..822a13064bb08 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -5,25 +5,27 @@ # Author: Edouard Duchesnay # License: BSD 3 clause -from numbers import Integral, Real - import warnings from abc import ABCMeta, abstractmethod +from numbers import Integral, Real import numpy as np from scipy.linalg import svd -from ..base import BaseEstimator, RegressorMixin, TransformerMixin -from ..base import MultiOutputMixin -from ..base import ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + MultiOutputMixin, + RegressorMixin, + TransformerMixin, + _fit_context, +) +from ..exceptions import ConvergenceWarning from ..utils import check_array, check_consistent_length -from ..utils.fixes import sp_version -from ..utils.fixes import parse_version -from ..utils.extmath import svd_flip -from ..utils.validation import check_is_fitted, FLOAT_DTYPES from ..utils._param_validation import Interval, StrOptions -from ..exceptions import ConvergenceWarning +from ..utils.extmath import svd_flip +from ..utils.fixes import parse_version, sp_version +from ..utils.validation import FLOAT_DTYPES, check_is_fitted __all__ = ["PLSCanonical", "PLSRegression", "PLSSVD"] @@ -236,7 +238,10 @@ def fit(self, X, Y): Y, input_name="Y", dtype=np.float64, copy=self.copy, ensure_2d=False ) if Y.ndim == 1: + self._predict_1d = True Y = Y.reshape(-1, 1) + else: + self._predict_1d = False n = X.shape[0] p = X.shape[1] @@ -467,8 +472,8 @@ def 
predict(self, X, copy=True): # Normalize X -= self._x_mean X /= self._x_std - Ypred = X @ self.coef_.T - return Ypred + self.intercept_ + Ypred = X @ self.coef_.T + self.intercept_ + return Ypred.ravel() if self._predict_1d else Ypred def fit_transform(self, X, y=None): """Learn and apply the dimension reduction on the train data. diff --git a/sklearn/cross_decomposition/tests/test_pls.py b/sklearn/cross_decomposition/tests/test_pls.py index 8f4840c9b9f21..b8b5cbaa0f275 100644 --- a/sklearn/cross_decomposition/tests/test_pls.py +++ b/sklearn/cross_decomposition/tests/test_pls.py @@ -1,21 +1,22 @@ -import pytest import warnings + import numpy as np -from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_allclose +import pytest +from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal -from sklearn.datasets import load_linnerud +from sklearn.cross_decomposition import CCA, PLSSVD, PLSCanonical, PLSRegression from sklearn.cross_decomposition._pls import ( _center_scale_xy, _get_first_singular_vectors_power_method, _get_first_singular_vectors_svd, _svd_flip_1d, ) -from sklearn.cross_decomposition import CCA -from sklearn.cross_decomposition import PLSSVD, PLSRegression, PLSCanonical -from sklearn.datasets import make_regression +from sklearn.datasets import load_linnerud, make_regression +from sklearn.ensemble import VotingRegressor +from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import LinearRegression from sklearn.utils import check_random_state from sklearn.utils.extmath import svd_flip -from sklearn.exceptions import ConvergenceWarning def assert_matrix_orthogonal(M): @@ -622,3 +623,24 @@ def test_pls_set_output(Klass): assert isinstance(y_trans, np.ndarray) assert isinstance(X_trans, pd.DataFrame) assert_array_equal(X_trans.columns, est.get_feature_names_out()) + + +def test_pls_regression_fit_1d_y(): + """Check that when fitting with 1d `y`, prediction should also be 1d. + + Non-regression test for Issue #26549. 
+ """ + X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]]) + y = np.array([2, 6, 12, 20, 30, 42]) + expected = y.copy() + + plsr = PLSRegression().fit(X, y) + y_pred = plsr.predict(X) + assert y_pred.shape == expected.shape + + # Check that it works in VotingRegressor + lr = LinearRegression().fit(X, y) + vr = VotingRegressor([("lr", lr), ("plsr", plsr)]) + y_pred = vr.fit(X, y).predict(X) + assert y_pred.shape == expected.shape + assert_allclose(y_pred, expected) diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py index 465d4159a32c4..7ae7902f3365c 100644 --- a/sklearn/datasets/__init__.py +++ b/sklearn/datasets/__init__.py @@ -5,52 +5,55 @@ """ import textwrap -from ._base import load_breast_cancer -from ._base import load_diabetes -from ._base import load_digits -from ._base import load_files -from ._base import load_iris -from ._base import load_linnerud -from ._base import load_sample_images -from ._base import load_sample_image -from ._base import load_wine -from ._base import get_data_home -from ._base import clear_data_home +from ._base import ( + clear_data_home, + get_data_home, + load_breast_cancer, + load_diabetes, + load_digits, + load_files, + load_iris, + load_linnerud, + load_sample_image, + load_sample_images, + load_wine, +) +from ._california_housing import fetch_california_housing from ._covtype import fetch_covtype from ._kddcup99 import fetch_kddcup99 -from ._lfw import fetch_lfw_pairs -from ._lfw import fetch_lfw_people -from ._twenty_newsgroups import fetch_20newsgroups -from ._twenty_newsgroups import fetch_20newsgroups_vectorized -from ._openml import fetch_openml -from ._samples_generator import make_classification -from ._samples_generator import make_multilabel_classification -from ._samples_generator import make_hastie_10_2 -from ._samples_generator import make_regression -from ._samples_generator import make_blobs -from ._samples_generator import make_moons -from ._samples_generator import make_circles -from ._samples_generator import make_friedman1 -from ._samples_generator import make_friedman2 -from ._samples_generator import make_friedman3 -from ._samples_generator import make_low_rank_matrix -from ._samples_generator import make_sparse_coded_signal -from ._samples_generator import make_sparse_uncorrelated -from ._samples_generator import make_spd_matrix -from ._samples_generator import make_swiss_roll -from ._samples_generator import make_s_curve -from ._samples_generator import make_sparse_spd_matrix -from ._samples_generator import make_gaussian_quantiles -from ._samples_generator import make_biclusters -from ._samples_generator import make_checkerboard -from ._svmlight_format_io import load_svmlight_file -from ._svmlight_format_io import load_svmlight_files -from ._svmlight_format_io import dump_svmlight_file +from ._lfw import fetch_lfw_pairs, fetch_lfw_people from ._olivetti_faces import fetch_olivetti_faces -from ._species_distributions import fetch_species_distributions -from ._california_housing import fetch_california_housing +from ._openml import fetch_openml from ._rcv1 import fetch_rcv1 - +from ._samples_generator import ( + make_biclusters, + make_blobs, + make_checkerboard, + make_circles, + make_classification, + make_friedman1, + make_friedman2, + make_friedman3, + make_gaussian_quantiles, + make_hastie_10_2, + make_low_rank_matrix, + make_moons, + make_multilabel_classification, + make_regression, + make_s_curve, + make_sparse_coded_signal, + make_sparse_spd_matrix, + make_sparse_uncorrelated, + 
make_spd_matrix, + make_swiss_roll, +) +from ._species_distributions import fetch_species_distributions +from ._svmlight_format_io import ( + dump_svmlight_file, + load_svmlight_file, + load_svmlight_files, +) +from ._twenty_newsgroups import fetch_20newsgroups, fetch_20newsgroups_vectorized __all__ = [ "clear_data_home", diff --git a/sklearn/datasets/_arff_parser.py b/sklearn/datasets/_arff_parser.py index bba06fbb74021..d9cc42de71f66 100644 --- a/sklearn/datasets/_arff_parser.py +++ b/sklearn/datasets/_arff_parser.py @@ -8,7 +8,6 @@ import numpy as np import scipy as sp - from ..externals import _arff from ..externals._arff import ArffSparseDataType from ..utils import ( diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index aba3a843400e7..5675798137824 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -7,26 +7,23 @@ # 2010 Olivier Grisel # License: BSD 3 clause import csv -import hashlib import gzip +import hashlib +import os import shutil from collections import namedtuple -import os +from numbers import Integral from os import environ, listdir, makedirs from os.path import expanduser, isdir, join, splitext from pathlib import Path -from numbers import Integral - -from ..preprocessing import scale -from ..utils import Bunch -from ..utils import check_random_state -from ..utils import check_pandas_support -from ..utils.fixes import _open_binary, _open_text, _read_text, _contents -from ..utils._param_validation import validate_params, Interval, StrOptions +from urllib.request import urlretrieve import numpy as np -from urllib.request import urlretrieve +from ..preprocessing import scale +from ..utils import Bunch, check_pandas_support, check_random_state +from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils.fixes import _contents, _open_binary, _open_text, _read_text DATA_MODULE = "sklearn.datasets.data" DESCR_MODULE = "sklearn.datasets.descr" @@ -38,7 +35,8 @@ @validate_params( { "data_home": [str, os.PathLike, None], - } + }, + prefer_skip_nested_validation=True, ) def get_data_home(data_home=None) -> str: """Return the path of the scikit-learn data directory. @@ -57,13 +55,13 @@ def get_data_home(data_home=None) -> str: Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None The path to scikit-learn data directory. If `None`, the default path - is `~/sklearn_learn_data`. + is `~/scikit_learn_data`. Returns ------- - data_home: str or path-like, default=None + data_home: str The path to scikit-learn data directory. """ if data_home is None: @@ -76,7 +74,8 @@ def get_data_home(data_home=None) -> str: @validate_params( { "data_home": [str, os.PathLike, None], - } + }, + prefer_skip_nested_validation=True, ) def clear_data_home(data_home=None): """Delete all the content of the data home cache. @@ -85,7 +84,7 @@ def clear_data_home(data_home=None): ---------- data_home : str or path-like, default=None The path to scikit-learn data directory. If `None`, the default path - is `~/sklearn_learn_data`. + is `~/scikit_learn_data`. 
""" data_home = get_data_home(data_home) shutil.rmtree(data_home) @@ -120,7 +119,8 @@ def _convert_data_dataframe( "decode_error": [StrOptions({"strict", "ignore", "replace"})], "random_state": ["random_state"], "allowed_extensions": [list, None], - } + }, + prefer_skip_nested_validation=True, ) def load_files( container_path, @@ -454,7 +454,8 @@ def load_descr(descr_file_name, *, descr_module=DESCR_MODULE): { "return_X_y": ["boolean"], "as_frame": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def load_wine(*, return_X_y=False, as_frame=False): """Load and return the wine dataset (classification). @@ -576,7 +577,10 @@ def load_wine(*, return_X_y=False, as_frame=False): ) -@validate_params({"return_X_y": ["boolean"], "as_frame": ["boolean"]}) +@validate_params( + {"return_X_y": ["boolean"], "as_frame": ["boolean"]}, + prefer_skip_nested_validation=True, +) def load_iris(*, return_X_y=False, as_frame=False): """Load and return the iris dataset (classification). @@ -663,6 +667,9 @@ def load_iris(*, return_X_y=False, as_frame=False): array([0, 0, 1]) >>> list(data.target_names) ['setosa', 'versicolor', 'virginica'] + + See :ref:`sphx_glr_auto_examples_datasets_plot_iris_dataset.py` for a more + detailed example of how to work with the iris dataset. """ data_file_name = "iris.csv" data, target, target_names, fdescr = load_csv_data( @@ -700,7 +707,10 @@ def load_iris(*, return_X_y=False, as_frame=False): ) -@validate_params({"return_X_y": ["boolean"], "as_frame": ["boolean"]}) +@validate_params( + {"return_X_y": ["boolean"], "as_frame": ["boolean"]}, + prefer_skip_nested_validation=True, +) def load_breast_cancer(*, return_X_y=False, as_frame=False): """Load and return the breast cancer wisconsin dataset (classification). @@ -717,7 +727,7 @@ def load_breast_cancer(*, return_X_y=False, as_frame=False): The copy of UCI ML Breast Cancer Wisconsin (Diagnostic) dataset is downloaded from: - https://goo.gl/U2Uwz2 + https://archive.ics.uci.edu/dataset/17/breast+cancer+wisconsin+diagnostic Read more in the :ref:`User Guide `. @@ -749,9 +759,9 @@ def load_breast_cancer(*, return_X_y=False, as_frame=False): target : {ndarray, Series} of shape (569,) The classification target. If `as_frame=True`, `target` will be a pandas Series. - feature_names : list + feature_names : ndarray of shape (30,) The names of the dataset columns. - target_names : list + target_names : ndarray of shape (2,) The names of target classes. frame : DataFrame of shape (569, 31) Only present when `as_frame=True`. DataFrame with `data` and @@ -855,7 +865,8 @@ def load_breast_cancer(*, return_X_y=False, as_frame=False): "n_class": [Interval(Integral, 1, 10, closed="both")], "return_X_y": ["boolean"], "as_frame": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def load_digits(*, n_class=10, return_X_y=False, as_frame=False): """Load and return the digits dataset (classification). @@ -991,7 +1002,8 @@ def load_digits(*, n_class=10, return_X_y=False, as_frame=False): @validate_params( - {"return_X_y": ["boolean"], "as_frame": ["boolean"], "scaled": ["boolean"]} + {"return_X_y": ["boolean"], "as_frame": ["boolean"], "scaled": ["boolean"]}, + prefer_skip_nested_validation=True, ) def load_diabetes(*, return_X_y=False, as_frame=False, scaled=True): """Load and return the diabetes dataset (regression). 
@@ -1108,7 +1120,8 @@ def load_diabetes(*, return_X_y=False, as_frame=False, scaled=True): { "return_X_y": ["boolean"], "as_frame": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def load_linnerud(*, return_X_y=False, as_frame=False): """Load and return the physical exercise Linnerud dataset. @@ -1278,7 +1291,8 @@ def load_sample_images(): @validate_params( { "image_name": [StrOptions({"china.jpg", "flower.jpg"})], - } + }, + prefer_skip_nested_validation=True, ) def load_sample_image(image_name): """Load the numpy array of a single sample image. diff --git a/sklearn/datasets/_california_housing.py b/sklearn/datasets/_california_housing.py index 96443c95f9979..3153f0dd03f72 100644 --- a/sklearn/datasets/_california_housing.py +++ b/sklearn/datasets/_california_housing.py @@ -21,24 +21,24 @@ # Authors: Peter Prettenhofer # License: BSD 3 clause -from os.path import exists -from os import makedirs, remove -import tarfile - -import numpy as np import logging +import tarfile +from os import PathLike, makedirs, remove +from os.path import exists import joblib +import numpy as np -from . import get_data_home -from ._base import _convert_data_dataframe -from ._base import _fetch_remote -from ._base import _pkl_filepath -from ._base import RemoteFileMetadata -from ._base import load_descr from ..utils import Bunch from ..utils._param_validation import validate_params - +from . import get_data_home +from ._base import ( + RemoteFileMetadata, + _convert_data_dataframe, + _fetch_remote, + _pkl_filepath, + load_descr, +) # The original data can be found at: # https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.tgz @@ -53,11 +53,12 @@ @validate_params( { - "data_home": [str, None], + "data_home": [str, PathLike, None], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], "as_frame": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def fetch_california_housing( *, data_home=None, download_if_missing=True, return_X_y=False, as_frame=False @@ -75,7 +76,7 @@ def fetch_california_housing( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_covtype.py b/sklearn/datasets/_covtype.py index 83bd8ad229924..7620e08c5ec92 100644 --- a/sklearn/datasets/_covtype.py +++ b/sklearn/datasets/_covtype.py @@ -14,24 +14,25 @@ # Peter Prettenhofer # License: BSD 3 clause -from gzip import GzipFile import logging -from os.path import exists, join import os +from gzip import GzipFile +from os.path import exists, join from tempfile import TemporaryDirectory -import numpy as np import joblib +import numpy as np -from . import get_data_home -from ._base import _convert_data_dataframe -from ._base import _fetch_remote -from ._base import RemoteFileMetadata -from ._base import load_descr -from ..utils import Bunch -from ._base import _pkl_filepath -from ..utils import check_random_state +from ..utils import Bunch, check_random_state from ..utils._param_validation import validate_params +from . 
import get_data_home +from ._base import ( + RemoteFileMetadata, + _convert_data_dataframe, + _fetch_remote, + _pkl_filepath, + load_descr, +) # The original data can be found in: # https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/covtype.data.gz @@ -64,13 +65,14 @@ @validate_params( { - "data_home": [str, None], + "data_home": [str, os.PathLike, None], "download_if_missing": ["boolean"], "random_state": ["random_state"], "shuffle": ["boolean"], "return_X_y": ["boolean"], "as_frame": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def fetch_covtype( *, @@ -96,7 +98,7 @@ def fetch_covtype( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py index 749e15cd53522..444bd01737901 100644 --- a/sklearn/datasets/_kddcup99.py +++ b/sklearn/datasets/_kddcup99.py @@ -9,24 +9,24 @@ """ import errno -from gzip import GzipFile import logging import os +from gzip import GzipFile from os.path import exists, join -import numpy as np import joblib +import numpy as np -from ._base import _fetch_remote -from ._base import _convert_data_dataframe -from . import get_data_home -from ._base import RemoteFileMetadata -from ._base import load_descr -from ..utils._param_validation import StrOptions, validate_params -from ..utils import Bunch -from ..utils import check_random_state +from ..utils import Bunch, check_random_state from ..utils import shuffle as shuffle_method - +from ..utils._param_validation import StrOptions, validate_params +from . import get_data_home +from ._base import ( + RemoteFileMetadata, + _convert_data_dataframe, + _fetch_remote, + load_descr, +) # The original data can be found at: # https://archive.ics.uci.edu/ml/machine-learning-databases/kddcup99-mld/kddcup.data.gz @@ -50,14 +50,15 @@ @validate_params( { "subset": [StrOptions({"SA", "SF", "http", "smtp"}), None], - "data_home": [str, None], + "data_home": [str, os.PathLike, None], "shuffle": ["boolean"], "random_state": ["random_state"], "percent10": ["boolean"], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], "as_frame": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def fetch_kddcup99( *, @@ -91,7 +92,7 @@ def fetch_kddcup99( To return the corresponding classical subsets of kddcup 99. If None, return the entire kddcup 99 dataset. - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. 
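The widened `data_home` constraint above (`[str, os.PathLike, None]`) means the dataset fetchers accept `pathlib.Path` objects as well as strings. A hedged sketch of that usage, not taken from the patch itself; it downloads the dataset on first call and the cache directory shown is an arbitrary choice:

from pathlib import Path
from sklearn.datasets import fetch_california_housing

cache_dir = Path.home() / "scikit_learn_data"   # hypothetical cache location
housing = fetch_california_housing(data_home=cache_dir, download_if_missing=True)
print(housing.data.shape)   # (20640, 8)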
diff --git a/sklearn/datasets/_lfw.py b/sklearn/datasets/_lfw.py index 7f6cf8f235d3f..d06d29f21d0a5 100644 --- a/sklearn/datasets/_lfw.py +++ b/sklearn/datasets/_lfw.py @@ -8,22 +8,22 @@ # Copyright (c) 2011 Olivier Grisel # License: BSD 3 clause -from os import listdir, makedirs, remove -from os.path import join, exists, isdir -from ..utils._param_validation import validate_params, Interval, Hidden, StrOptions -from numbers import Integral, Real import logging +from numbers import Integral, Real +from os import PathLike, listdir, makedirs, remove +from os.path import exists, isdir, join import numpy as np from joblib import Memory +from ..utils import Bunch +from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params from ._base import ( - get_data_home, - _fetch_remote, RemoteFileMetadata, + _fetch_remote, + get_data_home, load_descr, ) -from ..utils import Bunch logger = logging.getLogger(__name__) @@ -234,7 +234,7 @@ def _fetch_lfw_people( @validate_params( { - "data_home": [str, None], + "data_home": [str, PathLike, None], "funneled": ["boolean"], "resize": [Interval(Real, 0, None, closed="neither"), None], "min_faces_per_person": [Interval(Integral, 0, None, closed="left"), None], @@ -242,7 +242,8 @@ def _fetch_lfw_people( "slice_": [tuple, Hidden(None)], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def fetch_lfw_people( *, @@ -271,7 +272,7 @@ def fetch_lfw_people( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. @@ -430,13 +431,14 @@ def _fetch_lfw_pairs( @validate_params( { "subset": [StrOptions({"train", "test", "10_folds"})], - "data_home": [str, None], + "data_home": [str, PathLike, None], "funneled": ["boolean"], "resize": [Interval(Real, 0, None, closed="neither"), None], "color": ["boolean"], "slice_": [tuple, Hidden(None)], "download_if_missing": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def fetch_lfw_pairs( *, @@ -478,7 +480,7 @@ def fetch_lfw_pairs( official evaluation set that is meant to be used with a 10-folds cross validation. - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_olivetti_faces.py b/sklearn/datasets/_olivetti_faces.py index 55f4b856c6cf0..8e1b3c91e254b 100644 --- a/sklearn/datasets/_olivetti_faces.py +++ b/sklearn/datasets/_olivetti_faces.py @@ -13,20 +13,17 @@ # Copyright (c) 2011 David Warde-Farley # License: BSD 3 clause +from os import PathLike, makedirs, remove from os.path import exists -from os import makedirs, remove +import joblib import numpy as np from scipy.io import loadmat -import joblib -from . import get_data_home -from ._base import _fetch_remote -from ._base import RemoteFileMetadata -from ._base import _pkl_filepath -from ._base import load_descr -from ..utils import check_random_state, Bunch +from ..utils import Bunch, check_random_state from ..utils._param_validation import validate_params +from . 
import get_data_home +from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath, load_descr # The original data can be found at: # https://cs.nyu.edu/~roweis/data/olivettifaces.mat @@ -39,12 +36,13 @@ @validate_params( { - "data_home": [str, None], + "data_home": [str, PathLike, None], "shuffle": ["boolean"], "random_state": ["random_state"], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def fetch_olivetti_faces( *, @@ -69,7 +67,7 @@ def fetch_olivetti_faces( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py index 21d8eb99858bb..c9d09dc3ce46a 100644 --- a/sklearn/datasets/_openml.py +++ b/sklearn/datasets/_openml.py @@ -7,18 +7,28 @@ from contextlib import closing from functools import wraps from os.path import join -from typing import Callable, Optional, Dict, Tuple, List, Any, Union from tempfile import TemporaryDirectory +from typing import Any, Callable, Dict, List, Optional, Tuple, Union from urllib.error import HTTPError, URLError -from urllib.request import urlopen, Request +from urllib.request import Request, urlopen from warnings import warn import numpy as np +from ..utils import ( + Bunch, + check_pandas_support, # noqa # noqa +) +from ..utils._param_validation import ( + Hidden, + Integral, + Interval, + Real, + StrOptions, + validate_params, +) from . import get_data_home from ._arff_parser import load_arff_from_gzip_file -from ..utils import Bunch -from ..utils import check_pandas_support # noqa __all__ = ["fetch_openml"] @@ -734,19 +744,39 @@ def _valid_data_column_names(features_list, target_columns): return valid_data_column_names +@validate_params( + { + "name": [str, None], + "version": [Interval(Integral, 1, None, closed="left"), StrOptions({"active"})], + "data_id": [Interval(Integral, 1, None, closed="left"), None], + "data_home": [str, os.PathLike, None], + "target_column": [str, list, None], + "cache": [bool], + "return_X_y": [bool], + "as_frame": [bool, StrOptions({"auto"})], + "n_retries": [Interval(Integral, 1, None, closed="left")], + "delay": [Interval(Real, 0, None, closed="right")], + "parser": [ + StrOptions({"auto", "pandas", "liac-arff"}), + Hidden(StrOptions({"warn"})), + ], + "read_csv_kwargs": [dict, None], + }, + prefer_skip_nested_validation=True, +) def fetch_openml( name: Optional[str] = None, *, version: Union[str, int] = "active", data_id: Optional[int] = None, - data_home: Optional[str] = None, + data_home: Optional[Union[str, os.PathLike]] = None, target_column: Optional[Union[str, List]] = "default-target", cache: bool = True, return_X_y: bool = False, as_frame: Union[str, bool] = "auto", n_retries: int = 3, delay: float = 1.0, - parser: Optional[str] = "warn", + parser: str = "warn", read_csv_kwargs: Optional[Dict] = None, ): """Fetch dataset from openml by name or dataset id. @@ -785,7 +815,7 @@ def fetch_openml( dataset. If data_id is not given, name (and potential version) are used to obtain a dataset. - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the data sets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. @@ -839,7 +869,7 @@ def fetch_openml( - `"pandas"`: this is the most efficient parser. 
However, it requires pandas to be installed and can only open dense datasets. - `"liac-arff"`: this is a pure Python ARFF parser that is much less - memory- and CPU-efficient. It deals with sparse ARFF dataset. + memory- and CPU-efficient. It deals with sparse ARFF datasets. If `"auto"` (future default), the parser is chosen automatically such that `"liac-arff"` is selected for sparse ARFF datasets, otherwise @@ -854,7 +884,7 @@ def fetch_openml( read_csv_kwargs : dict, default=None Keyword arguments passed to :func:`pandas.read_csv` when loading the data - from a ARFF file and using the pandas parser. It can allows to + from a ARFF file and using the pandas parser. It can allow to overwrite some default parameters. .. versionadded:: 1.3 @@ -986,14 +1016,6 @@ def fetch_openml( "unusable. Warning: {}".format(data_description["warning"]) ) - # TODO(1.4): remove "warn" from the valid parser - valid_parsers = ("auto", "pandas", "liac-arff", "warn") - if parser not in valid_parsers: - raise ValueError( - f"`parser` must be one of {', '.join(repr(p) for p in valid_parsers)}. Got" - f" {parser!r} instead." - ) - if parser == "warn": # TODO(1.4): remove this warning parser = "liac-arff" @@ -1009,11 +1031,6 @@ def fetch_openml( FutureWarning, ) - if as_frame not in ("auto", True, False): - raise ValueError( - f"`as_frame` must be one of 'auto', True, or False. Got {as_frame} instead." - ) - return_sparse = data_description["format"].lower() == "sparse_arff" as_frame = not return_sparse if as_frame == "auto" else as_frame if parser == "auto": @@ -1091,14 +1108,9 @@ def fetch_openml( target_columns = [target_column] elif target_column is None: target_columns = [] - elif isinstance(target_column, list): - target_columns = target_column else: - raise TypeError( - "Did not recognize type of target_column" - "Should be str, list or None. Got: " - "{}".format(type(target_column)) - ) + # target_column already is of type list + target_columns = target_column data_columns = _valid_data_column_names(features_list, target_columns) shape: Optional[Tuple[int, int]] diff --git a/sklearn/datasets/_rcv1.py b/sklearn/datasets/_rcv1.py index ae391edbad113..d9f392d872216 100644 --- a/sklearn/datasets/_rcv1.py +++ b/sklearn/datasets/_rcv1.py @@ -9,25 +9,20 @@ # License: BSD 3 clause import logging - -from os import remove, makedirs -from os.path import exists, join from gzip import GzipFile +from os import PathLike, makedirs, remove +from os.path import exists, join +import joblib import numpy as np import scipy.sparse as sp -import joblib +from ..utils import Bunch +from ..utils import shuffle as shuffle_ +from ..utils._param_validation import StrOptions, validate_params from . 
import get_data_home -from ._base import _pkl_filepath -from ._base import _fetch_remote -from ._base import RemoteFileMetadata -from ._base import load_descr +from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath, load_descr from ._svmlight_format_io import load_svmlight_files -from ..utils import shuffle as shuffle_ -from ..utils import Bunch -from ..utils._param_validation import validate_params, StrOptions - # The original vectorized data can be found at: # http://www.ai.mit.edu/projects/jmlr/papers/volume5/lewis04a/a13-vector-files/lyrl2004_vectors_test_pt0.dat.gz @@ -79,13 +74,14 @@ @validate_params( { - "data_home": [str, None], + "data_home": [str, PathLike, None], "subset": [StrOptions({"train", "test", "all"})], "download_if_missing": ["boolean"], "random_state": ["random_state"], "shuffle": ["boolean"], "return_X_y": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def fetch_rcv1( *, @@ -115,7 +111,7 @@ def fetch_rcv1( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_samples_generator.py b/sklearn/datasets/_samples_generator.py index cb3b36d944eb2..d6fee2d61e8a6 100644 --- a/sklearn/datasets/_samples_generator.py +++ b/sklearn/datasets/_samples_generator.py @@ -6,20 +6,20 @@ # G. Louppe, J. Nothman # License: BSD 3 clause -from numbers import Integral, Real -import numbers import array +import numbers import warnings from collections.abc import Iterable +from numbers import Integral, Real import numpy as np -from scipy import linalg import scipy.sparse as sp +from scipy import linalg from ..preprocessing import MultiLabelBinarizer from ..utils import check_array, check_random_state -from ..utils._param_validation import Interval, validate_params, Hidden, StrOptions from ..utils import shuffle as util_shuffle +from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params from ..utils.random import sample_without_replacement @@ -56,7 +56,8 @@ def _generate_hypercube(samples, dimensions, rng): "scale": [Interval(Real, 0, None, closed="neither"), "array-like", None], "shuffle": ["boolean"], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_classification( n_samples=100, @@ -321,7 +322,8 @@ def make_classification( "return_indicator": [StrOptions({"dense", "sparse"}), "boolean"], "return_distributions": ["boolean"], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_multilabel_classification( n_samples=100, @@ -480,7 +482,8 @@ def sample_example(): { "n_samples": [Interval(Integral, 1, None, closed="left")], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_hastie_10_2(n_samples=12000, *, random_state=None): """Generate data for binary classification used in Hastie et al. 2009, Example 10.2. 
@@ -542,7 +545,8 @@ def make_hastie_10_2(n_samples=12000, *, random_state=None): "shuffle": ["boolean"], "coef": ["boolean"], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_regression( n_samples=100, @@ -700,7 +704,8 @@ def make_regression( "noise": [Interval(Real, 0, None, closed="left"), None], "random_state": ["random_state"], "factor": [Interval(Real, 0, 1, closed="left")], - } + }, + prefer_skip_nested_validation=True, ) def make_circles( n_samples=100, *, shuffle=True, noise=None, random_state=None, factor=0.8 @@ -784,7 +789,8 @@ def make_circles( "shuffle": ["boolean"], "noise": [Interval(Real, 0, None, closed="left"), None], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_moons(n_samples=100, *, shuffle=True, noise=None, random_state=None): """Make two interleaving half circles. @@ -865,7 +871,8 @@ def make_moons(n_samples=100, *, shuffle=True, noise=None, random_state=None): "shuffle": ["boolean"], "random_state": ["random_state"], "return_centers": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def make_blobs( n_samples=100, @@ -1040,7 +1047,8 @@ def make_blobs( "n_features": [Interval(Integral, 5, None, closed="left")], "noise": [Interval(Real, 0.0, None, closed="left")], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_friedman1(n_samples=100, n_features=10, *, noise=0.0, random_state=None): """Generate the "Friedman #1" regression problem. @@ -1111,7 +1119,8 @@ def make_friedman1(n_samples=100, n_features=10, *, noise=0.0, random_state=None "n_samples": [Interval(Integral, 1, None, closed="left")], "noise": [Interval(Real, 0, None, closed="left")], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_friedman2(n_samples=100, *, noise=0.0, random_state=None): """Generate the "Friedman #2" regression problem. @@ -1183,7 +1192,8 @@ def make_friedman2(n_samples=100, *, noise=0.0, random_state=None): "n_samples": [Interval(Integral, 1, None, closed="left")], "noise": [Interval(Real, 0, None, closed="left")], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_friedman3(n_samples=100, *, noise=0.0, random_state=None): """Generate the "Friedman #3" regression problem. @@ -1257,7 +1267,8 @@ def make_friedman3(n_samples=100, *, noise=0.0, random_state=None): "effective_rank": [Interval(Integral, 1, None, closed="left")], "tail_strength": [Interval(Real, 0, 1, closed="both")], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_low_rank_matrix( n_samples=100, @@ -1349,7 +1360,8 @@ def make_low_rank_matrix( "n_nonzero_coefs": [Interval(Integral, 1, None, closed="left")], "random_state": ["random_state"], "data_transposed": ["boolean", Hidden(StrOptions({"deprecated"}))], - } + }, + prefer_skip_nested_validation=True, ) def make_sparse_coded_signal( n_samples, @@ -1454,7 +1466,8 @@ def make_sparse_coded_signal( "n_samples": [Interval(Integral, 1, None, closed="left")], "n_features": [Interval(Integral, 1, None, closed="left")], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_sparse_uncorrelated(n_samples=100, n_features=10, *, random_state=None): """Generate a random regression problem with sparse uncorrelated design. 
@@ -1511,7 +1524,8 @@ def make_sparse_uncorrelated(n_samples=100, n_features=10, *, random_state=None) { "n_dim": [Interval(Integral, 1, None, closed="left")], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_spd_matrix(n_dim, *, random_state=None): """Generate a random symmetric, positive-definite matrix. @@ -1554,7 +1568,8 @@ def make_spd_matrix(n_dim, *, random_state=None): "smallest_coef": [Interval(Real, 0, 1, closed="both")], "largest_coef": [Interval(Real, 0, 1, closed="both")], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_sparse_spd_matrix( dim=1, @@ -1642,7 +1657,8 @@ def make_sparse_spd_matrix( "noise": [Interval(Real, 0, None, closed="left")], "random_state": ["random_state"], "hole": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def make_swiss_roll(n_samples=100, *, noise=0.0, random_state=None, hole=False): """Generate a swiss roll dataset. @@ -1714,7 +1730,8 @@ def make_swiss_roll(n_samples=100, *, noise=0.0, random_state=None, hole=False): "n_samples": [Interval(Integral, 1, None, closed="left")], "noise": [Interval(Real, 0, None, closed="left")], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_s_curve(n_samples=100, *, noise=0.0, random_state=None): """Generate an S curve dataset. @@ -1765,7 +1782,8 @@ def make_s_curve(n_samples=100, *, noise=0.0, random_state=None): "n_classes": [Interval(Integral, 1, None, closed="left")], "shuffle": ["boolean"], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_gaussian_quantiles( *, @@ -1880,7 +1898,8 @@ def _shuffle(data, random_state=None): "maxval": [Interval(Real, None, None, closed="neither")], "shuffle": ["boolean"], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_biclusters( shape, @@ -1988,7 +2007,8 @@ def make_biclusters( "maxval": [Interval(Real, None, None, closed="neither")], "shuffle": ["boolean"], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def make_checkerboard( shape, diff --git a/sklearn/datasets/_species_distributions.py b/sklearn/datasets/_species_distributions.py index 3387217349e20..a1e654d41e071 100644 --- a/sklearn/datasets/_species_distributions.py +++ b/sklearn/datasets/_species_distributions.py @@ -37,21 +37,18 @@ # # License: BSD 3 clause +import logging from io import BytesIO -from os import makedirs, remove +from os import PathLike, makedirs, remove from os.path import exists -import logging -import numpy as np - import joblib +import numpy as np -from . import get_data_home -from ._base import _fetch_remote -from ._base import RemoteFileMetadata from ..utils import Bunch -from ._base import _pkl_filepath from ..utils._param_validation import validate_params +from . import get_data_home +from ._base import RemoteFileMetadata, _fetch_remote, _pkl_filepath # The original data can be found at: # https://biodiversityinformatics.amnh.org/open_source/maxent/samples.zip @@ -138,7 +135,10 @@ def construct_grids(batch): return (xgrid, ygrid) -@validate_params({"data_home": [str, None], "download_if_missing": ["boolean"]}) +@validate_params( + {"data_home": [str, PathLike, None], "download_if_missing": ["boolean"]}, + prefer_skip_nested_validation=True, +) def fetch_species_distributions(*, data_home=None, download_if_missing=True): """Loader for species distribution dataset from Phillips et. al. (2006). 
@@ -146,7 +146,7 @@ def fetch_species_distributions(*, data_home=None, download_if_missing=True): Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify another download and cache folder for the datasets. By default all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/_svmlight_format_io.py b/sklearn/datasets/_svmlight_format_io.py index e04d90e15dceb..a4fb553b5d7aa 100644 --- a/sklearn/datasets/_svmlight_format_io.py +++ b/sklearn/datasets/_svmlight_format_io.py @@ -15,22 +15,21 @@ # Olivier Grisel # License: BSD 3 clause -from contextlib import closing import os.path +from contextlib import closing +from numbers import Integral import numpy as np import scipy.sparse as sp -from numbers import Integral from .. import __version__ - -from ..utils import check_array, IS_PYPY -from ..utils._param_validation import validate_params, HasMethods, Interval, StrOptions +from ..utils import IS_PYPY, check_array +from ..utils._param_validation import HasMethods, Interval, StrOptions, validate_params if not IS_PYPY: from ._svmlight_format_fast import ( - _load_svmlight_file, _dump_svmlight_file, + _load_svmlight_file, ) else: @@ -58,7 +57,8 @@ def _load_svmlight_file(*args, **kwargs): "query_id": ["boolean"], "offset": [Interval(Integral, 0, None, closed="left")], "length": [Integral], - } + }, + prefer_skip_nested_validation=True, ) def load_svmlight_file( f, @@ -260,7 +260,8 @@ def _open_and_load(f, dtype, multilabel, zero_based, query_id, offset=0, length= "query_id": ["boolean"], "offset": [Interval(Integral, 0, None, closed="left")], "length": [Integral], - } + }, + prefer_skip_nested_validation=True, ) def load_svmlight_files( files, @@ -449,7 +450,8 @@ def _dump_svmlight(X, y, f, multilabel, one_based, comment, query_id): "comment": [str, bytes, None], "query_id": ["array-like", None], "multilabel": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def dump_svmlight_file( X, diff --git a/sklearn/datasets/_twenty_newsgroups.py b/sklearn/datasets/_twenty_newsgroups.py index 512b7974a497d..5973e998c34b9 100644 --- a/sklearn/datasets/_twenty_newsgroups.py +++ b/sklearn/datasets/_twenty_newsgroups.py @@ -24,29 +24,30 @@ # Copyright (c) 2011 Olivier Grisel # License: BSD 3 clause -import os +import codecs import logging -import tarfile +import os import pickle -import shutil import re -import codecs +import shutil +import tarfile +import joblib import numpy as np import scipy.sparse as sp -import joblib -from . import get_data_home -from . import load_files -from ._base import _convert_data_dataframe -from ._base import _pkl_filepath -from ._base import _fetch_remote -from ._base import RemoteFileMetadata -from ._base import load_descr -from ..feature_extraction.text import CountVectorizer from .. import preprocessing -from ..utils import check_random_state, Bunch +from ..feature_extraction.text import CountVectorizer +from ..utils import Bunch, check_random_state from ..utils._param_validation import StrOptions, validate_params +from . 
import get_data_home, load_files +from ._base import ( + RemoteFileMetadata, + _convert_data_dataframe, + _fetch_remote, + _pkl_filepath, + load_descr, +) logger = logging.getLogger(__name__) @@ -152,7 +153,7 @@ def strip_newsgroup_footer(text): @validate_params( { - "data_home": [str, None], + "data_home": [str, os.PathLike, None], "subset": [StrOptions({"train", "test", "all"})], "categories": ["array-like", None], "shuffle": ["boolean"], @@ -160,7 +161,8 @@ def strip_newsgroup_footer(text): "remove": [tuple], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def fetch_20newsgroups( *, @@ -189,7 +191,7 @@ def fetch_20newsgroups( Parameters ---------- - data_home : str, default=None + data_home : str or path-like, default=None Specify a download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders. @@ -207,7 +209,7 @@ def fetch_20newsgroups( make the assumption that the samples are independent and identically distributed (i.i.d.), such as stochastic gradient descent. - random_state : int, RandomState instance or None, default=None + random_state : int, RandomState instance or None, default=42 Determines random number generation for dataset shuffling. Pass an int for reproducible output across multiple function calls. See :term:`Glossary `. @@ -317,7 +319,7 @@ def fetch_20newsgroups( # Sort the categories to have the ordering of the labels labels.sort() labels, categories = zip(*labels) - mask = np.in1d(data.target, labels) + mask = np.isin(data.target, labels) data.filenames = data.filenames[mask] data.target = data.target[mask] # searchsorted to have continuous labels @@ -349,12 +351,13 @@ def fetch_20newsgroups( { "subset": [StrOptions({"train", "test", "all"})], "remove": [tuple], - "data_home": [str, None], + "data_home": [str, os.PathLike, None], "download_if_missing": ["boolean"], "return_X_y": ["boolean"], "normalize": ["boolean"], "as_frame": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def fetch_20newsgroups_vectorized( *, @@ -408,7 +411,7 @@ def fetch_20newsgroups_vectorized( ends of posts that look like signatures, and 'quotes' removes lines that appear to be quoting another post. - data_home : str, default=None + data_home : str or path-like, default=None Specify an download and cache folder for the datasets. If None, all scikit-learn data is stored in '~/scikit_learn_data' subfolders. diff --git a/sklearn/datasets/descr/breast_cancer.rst b/sklearn/datasets/descr/breast_cancer.rst index bc4d60b9a363d..a532ef960737f 100644 --- a/sklearn/datasets/descr/breast_cancer.rst +++ b/sklearn/datasets/descr/breast_cancer.rst @@ -104,15 +104,19 @@ This database is also available through the UW CS ftp server: ftp ftp.cs.wisc.edu cd math-prog/cpo-dataset/machine-learn/WDBC/ -.. topic:: References - - - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction - for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on - Electronic Imaging: Science and Technology, volume 1905, pages 861-870, - San Jose, CA, 1993. - - O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and - prognosis via linear programming. Operations Research, 43(4), pages 570-577, - July-August 1995. - - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques - to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) - 163-171. 
\ No newline at end of file +|details-start| +**References** +|details-split| + +- W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction + for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on + Electronic Imaging: Science and Technology, volume 1905, pages 861-870, + San Jose, CA, 1993. +- O.L. Mangasarian, W.N. Street and W.H. Wolberg. Breast cancer diagnosis and + prognosis via linear programming. Operations Research, 43(4), pages 570-577, + July-August 1995. +- W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques + to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) + 163-171. + +|details-end| \ No newline at end of file diff --git a/sklearn/datasets/descr/digits.rst b/sklearn/datasets/descr/digits.rst index 244f34f316865..40d819e92b7ab 100644 --- a/sklearn/datasets/descr/digits.rst +++ b/sklearn/datasets/descr/digits.rst @@ -32,15 +32,19 @@ T. Candela, D. L. Dimmick, J. Geist, P. J. Grother, S. A. Janet, and C. L. Wilson, NIST Form-Based Handprint Recognition System, NISTIR 5469, 1994. -.. topic:: References - - - C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their - Applications to Handwritten Digit Recognition, MSc Thesis, Institute of - Graduate Studies in Science and Engineering, Bogazici University. - - E. Alpaydin, C. Kaynak (1998) Cascading Classifiers, Kybernetika. - - Ken Tang and Ponnuthurai N. Suganthan and Xi Yao and A. Kai Qin. - Linear dimensionalityreduction using relevance weighted LDA. School of - Electrical and Electronic Engineering Nanyang Technological University. - 2005. - - Claudio Gentile. A New Approximate Maximal Margin Classification - Algorithm. NIPS. 2000. +|details-start| +**References** +|details-split| + +- C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their + Applications to Handwritten Digit Recognition, MSc Thesis, Institute of + Graduate Studies in Science and Engineering, Bogazici University. +- E. Alpaydin, C. Kaynak (1998) Cascading Classifiers, Kybernetika. +- Ken Tang and Ponnuthurai N. Suganthan and Xi Yao and A. Kai Qin. + Linear dimensionalityreduction using relevance weighted LDA. School of + Electrical and Electronic Engineering Nanyang Technological University. + 2005. +- Claudio Gentile. A New Approximate Maximal Margin Classification + Algorithm. NIPS. 2000. + +|details-end| \ No newline at end of file diff --git a/sklearn/datasets/descr/iris.rst b/sklearn/datasets/descr/iris.rst index e05206454d218..02236dcb1c19f 100644 --- a/sklearn/datasets/descr/iris.rst +++ b/sklearn/datasets/descr/iris.rst @@ -45,19 +45,23 @@ data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. -.. topic:: References - - - Fisher, R.A. "The use of multiple measurements in taxonomic problems" - Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to - Mathematical Statistics" (John Wiley, NY, 1950). - - Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis. - (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218. - - Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System - Structure and Classification Rule for Recognition in Partially Exposed - Environments". IEEE Transactions on Pattern Analysis and Machine - Intelligence, Vol. PAMI-2, No. 1, 67-71. - - Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". 
IEEE Transactions - on Information Theory, May 1972, 431-433. - - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II - conceptual clustering system finds 3 classes in the data. - - Many, many more ... \ No newline at end of file +|details-start| +**References** +|details-split| + +- Fisher, R.A. "The use of multiple measurements in taxonomic problems" + Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to + Mathematical Statistics" (John Wiley, NY, 1950). +- Duda, R.O., & Hart, P.E. (1973) Pattern Classification and Scene Analysis. + (Q327.D83) John Wiley & Sons. ISBN 0-471-22361-1. See page 218. +- Dasarathy, B.V. (1980) "Nosing Around the Neighborhood: A New System + Structure and Classification Rule for Recognition in Partially Exposed + Environments". IEEE Transactions on Pattern Analysis and Machine + Intelligence, Vol. PAMI-2, No. 1, 67-71. +- Gates, G.W. (1972) "The Reduced Nearest Neighbor Rule". IEEE Transactions + on Information Theory, May 1972, 431-433. +- See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II + conceptual clustering system finds 3 classes in the data. +- Many, many more ... + +|details-end| \ No newline at end of file diff --git a/sklearn/datasets/descr/lfw.rst b/sklearn/datasets/descr/lfw.rst index e7fc35c3caabc..8105d7d6d633a 100644 --- a/sklearn/datasets/descr/lfw.rst +++ b/sklearn/datasets/descr/lfw.rst @@ -32,8 +32,9 @@ face detector from various online websites. Features real, between 0 and 255 ================= ======================= -Usage -~~~~~ +|details-start| +**Usage** +|details-split| ``scikit-learn`` provides two loaders that will automatically download, cache, parse the metadata files, decode the jpeg and convert the @@ -111,6 +112,8 @@ The :func:`sklearn.datasets.fetch_lfw_pairs` datasets is subdivided into an evaluation ``10_folds`` set meant to compute performance metrics using a 10-folds cross validation scheme. +|details-end| + .. topic:: References: * `Labeled Faces in the Wild: A Database for Studying Face Recognition @@ -120,7 +123,6 @@ an evaluation ``10_folds`` set meant to compute performance metrics using a University of Massachusetts, Amherst, Technical Report 07-49, October, 2007. -Examples -~~~~~~~~ +.. topic:: Examples: -:ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` + * :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` diff --git a/sklearn/datasets/descr/linnerud.rst b/sklearn/datasets/descr/linnerud.rst index f7c10a95423d0..81c970bb6e3e6 100644 --- a/sklearn/datasets/descr/linnerud.rst +++ b/sklearn/datasets/descr/linnerud.rst @@ -18,7 +18,11 @@ twenty middle-aged men in a fitness club: - *exercise* - CSV containing 20 observations on 3 exercise variables: Chins, Situps and Jumps. -.. topic:: References +|details-start| +**References** +|details-split| - * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: - Editions Technic. +* Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: + Editions Technic. + +|details-end| \ No newline at end of file diff --git a/sklearn/datasets/descr/twenty_newsgroups.rst b/sklearn/datasets/descr/twenty_newsgroups.rst index 8e373c6ec3e74..669e158244134 100644 --- a/sklearn/datasets/descr/twenty_newsgroups.rst +++ b/sklearn/datasets/descr/twenty_newsgroups.rst @@ -27,8 +27,9 @@ extractor. 
Features text ================= ========== -Usage -~~~~~ +|details-start| +**Usage** +|details-split| The :func:`sklearn.datasets.fetch_20newsgroups` function is a data fetching / caching functions that downloads the data archive from @@ -89,8 +90,11 @@ list of the categories to load to the >>> newsgroups_train.target[:10] array([0, 1, 1, 1, 0, 1, 1, 0, 0, 0]) -Converting text to vectors -~~~~~~~~~~~~~~~~~~~~~~~~~~ +|details-end| + +|details-start| +**Converting text to vectors** +|details-split| In order to feed predictive or clustering models with the text data, one first need to turn the text into vectors of numerical values suitable @@ -122,9 +126,11 @@ returns ready-to-use token counts features instead of file names. .. _`20 newsgroups website`: http://people.csail.mit.edu/jrennie/20Newsgroups/ .. _`TF-IDF`: https://en.wikipedia.org/wiki/Tf-idf +|details-end| -Filtering text for more realistic training -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +|details-start| +**Filtering text for more realistic training** +|details-split| It is easy for a classifier to overfit on particular things that appear in the 20 Newsgroups data, such as newsgroup headers. Many classifiers achieve very @@ -218,6 +224,7 @@ It loses even more if we also strip this metadata from the training data: Some other classifiers cope better with this harder version of the task. Try the :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_text_feature_extraction.py` example with and without the `remove` option to compare the results. +|details-end| .. topic:: Data Considerations diff --git a/sklearn/datasets/descr/wine_data.rst b/sklearn/datasets/descr/wine_data.rst index dbe7f38e44aa6..e20efea9ba719 100644 --- a/sklearn/datasets/descr/wine_data.rst +++ b/sklearn/datasets/descr/wine_data.rst @@ -74,22 +74,26 @@ Lichman, M. (2013). UCI Machine Learning Repository [https://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science. -.. topic:: References - - (1) S. Aeberhard, D. Coomans and O. de Vel, - Comparison of Classifiers in High Dimensional Settings, - Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of - Mathematics and Statistics, James Cook University of North Queensland. - (Also submitted to Technometrics). - - The data was used with many others for comparing various - classifiers. The classes are separable, though only RDA - has achieved 100% correct classification. - (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) - (All results using the leave-one-out technique) - - (2) S. Aeberhard, D. Coomans and O. de Vel, - "THE CLASSIFICATION PERFORMANCE OF RDA" - Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of - Mathematics and Statistics, James Cook University of North Queensland. - (Also submitted to Journal of Chemometrics). +|details-start| +**References** +|details-split| + +(1) S. Aeberhard, D. Coomans and O. de Vel, +Comparison of Classifiers in High Dimensional Settings, +Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of +Mathematics and Statistics, James Cook University of North Queensland. +(Also submitted to Technometrics). + +The data was used with many others for comparing various +classifiers. The classes are separable, though only RDA +has achieved 100% correct classification. +(RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) +(All results using the leave-one-out technique) + +(2) S. Aeberhard, D. Coomans and O. 
de Vel, +"THE CLASSIFICATION PERFORMANCE OF RDA" +Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of +Mathematics and Statistics, James Cook University of North Queensland. +(Also submitted to Journal of Chemometrics). + +|details-end| \ No newline at end of file diff --git a/sklearn/datasets/tests/conftest.py b/sklearn/datasets/tests/conftest.py index ef1280f6218b1..c8ab1cd04ee6e 100644 --- a/sklearn/datasets/tests/conftest.py +++ b/sklearn/datasets/tests/conftest.py @@ -1,6 +1,7 @@ """ Network tests are only run, if data is already locally available, or if download is specifically requested by environment variable.""" import builtins + import pytest diff --git a/sklearn/datasets/tests/test_20news.py b/sklearn/datasets/tests/test_20news.py index e30348c894559..4072d9c8ec67f 100644 --- a/sklearn/datasets/tests/test_20news.py +++ b/sklearn/datasets/tests/test_20news.py @@ -4,16 +4,17 @@ from functools import partial from unittest.mock import patch -import pytest - import numpy as np +import pytest import scipy.sparse as sp -from sklearn.datasets.tests.test_common import check_as_frame -from sklearn.datasets.tests.test_common import check_pandas_dependency_message -from sklearn.datasets.tests.test_common import check_return_X_y -from sklearn.utils._testing import assert_allclose_dense_sparse +from sklearn.datasets.tests.test_common import ( + check_as_frame, + check_pandas_dependency_message, + check_return_X_y, +) from sklearn.preprocessing import normalize +from sklearn.utils._testing import assert_allclose_dense_sparse def test_20news(fetch_20newsgroups_fxt): @@ -63,7 +64,7 @@ def test_20news_length_consistency(fetch_20newsgroups_fxt): def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt): # test subset = train bunch = fetch_20newsgroups_vectorized_fxt(subset="train") - assert sp.isspmatrix_csr(bunch.data) + assert sp.issparse(bunch.data) and bunch.data.format == "csr" assert bunch.data.shape == (11314, 130107) assert bunch.target.shape[0] == 11314 assert bunch.data.dtype == np.float64 @@ -71,7 +72,7 @@ def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt): # test subset = test bunch = fetch_20newsgroups_vectorized_fxt(subset="test") - assert sp.isspmatrix_csr(bunch.data) + assert sp.issparse(bunch.data) and bunch.data.format == "csr" assert bunch.data.shape == (7532, 130107) assert bunch.target.shape[0] == 7532 assert bunch.data.dtype == np.float64 @@ -83,7 +84,7 @@ def test_20news_vectorized(fetch_20newsgroups_vectorized_fxt): # test subset = all bunch = fetch_20newsgroups_vectorized_fxt(subset="all") - assert sp.isspmatrix_csr(bunch.data) + assert sp.issparse(bunch.data) and bunch.data.format == "csr" assert bunch.data.shape == (11314 + 7532, 130107) assert bunch.target.shape[0] == 11314 + 7532 assert bunch.data.dtype == np.float64 diff --git a/sklearn/datasets/tests/test_arff_parser.py b/sklearn/datasets/tests/test_arff_parser.py index 8465289d187ee..b675439cd2e9d 100644 --- a/sklearn/datasets/tests/test_arff_parser.py +++ b/sklearn/datasets/tests/test_arff_parser.py @@ -1,5 +1,5 @@ -from io import BytesIO import textwrap +from io import BytesIO import pytest diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py index 23dc78570fc9d..f84c275d67cf9 100644 --- a/sklearn/datasets/tests/test_base.py +++ b/sklearn/datasets/tests/test_base.py @@ -2,31 +2,44 @@ import shutil import tempfile import warnings -from pickle import loads -from pickle import dumps from functools import partial +from pathlib import Path +from pickle 
import dumps, loads -import pytest import numpy as np -from sklearn.datasets import get_data_home -from sklearn.datasets import clear_data_home -from sklearn.datasets import load_files -from sklearn.datasets import load_sample_images -from sklearn.datasets import load_sample_image -from sklearn.datasets import load_digits -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_linnerud -from sklearn.datasets import load_iris -from sklearn.datasets import load_breast_cancer -from sklearn.datasets import load_wine +import pytest + +from sklearn.datasets import ( + clear_data_home, + get_data_home, + load_breast_cancer, + load_diabetes, + load_digits, + load_files, + load_iris, + load_linnerud, + load_sample_image, + load_sample_images, + load_wine, +) from sklearn.datasets._base import ( load_csv_data, load_gzip_compressed_csv_data, ) +from sklearn.datasets.tests.test_common import check_as_frame from sklearn.preprocessing import scale from sklearn.utils import Bunch from sklearn.utils.fixes import _is_resource -from sklearn.datasets.tests.test_common import check_as_frame + + +class _DummyPath: + """Minimal class that implements the os.PathLike interface.""" + + def __init__(self, path): + self.path = path + + def __fspath__(self): + return self.path def _remove_dir(path): @@ -65,13 +78,18 @@ def test_category_dir_2(load_files_root): _remove_dir(test_category_dir2) -def test_data_home(data_home): +@pytest.mark.parametrize("path_container", [None, Path, _DummyPath]) +def test_data_home(path_container, data_home): # get_data_home will point to a pre-existing folder + if path_container is not None: + data_home = path_container(data_home) data_home = get_data_home(data_home=data_home) assert data_home == data_home assert os.path.exists(data_home) # clear_data_home will delete both the content and the folder it-self + if path_container is not None: + data_home = path_container(data_home) clear_data_home(data_home=data_home) assert not os.path.exists(data_home) diff --git a/sklearn/datasets/tests/test_california_housing.py b/sklearn/datasets/tests/test_california_housing.py index 495becccd820f..ef6fc95db80bf 100644 --- a/sklearn/datasets/tests/test_california_housing.py +++ b/sklearn/datasets/tests/test_california_housing.py @@ -1,10 +1,11 @@ """Test the california_housing loader, if the data is available, or if specifically requested via environment variable (e.g. for CI jobs).""" +from functools import partial + import pytest from sklearn.datasets.tests.test_common import check_return_X_y -from functools import partial def test_fetch(fetch_california_housing_fxt): diff --git a/sklearn/datasets/tests/test_common.py b/sklearn/datasets/tests/test_common.py index 5f21bdc66b4dc..8048a31041ddc 100644 --- a/sklearn/datasets/tests/test_common.py +++ b/sklearn/datasets/tests/test_common.py @@ -2,8 +2,8 @@ import inspect import os -import pytest import numpy as np +import pytest import sklearn.datasets diff --git a/sklearn/datasets/tests/test_covtype.py b/sklearn/datasets/tests/test_covtype.py index 2cc2fed81bad6..e44fdaae69ec3 100644 --- a/sklearn/datasets/tests/test_covtype.py +++ b/sklearn/datasets/tests/test_covtype.py @@ -2,7 +2,9 @@ or if specifically requested via environment variable (e.g. 
for CI jobs).""" from functools import partial + import pytest + from sklearn.datasets.tests.test_common import check_return_X_y diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index 8eb1d6ec71eb3..5f6e9c83a30b8 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -7,11 +7,14 @@ """ from functools import partial + import pytest -from sklearn.datasets.tests.test_common import check_as_frame -from sklearn.datasets.tests.test_common import check_pandas_dependency_message -from sklearn.datasets.tests.test_common import check_return_X_y +from sklearn.datasets.tests.test_common import ( + check_as_frame, + check_pandas_dependency_message, + check_return_X_y, +) @pytest.mark.parametrize("as_frame", [True, False]) diff --git a/sklearn/datasets/tests/test_lfw.py b/sklearn/datasets/tests/test_lfw.py index 36f33d8a10289..92edb99ce3b0b 100644 --- a/sklearn/datasets/tests/test_lfw.py +++ b/sklearn/datasets/tests/test_lfw.py @@ -8,19 +8,18 @@ joblib, successive runs will be fast (less than 200ms). """ -import random import os +import random import shutil import tempfile +from functools import partial + import numpy as np import pytest -from functools import partial -from sklearn.datasets import fetch_lfw_pairs -from sklearn.datasets import fetch_lfw_people -from sklearn.utils._testing import assert_array_equal +from sklearn.datasets import fetch_lfw_pairs, fetch_lfw_people from sklearn.datasets.tests.test_common import check_return_X_y - +from sklearn.utils._testing import assert_array_equal SCIKIT_LEARN_DATA = None SCIKIT_LEARN_EMPTY_DATA = None diff --git a/sklearn/datasets/tests/test_olivetti_faces.py b/sklearn/datasets/tests/test_olivetti_faces.py index 18fceb0ed8b0e..e5d6c853aa454 100644 --- a/sklearn/datasets/tests/test_olivetti_faces.py +++ b/sklearn/datasets/tests/test_olivetti_faces.py @@ -4,9 +4,8 @@ import numpy as np -from sklearn.utils import Bunch from sklearn.datasets.tests.test_common import check_return_X_y - +from sklearn.utils import Bunch from sklearn.utils._testing import assert_array_equal diff --git a/sklearn/datasets/tests/test_openml.py b/sklearn/datasets/tests/test_openml.py index c13b82dd769d3..93dcffef0cd03 100644 --- a/sklearn/datasets/tests/test_openml.py +++ b/sklearn/datasets/tests/test_openml.py @@ -8,28 +8,26 @@ from urllib.error import HTTPError import numpy as np -import scipy.sparse import pytest +import scipy.sparse import sklearn from sklearn import config_context +from sklearn.datasets import fetch_openml as fetch_openml_orig +from sklearn.datasets._openml import ( + _OPENML_PREFIX, + _get_local_path, + _open_openml_url, + _retry_with_clean_cache, +) from sklearn.utils import Bunch, check_pandas_support -from sklearn.utils.fixes import _open_binary from sklearn.utils._testing import ( SkipTest, assert_allclose, assert_array_equal, fails_if_pypy, ) - -from sklearn.datasets import fetch_openml as fetch_openml_orig -from sklearn.datasets._openml import ( - _OPENML_PREFIX, - _open_openml_url, - _get_local_path, - _retry_with_clean_cache, -) - +from sklearn.utils.fixes import _open_binary OPENML_TEST_DATA_MODULE = "sklearn.datasets.tests.data.openml" # if True, urlopen will be monkey patched to only use local files @@ -981,8 +979,14 @@ def test_fetch_openml_types_inference( @pytest.mark.parametrize( "params, err_msg", [ - ({"parser": "unknown"}, "`parser` must be one of"), - ({"as_frame": "unknown"}, "`as_frame` must be one of"), + ( + {"parser": "unknown"}, + "The 'parser' 
parameter of fetch_openml must be a str among", + ), + ( + {"as_frame": "unknown"}, + "The 'as_frame' parameter of fetch_openml must be an instance", + ), ], ) def test_fetch_openml_validation_parameter(monkeypatch, params, err_msg): @@ -1260,17 +1264,17 @@ def test_fetch_openml_error( ( {"data_id": -1, "name": None, "version": "version"}, ValueError, - "Dataset data_id=-1 and version=version passed, but you can only", + "The 'version' parameter of fetch_openml must be an int in the range", ), ( {"data_id": -1, "name": "nAmE"}, ValueError, - "Dataset data_id=-1 and name=name passed, but you can only", + "The 'data_id' parameter of fetch_openml must be an int in the range", ), ( {"data_id": -1, "name": "nAmE", "version": "version"}, ValueError, - "Dataset data_id=-1 and name=name passed, but you can only", + "The 'version' parameter of fetch_openml must be an int", ), ( {}, @@ -1530,7 +1534,7 @@ def test_fetch_openml_verify_checksum(monkeypatch, as_frame, cache, tmpdir, pars modified_gzip.write(data) # Requests are already mocked by monkey_patch_webbased_functions. - # We want to re-use that mock for all requests except file download, + # We want to reuse that mock for all requests except file download, # hence creating a thin mock over the original mock mocked_openml_url = sklearn.datasets._openml.urlopen diff --git a/sklearn/datasets/tests/test_rcv1.py b/sklearn/datasets/tests/test_rcv1.py index 11d0335f4fb8c..fbb9d67015a30 100644 --- a/sklearn/datasets/tests/test_rcv1.py +++ b/sklearn/datasets/tests/test_rcv1.py @@ -2,12 +2,13 @@ or if specifically requested via environment variable (e.g. for CI jobs).""" -import scipy.sparse as sp -import numpy as np from functools import partial + +import numpy as np +import scipy.sparse as sp + from sklearn.datasets.tests.test_common import check_return_X_y -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_almost_equal, assert_array_equal def test_fetch_rcv1(fetch_rcv1_fxt, global_random_seed): diff --git a/sklearn/datasets/tests/test_samples_generator.py b/sklearn/datasets/tests/test_samples_generator.py index cd23fc5016672..ad6569f0863bf 100644 --- a/sklearn/datasets/tests/test_samples_generator.py +++ b/sklearn/datasets/tests/test_samples_generator.py @@ -6,31 +6,33 @@ import pytest import scipy.sparse as sp -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import ignore_warnings - -from sklearn.datasets import make_classification -from sklearn.datasets import make_multilabel_classification -from sklearn.datasets import make_hastie_10_2 -from sklearn.datasets import make_regression -from sklearn.datasets import make_blobs -from sklearn.datasets import make_friedman1 -from sklearn.datasets import make_friedman2 -from sklearn.datasets import make_friedman3 -from sklearn.datasets import make_low_rank_matrix -from sklearn.datasets import make_moons -from sklearn.datasets import make_circles -from sklearn.datasets import make_sparse_coded_signal -from sklearn.datasets import make_sparse_uncorrelated -from sklearn.datasets import make_spd_matrix -from sklearn.datasets import make_swiss_roll -from sklearn.datasets import make_s_curve -from sklearn.datasets import make_biclusters -from sklearn.datasets import make_checkerboard - +from sklearn.datasets 
import ( + make_biclusters, + make_blobs, + make_checkerboard, + make_circles, + make_classification, + make_friedman1, + make_friedman2, + make_friedman3, + make_hastie_10_2, + make_low_rank_matrix, + make_moons, + make_multilabel_classification, + make_regression, + make_s_curve, + make_sparse_coded_signal, + make_sparse_uncorrelated, + make_spd_matrix, + make_swiss_roll, +) +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) from sklearn.utils.validation import assert_all_finite diff --git a/sklearn/datasets/tests/test_svmlight_format.py b/sklearn/datasets/tests/test_svmlight_format.py index 0b76cce3c5a4d..213e9095a73da 100644 --- a/sklearn/datasets/tests/test_svmlight_format.py +++ b/sklearn/datasets/tests/test_svmlight_format.py @@ -1,22 +1,23 @@ -from bz2 import BZ2File import gzip -from io import BytesIO -import numpy as np -import scipy.sparse as sp import os import shutil +from bz2 import BZ2File +from io import BytesIO from tempfile import NamedTemporaryFile +import numpy as np import pytest - -from sklearn.utils.fixes import _open_binary, _path -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal, assert_allclose -from sklearn.utils._testing import fails_if_pypy +import scipy.sparse as sp import sklearn -from sklearn.datasets import load_svmlight_file, load_svmlight_files, dump_svmlight_file - +from sklearn.datasets import dump_svmlight_file, load_svmlight_file, load_svmlight_files +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + fails_if_pypy, +) +from sklearn.utils.fixes import _open_binary, _path TEST_DATA_MODULE = "sklearn.datasets.tests.data" datafile = "svmlight_classification.txt" diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py index c5f323d3c5d72..1f9cfe07dc0e8 100644 --- a/sklearn/decomposition/__init__.py +++ b/sklearn/decomposition/__init__.py @@ -5,29 +5,28 @@ """ -from ._nmf import ( - NMF, - MiniBatchNMF, - non_negative_factorization, -) -from ._pca import PCA -from ._incremental_pca import IncrementalPCA -from ._kernel_pca import KernelPCA -from ._sparse_pca import SparsePCA, MiniBatchSparsePCA -from ._truncated_svd import TruncatedSVD -from ._fastica import FastICA, fastica +from ..utils.extmath import randomized_svd from ._dict_learning import ( - dict_learning, - dict_learning_online, - sparse_encode, DictionaryLearning, MiniBatchDictionaryLearning, SparseCoder, + dict_learning, + dict_learning_online, + sparse_encode, ) from ._factor_analysis import FactorAnalysis -from ..utils.extmath import randomized_svd +from ._fastica import FastICA, fastica +from ._incremental_pca import IncrementalPCA +from ._kernel_pca import KernelPCA from ._lda import LatentDirichletAllocation - +from ._nmf import ( + NMF, + MiniBatchNMF, + non_negative_factorization, +) +from ._pca import PCA +from ._sparse_pca import MiniBatchSparsePCA, SparsePCA +from ._truncated_svd import TruncatedSVD __all__ = [ "DictionaryLearning", diff --git a/sklearn/decomposition/_base.py b/sklearn/decomposition/_base.py index 20bf7af4f284a..9634395a335ba 100644 --- a/sklearn/decomposition/_base.py +++ b/sklearn/decomposition/_base.py @@ -8,12 +8,13 @@ # # License: BSD 3 clause +from abc import ABCMeta, abstractmethod + import numpy as np from scipy import linalg -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin 
+from ..base import BaseEstimator, ClassNamePrefixFeaturesOutMixin, TransformerMixin from ..utils.validation import check_is_fitted -from abc import ABCMeta, abstractmethod class _BasePCA( diff --git a/sklearn/decomposition/_dict_learning.py b/sklearn/decomposition/_dict_learning.py index 54b3590f5b62e..7fc0915f2ea8e 100644 --- a/sklearn/decomposition/_dict_learning.py +++ b/sklearn/decomposition/_dict_learning.py @@ -3,27 +3,29 @@ # Author: Vlad Niculae, Gael Varoquaux, Alexandre Gramfort # License: BSD 3 clause -import time -import sys import itertools -from numbers import Integral, Real +import sys +import time import warnings - from math import ceil +from numbers import Integral, Real import numpy as np -from scipy import linalg from joblib import effective_n_jobs +from scipy import linalg -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context -from ..utils import check_array, check_random_state, gen_even_slices, gen_batches -from ..utils._param_validation import Hidden, Interval, StrOptions -from ..utils._param_validation import validate_params +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from ..linear_model import Lars, Lasso, LassoLars, orthogonal_mp_gram +from ..utils import check_array, check_random_state, gen_batches, gen_even_slices +from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params from ..utils.extmath import randomized_svd, row_norms, svd_flip +from ..utils.parallel import Parallel, delayed from ..utils.validation import check_is_fitted -from ..utils.parallel import delayed, Parallel -from ..linear_model import Lasso, orthogonal_mp_gram, LassoLars, Lars def _check_positive_coding(method, positive): @@ -218,7 +220,8 @@ def _sparse_encode_precomputed( "check_input": ["boolean"], "verbose": ["verbose"], "positive": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) # XXX : could be moved to the linear_model module def sparse_encode( @@ -733,7 +736,8 @@ def dict_learning_online( dict_init : ndarray of shape (n_components, n_features), default=None Initial values for the dictionary for warm restart scenarios. If `None`, the initial values for the dictionary are created - with an SVD decomposition of the data via :func:`~sklearn.utils.randomized_svd`. + with an SVD decomposition of the data via + :func:`~sklearn.utils.extmath.randomized_svd`. callback : callable, default=None A callable that gets invoked at the end of each iteration. @@ -1077,7 +1081,8 @@ def dict_learning_online( "method": [StrOptions({"lars", "cd"})], "return_n_iter": ["boolean"], "method_max_iter": [Interval(Integral, 0, None, closed="left")], - } + }, + prefer_skip_nested_validation=False, ) def dict_learning( X, @@ -1687,7 +1692,7 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator): >>> from sklearn.datasets import make_sparse_coded_signal >>> from sklearn.decomposition import DictionaryLearning >>> X, dictionary, code = make_sparse_coded_signal( - ... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10, + ... n_samples=30, n_components=15, n_features=20, n_nonzero_coefs=10, ... random_state=42, ... ) >>> dict_learner = DictionaryLearning( @@ -1699,7 +1704,7 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator): We can check the level of sparsity of `X_transformed`: >>> np.mean(X_transformed == 0) - 0.41... + 0.52... 
We can compare the average squared euclidean norm of the reconstruction error of the sparse coded signal relative to the squared euclidean norm of @@ -1707,7 +1712,7 @@ class DictionaryLearning(_BaseSparseCoding, BaseEstimator): >>> X_hat = X_transformed @ dict_learner.components_ >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1)) - 0.07... + 0.05... """ _parameter_constraints: dict = { @@ -2057,16 +2062,16 @@ class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator): >>> from sklearn.datasets import make_sparse_coded_signal >>> from sklearn.decomposition import MiniBatchDictionaryLearning >>> X, dictionary, code = make_sparse_coded_signal( - ... n_samples=100, n_components=15, n_features=20, n_nonzero_coefs=10, + ... n_samples=30, n_components=15, n_features=20, n_nonzero_coefs=10, ... random_state=42) >>> dict_learner = MiniBatchDictionaryLearning( ... n_components=15, batch_size=3, transform_algorithm='lasso_lars', - ... transform_alpha=0.1, random_state=42) + ... transform_alpha=0.1, max_iter=20, random_state=42) >>> X_transformed = dict_learner.fit_transform(X) We can check the level of sparsity of `X_transformed`: - >>> np.mean(X_transformed == 0) < 0.5 + >>> np.mean(X_transformed == 0) > 0.5 True We can compare the average squared euclidean norm of the reconstruction @@ -2075,7 +2080,7 @@ class MiniBatchDictionaryLearning(_BaseSparseCoding, BaseEstimator): >>> X_hat = X_transformed @ dict_learner.components_ >>> np.mean(np.sum((X_hat - X) ** 2, axis=1) / np.sum(X ** 2, axis=1)) - 0.057... + 0.052... """ _parameter_constraints: dict = { diff --git a/sklearn/decomposition/_factor_analysis.py b/sklearn/decomposition/_factor_analysis.py index 8c3d590b2c814..af3498d534483 100644 --- a/sklearn/decomposition/_factor_analysis.py +++ b/sklearn/decomposition/_factor_analysis.py @@ -20,19 +20,23 @@ # License: BSD3 import warnings -from math import sqrt, log +from math import log, sqrt from numbers import Integral, Real + import numpy as np from scipy import linalg - -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from ..exceptions import ConvergenceWarning from ..utils import check_random_state from ..utils._param_validation import Interval, StrOptions from ..utils.extmath import fast_logdet, randomized_svd, squared_norm from ..utils.validation import check_is_fitted -from ..exceptions import ConvergenceWarning class FactorAnalysis(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py index 6dcf62c0ace3b..4b5b6c3f86a63 100644 --- a/sklearn/decomposition/_fastica.py +++ b/sklearn/decomposition/_fastica.py @@ -15,12 +15,16 @@ import numpy as np from scipy import linalg -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) from ..exceptions import ConvergenceWarning -from ..utils import check_array, as_float_array, check_random_state +from ..utils import as_float_array, check_array, check_random_state +from ..utils._param_validation import Interval, Options, StrOptions, validate_params from ..utils.validation import check_is_fitted -from ..utils._param_validation import Interval, StrOptions, Options, validate_params 
__all__ = ["fastica", "FastICA"] @@ -163,7 +167,8 @@ def _cube(x, fun_args): "return_X_mean": ["boolean"], "compute_sources": ["boolean"], "return_n_iter": ["boolean"], - } + }, + prefer_skip_nested_validation=False, ) def fastica( X, diff --git a/sklearn/decomposition/_incremental_pca.py b/sklearn/decomposition/_incremental_pca.py index 5ae5d58b06ca4..f05e2dacc66b2 100644 --- a/sklearn/decomposition/_incremental_pca.py +++ b/sklearn/decomposition/_incremental_pca.py @@ -5,14 +5,15 @@ # License: BSD 3 clause from numbers import Integral + import numpy as np from scipy import linalg, sparse -from ._base import _BasePCA from ..base import _fit_context from ..utils import gen_batches from ..utils._param_validation import Interval -from ..utils.extmath import svd_flip, _incremental_mean_and_var +from ..utils.extmath import _incremental_mean_and_var, svd_flip +from ._base import _BasePCA class IncrementalPCA(_BasePCA): diff --git a/sklearn/decomposition/_kernel_pca.py b/sklearn/decomposition/_kernel_pca.py index 61d502a006c5e..800b472a9b3a6 100644 --- a/sklearn/decomposition/_kernel_pca.py +++ b/sklearn/decomposition/_kernel_pca.py @@ -4,24 +4,29 @@ # Sylvain Marie # License: BSD 3 clause -import numpy as np from numbers import Integral, Real + +import numpy as np from scipy import linalg -from scipy.sparse.linalg import eigsh from scipy.linalg import eigh +from scipy.sparse.linalg import eigsh +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from ..exceptions import NotFittedError +from ..metrics.pairwise import pairwise_kernels +from ..preprocessing import KernelCenterer from ..utils._arpack import _init_arpack_v0 -from ..utils.extmath import svd_flip, _randomized_eigsh +from ..utils._param_validation import Interval, StrOptions +from ..utils.extmath import _randomized_eigsh, svd_flip from ..utils.validation import ( - check_is_fitted, _check_psd_eigenvalues, + check_is_fitted, ) -from ..utils._param_validation import Interval, StrOptions -from ..exceptions import NotFittedError -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context -from ..preprocessing import KernelCenterer -from ..metrics.pairwise import pairwise_kernels class KernelPCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): @@ -427,7 +432,7 @@ def fit(self, X, y=None): raise ValueError("Cannot fit_inverse_transform with a precomputed kernel.") X = self._validate_data(X, accept_sparse="csr", copy=self.copy_X) self.gamma_ = 1 / X.shape[1] if self.gamma is None else self.gamma - self._centerer = KernelCenterer() + self._centerer = KernelCenterer().set_output(transform="default") K = self._get_kernel(X) self._fit_transform(K) diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py index ab1ea5ebb5460..9e161c178b9e3 100644 --- a/sklearn/decomposition/_lda.py +++ b/sklearn/decomposition/_lda.py @@ -14,22 +14,28 @@ import numpy as np import scipy.sparse as sp -from scipy.special import gammaln, logsumexp from joblib import effective_n_jobs +from scipy.special import gammaln, logsumexp -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) from ..utils import check_random_state, gen_batches, gen_even_slices -from ..utils.validation import check_non_negative -from ..utils.validation import 
check_is_fitted -from ..utils.parallel import delayed, Parallel from ..utils._param_validation import Interval, StrOptions - +from ..utils.parallel import Parallel, delayed +from ..utils.validation import check_is_fitted, check_non_negative from ._online_lda_fast import ( - mean_change as cy_mean_change, _dirichlet_expectation_1d as cy_dirichlet_expectation_1d, +) +from ._online_lda_fast import ( _dirichlet_expectation_2d, ) +from ._online_lda_fast import ( + mean_change as cy_mean_change, +) EPS = np.finfo(float).eps diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py index d561583dec205..40db8edd0b2fd 100644 --- a/sklearn/decomposition/_nmf.py +++ b/sklearn/decomposition/_nmf.py @@ -6,34 +6,37 @@ # Tom Dupre la Tour # License: BSD 3 clause +import itertools +import time +import warnings from abc import ABC +from math import sqrt from numbers import Integral, Real + import numpy as np import scipy.sparse as sp -import time -import itertools -import warnings -from math import sqrt from scipy import linalg -from ._cdnmf_fast import _update_cdnmf_fast from .._config import config_context -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context -from ..exceptions import ConvergenceWarning -from ..utils import check_random_state, check_array, gen_batches -from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm -from ..utils.validation import ( - check_is_fitted, - check_non_negative, +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, ) +from ..exceptions import ConvergenceWarning +from ..utils import check_array, check_random_state, gen_batches, metadata_routing from ..utils._param_validation import ( Interval, StrOptions, validate_params, ) -from ..utils import metadata_routing - +from ..utils.extmath import randomized_svd, safe_sparse_dot, squared_norm +from ..utils.validation import ( + check_is_fitted, + check_non_negative, +) +from ._cdnmf_fast import _update_cdnmf_fast EPSILON = np.finfo(np.float32).eps diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py index 1d3c0678aca89..96931324d7cae 100644 --- a/sklearn/decomposition/_pca.py +++ b/sklearn/decomposition/_pca.py @@ -15,20 +15,18 @@ import numpy as np from scipy import linalg -from scipy.special import gammaln from scipy.sparse import issparse from scipy.sparse.linalg import svds +from scipy.special import gammaln -from ._base import _BasePCA from ..base import _fit_context from ..utils import check_random_state from ..utils._arpack import _init_arpack_v0 +from ..utils._param_validation import Interval, RealNotInt, StrOptions from ..utils.deprecation import deprecated -from ..utils.extmath import fast_logdet, randomized_svd, svd_flip -from ..utils.extmath import stable_cumsum +from ..utils.extmath import fast_logdet, randomized_svd, stable_cumsum, svd_flip from ..utils.validation import check_is_fitted -from ..utils._param_validation import Interval, StrOptions -from ..utils._param_validation import RealNotInt +from ._base import _BasePCA def _assess_dimension(spectrum, rank, n_samples): diff --git a/sklearn/decomposition/_sparse_pca.py b/sklearn/decomposition/_sparse_pca.py index 93e4a2164a87f..aa4dec2fb7ee9 100644 --- a/sklearn/decomposition/_sparse_pca.py +++ b/sklearn/decomposition/_sparse_pca.py @@ -6,14 +6,18 @@ import numpy as np +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from 
..linear_model import ridge_regression from ..utils import check_random_state -from ..utils.extmath import svd_flip from ..utils._param_validation import Hidden, Interval, StrOptions +from ..utils.extmath import svd_flip from ..utils.validation import check_array, check_is_fitted -from ..linear_model import ridge_regression -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context -from ._dict_learning import dict_learning, MiniBatchDictionaryLearning +from ._dict_learning import MiniBatchDictionaryLearning, dict_learning class _BaseSparsePCA(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py index 67f5c73028f15..725683e8d46c6 100644 --- a/sklearn/decomposition/_truncated_svd.py +++ b/sklearn/decomposition/_truncated_svd.py @@ -7,18 +7,23 @@ # License: 3-clause BSD. from numbers import Integral, Real + import numpy as np import scipy.sparse as sp from scipy.sparse.linalg import svds -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) from ..utils import check_array, check_random_state from ..utils._arpack import _init_arpack_v0 +from ..utils._param_validation import Interval, StrOptions from ..utils.extmath import randomized_svd, safe_sparse_dot, svd_flip from ..utils.sparsefuncs import mean_variance_axis from ..utils.validation import check_is_fitted -from ..utils._param_validation import Interval, StrOptions __all__ = ["TruncatedSVD"] diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index 6e6ddd20acb8c..b98d75eccbee3 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -1,38 +1,37 @@ -import pytest +import itertools import warnings +from functools import partial import numpy as np -from functools import partial -import itertools +import pytest import sklearn - from sklearn.base import clone - +from sklearn.decomposition import ( + DictionaryLearning, + MiniBatchDictionaryLearning, + SparseCoder, + dict_learning, + dict_learning_online, + sparse_encode, +) +from sklearn.decomposition._dict_learning import _update_dict from sklearn.exceptions import ConvergenceWarning - from sklearn.utils import check_array +from sklearn.utils._testing import ( + TempMemmap, + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.estimator_checks import ( + check_transformer_data_not_an_array, + check_transformer_general, + check_transformers_unfitted, +) from sklearn.utils.parallel import Parallel -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import TempMemmap - -from sklearn.decomposition import DictionaryLearning -from sklearn.decomposition import MiniBatchDictionaryLearning -from sklearn.decomposition import SparseCoder -from sklearn.decomposition import dict_learning -from sklearn.decomposition import dict_learning_online -from sklearn.decomposition import sparse_encode -from sklearn.utils.estimator_checks import check_transformer_data_not_an_array -from 
sklearn.utils.estimator_checks import check_transformer_general -from sklearn.utils.estimator_checks import check_transformers_unfitted - -from sklearn.decomposition._dict_learning import _update_dict - - rng_global = np.random.RandomState(0) n_samples, n_features = 10, 8 X = rng_global.randn(n_samples, n_features) @@ -44,7 +43,7 @@ def test_sparse_encode_shapes_omp(): for n_components, n_samples in itertools.product([1, 5], [1, 9]): X_ = rng.randn(n_samples, n_features) dictionary = rng.randn(n_components, n_features) - for algorithm, n_jobs in itertools.product(algorithms, [1, 3]): + for algorithm, n_jobs in itertools.product(algorithms, [1, 2]): code = sparse_encode(X_, dictionary, algorithm=algorithm, n_jobs=n_jobs) assert code.shape == (n_samples, n_components) @@ -397,8 +396,8 @@ def test_dict_learning_online_positivity(positive_code, positive_dict): def test_dict_learning_online_verbosity(): # test verbosity for better coverage n_components = 5 - from io import StringIO import sys + from io import StringIO old_stdout = sys.stdout try: diff --git a/sklearn/decomposition/tests/test_factor_analysis.py b/sklearn/decomposition/tests/test_factor_analysis.py index 4284327f3eeb4..2ff14f8d71722 100644 --- a/sklearn/decomposition/tests/test_factor_analysis.py +++ b/sklearn/decomposition/tests/test_factor_analysis.py @@ -7,12 +7,14 @@ import numpy as np import pytest -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.exceptions import ConvergenceWarning from sklearn.decomposition import FactorAnalysis -from sklearn.utils._testing import ignore_warnings from sklearn.decomposition._factor_analysis import _ortho_rotation +from sklearn.exceptions import ConvergenceWarning +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + ignore_warnings, +) # Ignore warnings from switching to more power iterations in randomized_svd diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 14938b3787a98..6a376b01ecb19 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -2,18 +2,17 @@ Test the fastica algorithm. 
""" import itertools -import pytest -import warnings import os +import warnings import numpy as np +import pytest from scipy import stats -from sklearn.utils._testing import assert_allclose - -from sklearn.decomposition import FastICA, fastica, PCA +from sklearn.decomposition import PCA, FastICA, fastica from sklearn.decomposition._fastica import _gs_decorrelation from sklearn.exceptions import ConvergenceWarning +from sklearn.utils._testing import assert_allclose def center_and_norm(x, axis=-1): diff --git a/sklearn/decomposition/tests/test_incremental_pca.py b/sklearn/decomposition/tests/test_incremental_pca.py index d8402dad24c04..6ef500b42026b 100644 --- a/sklearn/decomposition/tests/test_incremental_pca.py +++ b/sklearn/decomposition/tests/test_incremental_pca.py @@ -1,17 +1,18 @@ """Tests for Incremental PCA.""" -import numpy as np -import pytest import warnings -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose_dense_sparse +import numpy as np +import pytest from numpy.testing import assert_array_equal +from scipy import sparse from sklearn import datasets from sklearn.decomposition import PCA, IncrementalPCA - -from scipy import sparse +from sklearn.utils._testing import ( + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_almost_equal, +) iris = datasets.load_iris() diff --git a/sklearn/decomposition/tests/test_kernel_pca.py b/sklearn/decomposition/tests/test_kernel_pca.py index 39aa32a3e9694..fdaa71314f43f 100644 --- a/sklearn/decomposition/tests/test_kernel_pca.py +++ b/sklearn/decomposition/tests/test_kernel_pca.py @@ -1,23 +1,23 @@ -import numpy as np -import scipy.sparse as sp -import pytest import warnings -from sklearn.utils._testing import ( - assert_array_almost_equal, - assert_array_equal, - assert_allclose, -) +import numpy as np +import pytest +import scipy.sparse as sp +import sklearn +from sklearn.datasets import load_iris, make_blobs, make_circles from sklearn.decomposition import PCA, KernelPCA -from sklearn.datasets import make_circles -from sklearn.datasets import make_blobs from sklearn.exceptions import NotFittedError from sklearn.linear_model import Perceptron +from sklearn.metrics.pairwise import rbf_kernel +from sklearn.model_selection import GridSearchCV from sklearn.pipeline import Pipeline from sklearn.preprocessing import StandardScaler -from sklearn.model_selection import GridSearchCV -from sklearn.metrics.pairwise import rbf_kernel +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, +) from sklearn.utils.validation import _check_psd_eigenvalues @@ -551,3 +551,15 @@ def test_kernel_pca_inverse_correct_gamma(): X2_recon = kpca2.inverse_transform(kpca1.transform(X)) assert_allclose(X1_recon, X2_recon) + + +def test_kernel_pca_pandas_output(): + """Check that KernelPCA works with pandas output when the solver is arpack. 
+ + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/27579 + """ + pytest.importorskip("pandas") + X, _ = load_iris(as_frame=True, return_X_y=True) + with sklearn.config_context(transform_output="pandas"): + KernelPCA(n_components=2, eigen_solver="arpack").fit_transform(X) diff --git a/sklearn/decomposition/tests/test_nmf.py b/sklearn/decomposition/tests/test_nmf.py index 2b1ed4d91be5e..2cd027f90cdd6 100644 --- a/sklearn/decomposition/tests/test_nmf.py +++ b/sklearn/decomposition/tests/test_nmf.py @@ -1,27 +1,26 @@ import re import sys -from io import StringIO import warnings +from io import StringIO import numpy as np +import pytest import scipy.sparse as sp - from scipy import linalg -from sklearn.decomposition import NMF, MiniBatchNMF -from sklearn.decomposition import non_negative_factorization -from sklearn.decomposition import _nmf as nmf # For testing internals from scipy.sparse import csc_matrix -import pytest - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import ignore_warnings -from sklearn.utils.extmath import squared_norm from sklearn.base import clone +from sklearn.decomposition import NMF, MiniBatchNMF, non_negative_factorization +from sklearn.decomposition import _nmf as nmf # For testing internals from sklearn.exceptions import ConvergenceWarning +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.extmath import squared_norm @pytest.mark.parametrize( diff --git a/sklearn/decomposition/tests/test_online_lda.py b/sklearn/decomposition/tests/test_online_lda.py index 872bd55916fcb..50c812bcb9f14 100644 --- a/sklearn/decomposition/tests/test_online_lda.py +++ b/sklearn/decomposition/tests/test_online_lda.py @@ -1,26 +1,25 @@ import sys +from io import StringIO import numpy as np +import pytest +from numpy.testing import assert_array_equal from scipy.linalg import block_diag from scipy.sparse import csr_matrix from scipy.special import psi -from numpy.testing import assert_array_equal - -import pytest from sklearn.decomposition import LatentDirichletAllocation from sklearn.decomposition._online_lda_fast import ( _dirichlet_expectation_1d, _dirichlet_expectation_2d, ) - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import if_safe_multiprocessing_with_blas - from sklearn.exceptions import NotFittedError -from io import StringIO +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + if_safe_multiprocessing_with_blas, +) def _build_sparse_mtx(): diff --git a/sklearn/decomposition/tests/test_pca.py b/sklearn/decomposition/tests/test_pca.py index 5bf893f92fd16..0176ebd0be9e7 100644 --- a/sklearn/decomposition/tests/test_pca.py +++ b/sklearn/decomposition/tests/test_pca.py @@ -1,17 +1,15 @@ +import warnings + import numpy as np +import pytest import scipy as sp from numpy.testing import assert_array_equal -import pytest -import warnings - -from sklearn.utils._testing import assert_allclose - from sklearn import datasets -from sklearn.decomposition import PCA from sklearn.datasets import load_iris -from sklearn.decomposition._pca 
import _assess_dimension -from sklearn.decomposition._pca import _infer_dimension +from sklearn.decomposition import PCA +from sklearn.decomposition._pca import _assess_dimension, _infer_dimension +from sklearn.utils._testing import assert_allclose iris = datasets.load_iris() PCA_SOLVERS = ["full", "arpack", "randomized", "auto"] diff --git a/sklearn/decomposition/tests/test_sparse_pca.py b/sklearn/decomposition/tests/test_sparse_pca.py index cf237014c6049..848d5d9d7ee34 100644 --- a/sklearn/decomposition/tests/test_sparse_pca.py +++ b/sklearn/decomposition/tests/test_sparse_pca.py @@ -2,17 +2,18 @@ # License: BSD 3 clause import sys -import pytest import numpy as np +import pytest from numpy.testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import if_safe_multiprocessing_with_blas - -from sklearn.decomposition import SparsePCA, MiniBatchSparsePCA, PCA +from sklearn.decomposition import PCA, MiniBatchSparsePCA, SparsePCA from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + if_safe_multiprocessing_with_blas, +) def generate_toy_data(n_components, n_samples, image_size, random_state=None): @@ -119,12 +120,12 @@ def test_initialization(): def test_mini_batch_correct_shapes(): rng = np.random.RandomState(0) X = rng.randn(12, 10) - pca = MiniBatchSparsePCA(n_components=8, random_state=rng) + pca = MiniBatchSparsePCA(n_components=8, max_iter=1, random_state=rng) U = pca.fit_transform(X) assert pca.components_.shape == (8, 10) assert U.shape == (12, 8) # test overcomplete decomposition - pca = MiniBatchSparsePCA(n_components=13, random_state=rng) + pca = MiniBatchSparsePCA(n_components=13, max_iter=1, random_state=rng) U = pca.fit_transform(X) assert pca.components_.shape == (13, 10) assert U.shape == (12, 13) diff --git a/sklearn/decomposition/tests/test_truncated_svd.py b/sklearn/decomposition/tests/test_truncated_svd.py index bd0bde6e08aa7..4edb7d4a11109 100644 --- a/sklearn/decomposition/tests/test_truncated_svd.py +++ b/sklearn/decomposition/tests/test_truncated_svd.py @@ -1,13 +1,12 @@ """Test truncated SVD transformer.""" import numpy as np -import scipy.sparse as sp - import pytest +import scipy.sparse as sp -from sklearn.decomposition import TruncatedSVD, PCA +from sklearn.decomposition import PCA, TruncatedSVD from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_less, assert_allclose +from sklearn.utils._testing import assert_allclose, assert_array_less SVD_SOLVERS = ["arpack", "randomized"] diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 275f4ae4d3b30..29146ca857694 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -10,24 +10,27 @@ # License: BSD 3-Clause import warnings +from numbers import Integral, Real + import numpy as np import scipy.linalg from scipy import linalg -from numbers import Real, Integral -from .base import BaseEstimator, TransformerMixin, ClassifierMixin -from .base import ClassNamePrefixFeaturesOutMixin -from .base import _fit_context +from .base import ( + BaseEstimator, + ClassifierMixin, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from .covariance import empirical_covariance, ledoit_wolf, shrunk_covariance from .linear_model._base import LinearClassifierMixin -from .covariance import ledoit_wolf, 
empirical_covariance, shrunk_covariance -from .utils.multiclass import unique_labels -from .utils.validation import check_is_fitted -from .utils._array_api import get_namespace, _expit, device, size -from .utils.multiclass import check_classification_targets -from .utils.extmath import softmax -from .utils._param_validation import StrOptions, Interval, HasMethods from .preprocessing import StandardScaler - +from .utils._array_api import _expit, device, get_namespace, size +from .utils._param_validation import HasMethods, Interval, StrOptions +from .utils.extmath import softmax +from .utils.multiclass import check_classification_targets, unique_labels +from .utils.validation import check_is_fitted __all__ = ["LinearDiscriminantAnalysis", "QuadraticDiscriminantAnalysis"] diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 0d8519484d7a5..63318b07ce580 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -9,18 +9,25 @@ import numpy as np import scipy.sparse as sp -from .base import BaseEstimator, ClassifierMixin, RegressorMixin -from .base import MultiOutputMixin -from .base import _fit_context +from .base import ( + BaseEstimator, + ClassifierMixin, + MultiOutputMixin, + RegressorMixin, + _fit_context, +) from .utils import check_random_state -from .utils._param_validation import StrOptions, Interval -from .utils.validation import _num_samples -from .utils.validation import check_array -from .utils.validation import check_consistent_length -from .utils.validation import check_is_fitted, _check_sample_weight +from .utils._param_validation import Interval, StrOptions +from .utils.multiclass import class_distribution from .utils.random import _random_choice_csc from .utils.stats import _weighted_percentile -from .utils.multiclass import class_distribution +from .utils.validation import ( + _check_sample_weight, + _num_samples, + check_array, + check_consistent_length, + check_is_fitted, +) class DummyClassifier(MultiOutputMixin, ClassifierMixin, BaseEstimator): @@ -220,7 +227,7 @@ def fit(self, X, y, sample_weight=None): "The constant target value must be present in " "the training data. You provided constant={}. " "Possible values are: {}.".format( - self.constant, list(self.classes_[k]) + self.constant, self.classes_[k].tolist() ) ) raise ValueError(err_msg) diff --git a/sklearn/ensemble/__init__.py b/sklearn/ensemble/__init__.py index e892d36a0ce46..f4a3756bdaf1d 100644 --- a/sklearn/ensemble/__init__.py +++ b/sklearn/ensemble/__init__.py @@ -2,27 +2,24 @@ The :mod:`sklearn.ensemble` module includes ensemble-based methods for classification, regression and anomaly detection. 
""" +from ._bagging import BaggingClassifier, BaggingRegressor from ._base import BaseEnsemble -from ._forest import RandomForestClassifier -from ._forest import RandomForestRegressor -from ._forest import RandomTreesEmbedding -from ._forest import ExtraTreesClassifier -from ._forest import ExtraTreesRegressor -from ._bagging import BaggingClassifier -from ._bagging import BaggingRegressor -from ._iforest import IsolationForest -from ._weight_boosting import AdaBoostClassifier -from ._weight_boosting import AdaBoostRegressor -from ._gb import GradientBoostingClassifier -from ._gb import GradientBoostingRegressor -from ._voting import VotingClassifier -from ._voting import VotingRegressor -from ._stacking import StackingClassifier -from ._stacking import StackingRegressor +from ._forest import ( + ExtraTreesClassifier, + ExtraTreesRegressor, + RandomForestClassifier, + RandomForestRegressor, + RandomTreesEmbedding, +) +from ._gb import GradientBoostingClassifier, GradientBoostingRegressor from ._hist_gradient_boosting.gradient_boosting import ( - HistGradientBoostingRegressor, HistGradientBoostingClassifier, + HistGradientBoostingRegressor, ) +from ._iforest import IsolationForest +from ._stacking import StackingClassifier, StackingRegressor +from ._voting import VotingClassifier, VotingRegressor +from ._weight_boosting import AdaBoostClassifier, AdaBoostRegressor __all__ = [ "BaseEnsemble", diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py index 0354413fdebfe..117bf470c509f 100644 --- a/sklearn/ensemble/_bagging.py +++ b/sklearn/ensemble/_bagging.py @@ -6,28 +6,25 @@ import itertools import numbers -import numpy as np from abc import ABCMeta, abstractmethod +from functools import partial from numbers import Integral from warnings import warn -from functools import partial -from ._base import BaseEnsemble, _partition_estimators -from ..base import ClassifierMixin, RegressorMixin -from ..base import _fit_context -from ..metrics import r2_score, accuracy_score +import numpy as np + +from ..base import ClassifierMixin, RegressorMixin, _fit_context +from ..metrics import accuracy_score, r2_score from ..tree import DecisionTreeClassifier, DecisionTreeRegressor -from ..utils import check_random_state, column_or_1d -from ..utils import indices_to_mask +from ..utils import check_random_state, column_or_1d, indices_to_mask +from ..utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions +from ..utils._tags import _safe_tags from ..utils.metaestimators import available_if from ..utils.multiclass import check_classification_targets +from ..utils.parallel import Parallel, delayed from ..utils.random import sample_without_replacement -from ..utils._param_validation import Interval, HasMethods, StrOptions -from ..utils._param_validation import RealNotInt -from ..utils.validation import has_fit_parameter, check_is_fitted, _check_sample_weight -from ..utils._tags import _safe_tags -from ..utils.parallel import delayed, Parallel - +from ..utils.validation import _check_sample_weight, check_is_fitted, has_fit_parameter +from ._base import BaseEnsemble, _partition_estimators __all__ = ["BaggingClassifier", "BaggingRegressor"] diff --git a/sklearn/ensemble/_base.py b/sklearn/ensemble/_base.py index 3850fa724f11a..3107b4cf9a6c5 100644 --- a/sklearn/ensemble/_base.py +++ b/sklearn/ensemble/_base.py @@ -3,20 +3,15 @@ # Authors: Gilles Louppe # License: BSD 3 clause +import warnings from abc import ABCMeta, abstractmethod from typing import List -import warnings import 
numpy as np - from joblib import effective_n_jobs -from ..base import clone -from ..base import is_classifier, is_regressor -from ..base import BaseEstimator -from ..base import MetaEstimatorMixin -from ..utils import Bunch, _print_elapsed_time, deprecated -from ..utils import check_random_state +from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier, is_regressor +from ..utils import Bunch, _print_elapsed_time, check_random_state, deprecated from ..utils.metaestimators import _BaseComposition diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index ce3a6f78b241d..4dbe4314f8933 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -40,19 +40,24 @@ class calls the ``fit`` method of each sub-estimator on random samples # License: BSD 3 clause +import threading +from abc import ABCMeta, abstractmethod from numbers import Integral, Real from warnings import catch_warnings, simplefilter, warn -import threading -from abc import ABCMeta, abstractmethod import numpy as np -from scipy.sparse import issparse from scipy.sparse import hstack as sparse_hstack +from scipy.sparse import issparse -from ..base import is_classifier -from ..base import ClassifierMixin, MultiOutputMixin, RegressorMixin, TransformerMixin -from ..base import _fit_context - +from ..base import ( + ClassifierMixin, + MultiOutputMixin, + RegressorMixin, + TransformerMixin, + _fit_context, + is_classifier, +) +from ..exceptions import DataConversionWarning from ..metrics import accuracy_score, r2_score from ..preprocessing import OneHotEncoder from ..tree import ( @@ -62,21 +67,18 @@ class calls the ``fit`` method of each sub-estimator on random samples ExtraTreeClassifier, ExtraTreeRegressor, ) -from ..tree._tree import DTYPE, DOUBLE +from ..tree._tree import DOUBLE, DTYPE from ..utils import check_random_state, compute_sample_weight -from ..exceptions import DataConversionWarning -from ._base import BaseEnsemble, _partition_estimators -from ..utils.parallel import delayed, Parallel +from ..utils._param_validation import Interval, RealNotInt, StrOptions from ..utils.multiclass import check_classification_targets, type_of_target +from ..utils.parallel import Parallel, delayed from ..utils.validation import ( - check_is_fitted, - _check_sample_weight, _check_feature_names_in, + _check_sample_weight, + _num_samples, + check_is_fitted, ) -from ..utils.validation import _num_samples -from ..utils._param_validation import Interval, StrOptions -from ..utils._param_validation import RealNotInt - +from ._base import BaseEnsemble, _partition_estimators __all__ = [ "RandomForestClassifier", @@ -1975,7 +1977,7 @@ class ExtraTreesClassifier(ForestClassifier): Attributes ---------- - estimator_ : :class:`~sklearn.tree.ExtraTreesClassifier` + estimator_ : :class:`~sklearn.tree.ExtraTreeClassifier` The child estimator template used to create the collection of fitted sub-estimators. 
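The docstring fix above corrects the type of the `estimator_` attribute: for `ExtraTreesClassifier` it is a single `ExtraTreeClassifier` template, not another forest. A small sketch confirming the corrected statement (toy data, illustrative only):

from sklearn.datasets import make_classification
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.tree import ExtraTreeClassifier

X, y = make_classification(n_samples=50, random_state=0)
forest = ExtraTreesClassifier(n_estimators=3, random_state=0).fit(X, y)

# The template used to build the fitted sub-estimators is a single tree.
assert isinstance(forest.estimator_, ExtraTreeClassifier)
assert isinstance(forest.estimators_[0], ExtraTreeClassifier)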
diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py index 1b924749f52bd..3d02eeab35a75 100644 --- a/sklearn/ensemble/_gb.py +++ b/sklearn/ensemble/_gb.py @@ -20,37 +20,26 @@ # Arnaud Joly, Jacob Schreiber # License: BSD 3 clause -from abc import ABCMeta -from abc import abstractmethod -from numbers import Integral, Real import warnings - -from ._base import BaseEnsemble -from ..base import ClassifierMixin, RegressorMixin -from ..base import is_classifier -from ..base import _fit_context - -from ._gradient_boosting import predict_stages -from ._gradient_boosting import predict_stage -from ._gradient_boosting import _random_sample_mask +from abc import ABCMeta, abstractmethod +from numbers import Integral, Real +from time import time import numpy as np +from scipy.sparse import csc_matrix, csr_matrix, issparse -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import issparse - -from time import time +from ..base import ClassifierMixin, RegressorMixin, _fit_context, is_classifier +from ..exceptions import NotFittedError from ..model_selection import train_test_split from ..tree import DecisionTreeRegressor -from ..tree._tree import DTYPE, DOUBLE -from . import _gb_losses - +from ..tree._tree import DOUBLE, DTYPE from ..utils import check_array, check_random_state, column_or_1d from ..utils._param_validation import HasMethods, Interval, StrOptions -from ..utils.validation import check_is_fitted, _check_sample_weight from ..utils.multiclass import check_classification_targets -from ..exceptions import NotFittedError +from ..utils.validation import _check_sample_weight, check_is_fitted +from . import _gb_losses +from ._base import BaseEnsemble +from ._gradient_boosting import _random_sample_mask, predict_stage, predict_stages class VerboseReporter: @@ -443,16 +432,18 @@ def fit(self, X, y, sample_weight=None, monitor=None): if self.n_iter_no_change is not None: stratify = y if is_classifier(self) else None - X, X_val, y, y_val, sample_weight, sample_weight_val = train_test_split( - X, - y, - sample_weight, - random_state=self.random_state, - test_size=self.validation_fraction, - stratify=stratify, + X_train, X_val, y_train, y_val, sample_weight_train, sample_weight_val = ( + train_test_split( + X, + y, + sample_weight, + random_state=self.random_state, + test_size=self.validation_fraction, + stratify=stratify, + ) ) if is_classifier(self): - if self._n_classes != np.unique(y).shape[0]: + if self._n_classes != np.unique(y_train).shape[0]: # We choose to error here. The problem is that the init # estimator would be trained on y, which has some missing # classes now, so its predictions would not have the @@ -463,6 +454,7 @@ def fit(self, X, y, sample_weight=None, monitor=None): "seed." 
) else: + X_train, y_train, sample_weight_train = X, y, sample_weight X_val = y_val = sample_weight_val = None if not self._is_initialized(): @@ -472,19 +464,21 @@ def fit(self, X, y, sample_weight=None, monitor=None): # fit initial model and initialize raw predictions if self.init_ == "zero": raw_predictions = np.zeros( - shape=(X.shape[0], self._loss.K), dtype=np.float64 + shape=(X_train.shape[0], self._loss.K), dtype=np.float64 ) else: # XXX clean this once we have a support_sample_weight tag if sample_weight_is_none: - self.init_.fit(X, y) + self.init_.fit(X_train, y_train) else: msg = ( "The initial estimator {} does not support sample " "weights.".format(self.init_.__class__.__name__) ) try: - self.init_.fit(X, y, sample_weight=sample_weight) + self.init_.fit( + X_train, y_train, sample_weight=sample_weight_train + ) except TypeError as e: if "unexpected keyword argument 'sample_weight'" in str(e): # regular estimator without SW support @@ -502,7 +496,9 @@ def fit(self, X, y, sample_weight=None, monitor=None): else: # regular estimator whose input checking failed raise - raw_predictions = self._loss.get_init_raw_predictions(X, self.init_) + raw_predictions = self._loss.get_init_raw_predictions( + X_train, self.init_ + ) begin_at_stage = 0 @@ -522,22 +518,22 @@ def fit(self, X, y, sample_weight=None, monitor=None): # The requirements of _raw_predict # are more constrained than fit. It accepts only CSR # matrices. Finite values have already been checked in _validate_data. - X = check_array( - X, + X_train = check_array( + X_train, dtype=DTYPE, order="C", accept_sparse="csr", force_all_finite=False, ) - raw_predictions = self._raw_predict(X) + raw_predictions = self._raw_predict(X_train) self._resize_state() # fit the boosting stages n_stages = self._fit_stages( - X, - y, + X_train, + y_train, raw_predictions, - sample_weight, + sample_weight_train, self._rng, X_val, y_val, diff --git a/sklearn/ensemble/_gb_losses.py b/sklearn/ensemble/_gb_losses.py index db2116d9aa2e1..7fb7e4726c325 100644 --- a/sklearn/ensemble/_gb_losses.py +++ b/sklearn/ensemble/_gb_losses.py @@ -2,16 +2,14 @@ decision trees. 
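The `fit` refactor above only renames the internal variables (`X_train`, `y_train`, `sample_weight_train`) used when `n_iter_no_change` triggers the internal train/validation split; the public early-stopping behaviour is unchanged. A hedged usage sketch of that code path, with illustrative parameter values that are not part of the PR:

from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor

X, y = make_regression(n_samples=500, noise=10.0, random_state=0)

# With n_iter_no_change set, fit() carves validation_fraction of the data off
# into the X_val / y_val variables seen above and stops adding stages once the
# validation loss has not improved for 5 consecutive iterations.
gbr = GradientBoostingRegressor(
    n_estimators=500,
    n_iter_no_change=5,
    validation_fraction=0.1,
    random_state=0,
).fit(X, y)

print(gbr.n_estimators_)  # stages actually fitted, usually well below 500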
""" -from abc import ABCMeta -from abc import abstractmethod +from abc import ABCMeta, abstractmethod import numpy as np from scipy.special import expit, logsumexp +from ..dummy import DummyClassifier, DummyRegressor from ..tree._tree import TREE_LEAF from ..utils.stats import _weighted_percentile -from ..dummy import DummyClassifier -from ..dummy import DummyRegressor class LossFunction(metaclass=ABCMeta): diff --git a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx index d80d558f03be8..a84c7dbf9f280 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/_predictor.pyx @@ -11,6 +11,7 @@ from .common cimport X_BINNED_DTYPE_C from .common cimport BITSET_INNER_DTYPE_C from .common cimport node_struct from ._bitset cimport in_bitset_2d_memoryview +from sklearn.utils._typedefs cimport intp_t def _predict_from_raw_data( # raw data = non-binned data @@ -189,7 +190,7 @@ def _compute_partial_dependence( node_struct * current_node # pointer to avoid copying attributes unsigned int sample_idx - unsigned feature_idx + intp_t feature_idx unsigned stack_size Y_DTYPE_C left_sample_frac Y_DTYPE_C current_weight diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py index 805a13b2d361b..8786e866d7be3 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py @@ -9,14 +9,14 @@ import numpy as np -from ...utils import check_random_state, check_array from ...base import BaseEstimator, TransformerMixin -from ...utils.validation import check_is_fitted -from ...utils.fixes import percentile +from ...utils import check_array, check_random_state from ...utils._openmp_helpers import _openmp_effective_n_threads +from ...utils.fixes import percentile +from ...utils.validation import check_is_fitted from ._binning import _map_to_bins -from .common import X_DTYPE, X_BINNED_DTYPE, ALMOST_INF, X_BITSET_INNER_DTYPE from ._bitset import set_bitset_memoryview +from .common import ALMOST_INF, X_BINNED_DTYPE, X_BITSET_INNER_DTYPE, X_DTYPE def _find_binning_thresholds(col_data, max_bins): diff --git a/sklearn/ensemble/_hist_gradient_boosting/common.pxd b/sklearn/ensemble/_hist_gradient_boosting/common.pxd index d1c70f0483ed4..3e71f2dc56060 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/common.pxd +++ b/sklearn/ensemble/_hist_gradient_boosting/common.pxd @@ -1,4 +1,5 @@ cimport numpy as cnp +from sklearn.utils._typedefs cimport intp_t cnp.import_array() @@ -23,7 +24,7 @@ cdef packed struct node_struct: # needs to be packed since by default numpy dtypes aren't aligned Y_DTYPE_C value unsigned int count - unsigned int feature_idx + intp_t feature_idx X_DTYPE_C num_threshold unsigned char missing_go_to_left unsigned int left diff --git a/sklearn/ensemble/_hist_gradient_boosting/common.pyx b/sklearn/ensemble/_hist_gradient_boosting/common.pyx index f7b36f5796508..33264a3c21295 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/common.pyx +++ b/sklearn/ensemble/_hist_gradient_boosting/common.pyx @@ -19,7 +19,7 @@ HISTOGRAM_DTYPE = np.dtype([ PREDICTOR_RECORD_DTYPE = np.dtype([ ('value', Y_DTYPE), ('count', np.uint32), - ('feature_idx', np.uint32), + ('feature_idx', np.intp), ('num_threshold', X_DTYPE), ('missing_go_to_left', np.uint8), ('left', np.uint32), diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py 
b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index 79b640057abe5..5d030d3add5bb 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -1,13 +1,14 @@ """Fast Gradient Boosting decision trees for classification and regression.""" # Author: Nicolas Hug +import itertools from abc import ABC, abstractmethod from functools import partial -import itertools -from numbers import Real, Integral +from numbers import Integral, Real +from timeit import default_timer as time import numpy as np -from timeit import default_timer as time + from ..._loss.loss import ( _LOSSES, BaseLoss, @@ -17,29 +18,31 @@ HalfPoissonLoss, PinballLoss, ) -from ...base import BaseEstimator, RegressorMixin, ClassifierMixin, is_classifier -from ...base import _fit_context -from ...utils import check_random_state, resample, compute_sample_weight -from ...utils.validation import ( - check_is_fitted, - check_consistent_length, - _check_sample_weight, - _check_monotonic_cst, +from ...base import ( + BaseEstimator, + ClassifierMixin, + RegressorMixin, + _fit_context, + is_classifier, ) -from ...utils._param_validation import Interval, StrOptions -from ...utils._param_validation import RealNotInt -from ...utils._openmp_helpers import _openmp_effective_n_threads -from ...utils.multiclass import check_classification_targets from ...metrics import check_scoring from ...model_selection import train_test_split from ...preprocessing import LabelEncoder +from ...utils import check_random_state, compute_sample_weight, resample +from ...utils._openmp_helpers import _openmp_effective_n_threads +from ...utils._param_validation import Interval, RealNotInt, StrOptions +from ...utils.multiclass import check_classification_targets +from ...utils.validation import ( + _check_monotonic_cst, + _check_sample_weight, + check_consistent_length, + check_is_fitted, +) from ._gradient_boosting import _update_raw_predictions -from .common import Y_DTYPE, X_DTYPE, G_H_DTYPE - from .binning import _BinMapper +from .common import G_H_DTYPE, X_DTYPE, Y_DTYPE from .grower import TreeGrower - _LOSSES = _LOSSES.copy() _LOSSES.update( { @@ -379,7 +382,7 @@ def fit(self, X, y, sample_weight=None): rng = check_random_state(self.random_state) - # When warm starting, we want to re-use the same seed that was used + # When warm starting, we want to reuse the same seed that was used # the first time fit was called (e.g. for subsampling or for the # train/val split). if not (self.warm_start and self._is_fitted()): @@ -535,7 +538,7 @@ def fit(self, X, y, sample_weight=None): # we're going to compute scoring w.r.t the loss. As losses # take raw predictions as input (unlike the scorers), we # can optimize a bit and avoid repeating computing the - # predictions of the previous trees. We'll re-use + # predictions of the previous trees. We'll reuse # raw_predictions (as it's needed for training anyway) for # evaluating the training loss, and create # raw_predictions_val for storing the raw predictions of @@ -1271,6 +1274,9 @@ class HistGradientBoostingRegressor(RegressorMixin, BaseHistGradientBoosting): For each categorical feature, there must be at most `max_bins` unique categories, and each categorical value must be less then `max_bins - 1`. Negative values for categorical features are treated as missing values. + All categorical values are converted to floating point numbers. + This means that categorical values of 1.0 and 1 are treated as + the same category. 
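The note just added to the docstring (categorical values are converted to floating point, so `1` and `1.0` denote the same category) can be illustrated with a small sketch that fits the same column once integer-coded and once float-coded; the parameters below are illustrative, not from the PR:

import numpy as np
from sklearn.ensemble import HistGradientBoostingRegressor

rng = np.random.RandomState(0)
X_int = rng.randint(0, 4, size=(200, 1))     # categories 0, 1, 2, 3 as ints
X_float = X_int.astype(np.float64)           # the same categories as floats
y = rng.normal(size=200)

params = dict(categorical_features=[0], max_iter=20, random_state=0)
pred_int = HistGradientBoostingRegressor(**params).fit(X_int, y).predict(X_int)
pred_float = HistGradientBoostingRegressor(**params).fit(X_float, y).predict(X_float)

# Both encodings are converted to float internally, so the fitted models agree.
np.testing.assert_allclose(pred_int, pred_float)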
Read more in the :ref:`User Guide `. @@ -1628,6 +1634,9 @@ class HistGradientBoostingClassifier(ClassifierMixin, BaseHistGradientBoosting): For each categorical feature, there must be at most `max_bins` unique categories, and each categorical value must be less then `max_bins - 1`. Negative values for categorical features are treated as missing values. + All categorical values are converted to floating point numbers. + This means that categorical values of 1.0 and 1 are treated as + the same category. Read more in the :ref:`User Guide `. diff --git a/sklearn/ensemble/_hist_gradient_boosting/grower.py b/sklearn/ensemble/_hist_gradient_boosting/grower.py index b8c0c17969e99..4ed6041ecaa30 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/grower.py @@ -6,22 +6,25 @@ """ # Author: Nicolas Hug -from heapq import heappush, heappop -import numpy as np -from timeit import default_timer as time import numbers +from heapq import heappop, heappush +from timeit import default_timer as time -from .splitting import Splitter +import numpy as np + +from sklearn.utils._openmp_helpers import _openmp_effective_n_threads + +from ._bitset import set_raw_bitset_from_binned_bitset +from .common import ( + PREDICTOR_RECORD_DTYPE, + X_BITSET_INNER_DTYPE, + Y_DTYPE, + MonotonicConstraint, +) from .histogram import HistogramBuilder from .predictor import TreePredictor +from .splitting import Splitter from .utils import sum_parallel -from .common import PREDICTOR_RECORD_DTYPE -from .common import X_BITSET_INNER_DTYPE -from .common import Y_DTYPE -from .common import MonotonicConstraint -from ._bitset import set_raw_bitset_from_binned_bitset -from sklearn.utils._openmp_helpers import _openmp_effective_n_threads - EPS = np.finfo(Y_DTYPE).eps # to avoid zero division errors diff --git a/sklearn/ensemble/_hist_gradient_boosting/predictor.py b/sklearn/ensemble/_hist_gradient_boosting/predictor.py index 746fa34753121..600e55e43467f 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/predictor.py @@ -5,10 +5,12 @@ import numpy as np +from ._predictor import ( + _compute_partial_dependence, + _predict_from_binned_data, + _predict_from_raw_data, +) from .common import Y_DTYPE -from ._predictor import _predict_from_raw_data -from ._predictor import _predict_from_binned_data -from ._predictor import _compute_partial_dependence class TreePredictor: diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py index 08bfebfcbf6c9..6f9fcd0057141 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_binning.py @@ -1,15 +1,17 @@ import numpy as np -from numpy.testing import assert_array_equal, assert_allclose import pytest +from numpy.testing import assert_allclose, assert_array_equal from sklearn.ensemble._hist_gradient_boosting.binning import ( _BinMapper, _find_binning_thresholds, _map_to_bins, ) -from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import ALMOST_INF +from sklearn.ensemble._hist_gradient_boosting.common import ( + ALMOST_INF, + X_BINNED_DTYPE, + X_DTYPE, +) from sklearn.utils._openmp_helpers import _openmp_effective_n_threads n_threads = _openmp_effective_n_threads() diff --git 
a/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py index e058781cefcef..c02d66b666f80 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_bitset.py @@ -1,10 +1,10 @@ -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose from sklearn.ensemble._hist_gradient_boosting._bitset import ( - set_bitset_memoryview, in_bitset_memoryview, + set_bitset_memoryview, set_raw_bitset_from_binned_bitset, ) from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py index 6bd5b38d5a4ee..bbdcb38ef013a 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py @@ -1,13 +1,15 @@ -from sklearn.model_selection import train_test_split -from sklearn.metrics import accuracy_score -from sklearn.datasets import make_classification, make_regression import numpy as np import pytest -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.datasets import make_classification, make_regression +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator +from sklearn.metrics import accuracy_score +from sklearn.model_selection import train_test_split @pytest.mark.parametrize("seed", range(5)) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py index 94d8960b6e813..4851c8e129203 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_gradient_boosting.py @@ -1,36 +1,35 @@ +import re import warnings -import re import numpy as np import pytest from numpy.testing import assert_allclose, assert_array_equal + from sklearn._loss.loss import ( AbsoluteError, HalfBinomialLoss, HalfSquaredError, PinballLoss, ) -from sklearn.datasets import make_classification, make_regression -from sklearn.datasets import make_low_rank_matrix -from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler, OneHotEncoder -from sklearn.model_selection import train_test_split, cross_val_score -from sklearn.base import clone, BaseEstimator, TransformerMixin -from sklearn.base import is_regressor -from sklearn.pipeline import make_pipeline -from sklearn.metrics import mean_gamma_deviance, mean_poisson_deviance -from sklearn.dummy import DummyRegressor -from sklearn.exceptions import NotFittedError +from sklearn.base import BaseEstimator, TransformerMixin, clone, is_regressor from sklearn.compose import make_column_transformer - -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.datasets import make_classification, make_low_rank_matrix, make_regression +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + 
HistGradientBoostingRegressor, +) from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE +from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.exceptions import NotFittedError +from sklearn.metrics import mean_gamma_deviance, mean_poisson_deviance +from sklearn.model_selection import cross_val_score, train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler, OneHotEncoder from sklearn.utils import shuffle from sklearn.utils._openmp_helpers import _openmp_effective_n_threads - n_threads = _openmp_effective_n_threads() X_classification, y_classification = make_classification(random_state=0) diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py index f3380fbf2af6d..a55cb871e3c72 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_grower.py @@ -1,17 +1,18 @@ import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_equal from pytest import approx -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose -from sklearn.preprocessing import OneHotEncoder -from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BITSET_INNER_DTYPE +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + X_BINNED_DTYPE, + X_BITSET_INNER_DTYPE, + X_DTYPE, + Y_DTYPE, +) +from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.preprocessing import OneHotEncoder from sklearn.utils._openmp_helpers import _openmp_effective_n_threads n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py index 1d5963d20739b..99f74b0f542ee 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_histogram.py @@ -1,20 +1,20 @@ import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_equal -from numpy.testing import assert_allclose -from numpy.testing import assert_array_equal - +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + HISTOGRAM_DTYPE, + X_BINNED_DTYPE, +) from sklearn.ensemble._hist_gradient_boosting.histogram import ( - _build_histogram_naive, _build_histogram, + _build_histogram_naive, _build_histogram_no_hessian, - _build_histogram_root_no_hessian, _build_histogram_root, + _build_histogram_root_no_hessian, _subtract_histograms, ) -from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE @pytest.mark.parametrize("build_func", [_build_histogram_naive, _build_histogram]) diff --git 
a/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py index f11bec3bd77db..7782b5b32eb68 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_monotonic_contraints.py @@ -1,18 +1,23 @@ import re + import numpy as np import pytest +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + X_BINNED_DTYPE, + MonotonicConstraint, +) from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint +from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder from sklearn.ensemble._hist_gradient_boosting.splitting import ( Splitter, compute_node_value, ) -from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier from sklearn.utils._openmp_helpers import _openmp_effective_n_threads from sklearn.utils._testing import _convert_container diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py index 856ab180459d2..3c3c9ae81bac2 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_predictor.py @@ -1,25 +1,25 @@ import numpy as np -from numpy.testing import assert_allclose -from sklearn.datasets import make_regression -from sklearn.model_selection import train_test_split -from sklearn.metrics import r2_score import pytest +from numpy.testing import assert_allclose +from sklearn.datasets import make_regression +from sklearn.ensemble._hist_gradient_boosting._bitset import ( + set_bitset_memoryview, + set_raw_bitset_from_binned_bitset, +) from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper -from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower -from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor from sklearn.ensemble._hist_gradient_boosting.common import ( + ALMOST_INF, G_H_DTYPE, PREDICTOR_RECORD_DTYPE, - ALMOST_INF, X_BINNED_DTYPE, X_BITSET_INNER_DTYPE, X_DTYPE, ) -from sklearn.ensemble._hist_gradient_boosting._bitset import ( - set_bitset_memoryview, - set_raw_bitset_from_binned_bitset, -) +from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower +from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor +from sklearn.metrics import r2_score +from sklearn.model_selection import train_test_split from sklearn.utils._openmp_helpers import _openmp_effective_n_threads n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py index 255d13bb08456..f862273beadf5 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_splitting.py @@ -2,17 +2,19 @@ import pytest from numpy.testing import assert_array_equal -from 
sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import G_H_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import X_BINNED_DTYPE -from sklearn.ensemble._hist_gradient_boosting.common import MonotonicConstraint +from sklearn.ensemble._hist_gradient_boosting.common import ( + G_H_DTYPE, + HISTOGRAM_DTYPE, + X_BINNED_DTYPE, + MonotonicConstraint, +) +from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder from sklearn.ensemble._hist_gradient_boosting.splitting import ( Splitter, compute_node_value, ) -from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder -from sklearn.utils._testing import skip_if_32bit from sklearn.utils._openmp_helpers import _openmp_effective_n_threads +from sklearn.utils._testing import skip_if_32bit n_threads = _openmp_effective_n_threads() diff --git a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py index f8d7533ec38bc..03a2720b36127 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py +++ b/sklearn/ensemble/_hist_gradient_boosting/tests/test_warm_start.py @@ -1,17 +1,15 @@ import numpy as np -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose - import pytest +from numpy.testing import assert_allclose, assert_array_equal from sklearn.base import clone from sklearn.datasets import make_classification, make_regression - -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.ensemble import ( + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) from sklearn.metrics import check_scoring - X_classification, y_classification = make_classification(random_state=0) X_regression, y_regression = make_regression(random_state=0) diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py index 048a1d69395e2..9371d2e4e6c5b 100644 --- a/sklearn/ensemble/_iforest.py +++ b/sklearn/ensemble/_iforest.py @@ -3,25 +3,23 @@ # License: BSD 3 clause import numbers +from numbers import Integral, Real +from warnings import warn + import numpy as np from scipy.sparse import issparse -from warnings import warn -from numbers import Integral, Real +from ..base import OutlierMixin, _fit_context from ..tree import ExtraTreeRegressor from ..tree._tree import DTYPE as tree_dtype from ..utils import ( - check_random_state, check_array, + check_random_state, gen_batches, get_chunk_n_rows, ) -from ..utils._param_validation import Interval, StrOptions -from ..utils._param_validation import RealNotInt -from ..utils.validation import check_is_fitted, _num_samples -from ..base import OutlierMixin -from ..base import _fit_context - +from ..utils._param_validation import Interval, RealNotInt, StrOptions +from ..utils.validation import _num_samples, check_is_fitted from ._bagging import BaseBagging __all__ = ["IsolationForest"] diff --git a/sklearn/ensemble/_stacking.py b/sklearn/ensemble/_stacking.py index 5b3486edfeb33..2129e4d9a0134 100644 --- a/sklearn/ensemble/_stacking.py +++ b/sklearn/ensemble/_stacking.py @@ -10,35 +10,32 @@ import numpy as np import scipy.sparse as sparse -from ..base import clone -from ..base import ClassifierMixin, RegressorMixin, TransformerMixin -from ..base import is_classifier, is_regressor -from ..base import _fit_context +from ..base import ( + ClassifierMixin, + RegressorMixin, + 
TransformerMixin, + _fit_context, + clone, + is_classifier, + is_regressor, +) from ..exceptions import NotFittedError -from ..utils._estimator_html_repr import _VisualBlock - -from ._base import _fit_single_estimator -from ._base import _BaseHeterogeneousEnsemble - -from ..linear_model import LogisticRegression -from ..linear_model import RidgeCV - -from ..model_selection import cross_val_predict -from ..model_selection import check_cv - +from ..linear_model import LogisticRegression, RidgeCV +from ..model_selection import check_cv, cross_val_predict from ..preprocessing import LabelEncoder - from ..utils import Bunch -from ..utils.multiclass import check_classification_targets, type_of_target -from ..utils.metaestimators import available_if -from ..utils.parallel import delayed, Parallel +from ..utils._estimator_html_repr import _VisualBlock from ..utils._param_validation import HasMethods, StrOptions +from ..utils.metaestimators import available_if +from ..utils.multiclass import check_classification_targets, type_of_target +from ..utils.parallel import Parallel, delayed from ..utils.validation import ( _check_feature_names_in, _check_response_method, check_is_fitted, column_or_1d, ) +from ._base import _BaseHeterogeneousEnsemble, _fit_single_estimator def _estimator_has(attr): diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index f8f4d2c4c197f..50670a5a52699 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -18,24 +18,23 @@ import numpy as np -from ..base import ClassifierMixin -from ..base import RegressorMixin -from ..base import TransformerMixin -from ..base import clone -from ..base import _fit_context -from ._base import _fit_single_estimator -from ._base import _BaseHeterogeneousEnsemble +from ..base import ( + ClassifierMixin, + RegressorMixin, + TransformerMixin, + _fit_context, + clone, +) +from ..exceptions import NotFittedError from ..preprocessing import LabelEncoder from ..utils import Bunch +from ..utils._estimator_html_repr import _VisualBlock +from ..utils._param_validation import StrOptions from ..utils.metaestimators import available_if -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_feature_names_in from ..utils.multiclass import check_classification_targets -from ..utils.validation import column_or_1d -from ..utils._param_validation import StrOptions -from ..exceptions import NotFittedError -from ..utils._estimator_html_repr import _VisualBlock -from ..utils.parallel import delayed, Parallel +from ..utils.parallel import Parallel, delayed +from ..utils.validation import _check_feature_names_in, check_is_fitted, column_or_1d +from ._base import _BaseHeterogeneousEnsemble, _fit_single_estimator class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble): diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index 569609e6326e5..45a87ad6521fd 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -23,28 +23,32 @@ # # License: BSD 3 clause +import warnings from abc import ABCMeta, abstractmethod - from numbers import Integral, Real -import numpy as np - -import warnings +import numpy as np from scipy.special import xlogy -from ._base import BaseEnsemble -from ..base import ClassifierMixin, RegressorMixin, is_classifier, is_regressor -from ..base import _fit_context -from ..tree import DecisionTreeClassifier, DecisionTreeRegressor -from ..utils import check_random_state, _safe_indexing -from 
..utils.extmath import softmax -from ..utils.extmath import stable_cumsum +from ..base import ( + ClassifierMixin, + RegressorMixin, + _fit_context, + is_classifier, + is_regressor, +) from ..metrics import accuracy_score, r2_score -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_sample_weight -from ..utils.validation import has_fit_parameter -from ..utils.validation import _num_samples +from ..tree import DecisionTreeClassifier, DecisionTreeRegressor +from ..utils import _safe_indexing, check_random_state from ..utils._param_validation import HasMethods, Interval, StrOptions +from ..utils.extmath import softmax, stable_cumsum +from ..utils.validation import ( + _check_sample_weight, + _num_samples, + check_is_fitted, + has_fit_parameter, +) +from ._base import BaseEnsemble __all__ = [ "AdaBoostClassifier", @@ -757,7 +761,7 @@ def decision_function(self, X): ------- score : ndarray of shape of (n_samples, k) The decision function of the input samples. The order of - outputs is the same of that of the :term:`classes_` attribute. + outputs is the same as that of the :term:`classes_` attribute. Binary classification is a special cases with ``k == 1``, otherwise ``k==n_classes``. For binary classification, values closer to -1 or 1 mean more like the first or second @@ -776,7 +780,11 @@ class in ``classes_``, respectively. ) else: # self.algorithm == "SAMME" pred = sum( - (estimator.predict(X) == classes).T * w + np.where( + (estimator.predict(X) == classes).T, + w, + -1 / (n_classes - 1) * w, + ) for estimator, w in zip(self.estimators_, self.estimator_weights_) ) @@ -823,8 +831,11 @@ class in ``classes_``, respectively. # The weights are all 1. for SAMME.R current_pred = _samme_proba(estimator, n_classes, X) else: # elif self.algorithm == "SAMME": - current_pred = estimator.predict(X) - current_pred = (current_pred == classes).T * weight + current_pred = np.where( + (estimator.predict(X) == classes).T, + weight, + -1 / (n_classes - 1) * weight, + ) if pred is None: pred = current_pred diff --git a/sklearn/ensemble/tests/test_bagging.py b/sklearn/ensemble/tests/test_bagging.py index f6311e8c459d4..2c1067ccfc248 100644 --- a/sklearn/ensemble/tests/test_bagging.py +++ b/sklearn/ensemble/tests/test_bagging.py @@ -4,35 +4,33 @@ # Author: Gilles Louppe # License: BSD 3 clause -from itertools import product +from itertools import cycle, product -import numpy as np import joblib +import numpy as np import pytest +from scipy.sparse import csc_matrix, csr_matrix from sklearn.base import BaseEstimator - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal +from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.model_selection import GridSearchCV, ParameterGrid -from sklearn.ensemble import BaggingClassifier, BaggingRegressor -from sklearn.linear_model import Perceptron, LogisticRegression +from sklearn.ensemble import ( + BaggingClassifier, + BaggingRegressor, + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, +) +from sklearn.feature_selection import SelectKBest +from sklearn.linear_model import LogisticRegression, Perceptron +from sklearn.model_selection import GridSearchCV, ParameterGrid, train_test_split from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.svm import SVC, SVR -from 
sklearn.random_projection import SparseRandomProjection from sklearn.pipeline import make_pipeline -from sklearn.feature_selection import SelectKBest -from sklearn.model_selection import train_test_split -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.datasets import load_diabetes, load_iris, make_hastie_10_2 -from sklearn.utils import check_random_state from sklearn.preprocessing import FunctionTransformer, scale -from itertools import cycle - -from scipy.sparse import csc_matrix, csr_matrix +from sklearn.random_projection import SparseRandomProjection +from sklearn.svm import SVC, SVR +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal rng = check_random_state(0) diff --git a/sklearn/ensemble/tests/test_base.py b/sklearn/ensemble/tests/test_base.py index fe4b1e33ae7b3..8687d91053a22 100644 --- a/sklearn/ensemble/tests/test_base.py +++ b/sklearn/ensemble/tests/test_base.py @@ -5,19 +5,19 @@ # Authors: Gilles Louppe # License: BSD 3 clause +from collections import OrderedDict + import numpy as np import pytest +from sklearn import ensemble from sklearn.datasets import load_iris +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis from sklearn.ensemble import BaggingClassifier from sklearn.ensemble._base import _set_random_states -from sklearn.linear_model import Perceptron -from sklearn.linear_model import Ridge, LogisticRegression -from collections import OrderedDict -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.pipeline import Pipeline from sklearn.feature_selection import SelectFromModel -from sklearn import ensemble +from sklearn.linear_model import LogisticRegression, Perceptron, Ridge +from sklearn.pipeline import Pipeline def test_base(): diff --git a/sklearn/ensemble/tests/test_common.py b/sklearn/ensemble/tests/test_common.py index 5bafe08881ae9..7e14b34993d6f 100644 --- a/sklearn/ensemble/tests/test_common.py +++ b/sklearn/ensemble/tests/test_common.py @@ -1,21 +1,25 @@ import numpy as np import pytest -from sklearn.base import clone -from sklearn.base import ClassifierMixin -from sklearn.base import is_classifier - -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression -from sklearn.datasets import load_iris, load_diabetes +from sklearn.base import ClassifierMixin, clone, is_classifier +from sklearn.datasets import ( + load_diabetes, + load_iris, + make_classification, + make_regression, +) +from sklearn.ensemble import ( + RandomForestClassifier, + RandomForestRegressor, + StackingClassifier, + StackingRegressor, + VotingClassifier, + VotingRegressor, +) from sklearn.impute import SimpleImputer -from sklearn.linear_model import LogisticRegression, LinearRegression -from sklearn.svm import LinearSVC, LinearSVR, SVC, SVR +from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.pipeline import make_pipeline -from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor - -from sklearn.ensemble import StackingClassifier, StackingRegressor -from sklearn.ensemble import VotingClassifier, VotingRegressor +from sklearn.svm import SVC, SVR, LinearSVC, LinearSVR X, y = load_iris(return_X_y=True) diff --git a/sklearn/ensemble/tests/test_forest.py b/sklearn/ensemble/tests/test_forest.py index 9ee29f717af88..15d2999b5ef4d 
100644 --- a/sklearn/ensemble/tests/test_forest.py +++ b/sklearn/ensemble/tests/test_forest.py @@ -8,58 +8,54 @@ # Arnaud Joly # License: BSD 3 clause -import pickle +import itertools import math +import pickle from collections import defaultdict from functools import partial -import itertools -from itertools import combinations -from itertools import product -from typing import Dict, Any - -import numpy as np -from scipy.sparse import csr_matrix -from scipy.sparse import csc_matrix -from scipy.sparse import coo_matrix -from scipy.special import comb +from itertools import combinations, product +from typing import Any, Dict +from unittest.mock import patch import joblib - +import numpy as np import pytest +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix +from scipy.special import comb import sklearn -from sklearn.dummy import DummyRegressor -from sklearn.metrics import mean_poisson_deviance -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import _convert_container -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import skip_if_no_parallel - -from sklearn.exceptions import NotFittedError - from sklearn import datasets -from sklearn.decomposition import TruncatedSVD from sklearn.datasets import make_classification -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.ensemble import ExtraTreesRegressor -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import RandomTreesEmbedding -from sklearn.metrics import explained_variance_score, f1_score -from sklearn.model_selection import train_test_split, cross_val_score -from sklearn.model_selection import GridSearchCV +from sklearn.decomposition import TruncatedSVD +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import ( + ExtraTreesClassifier, + ExtraTreesRegressor, + RandomForestClassifier, + RandomForestRegressor, + RandomTreesEmbedding, +) +from sklearn.exceptions import NotFittedError +from sklearn.metrics import ( + explained_variance_score, + f1_score, + mean_poisson_deviance, + mean_squared_error, +) +from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.svm import LinearSVC +from sklearn.tree._classes import SPARSE_SPLITTERS +from sklearn.utils._testing import ( + _convert_container, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, + skip_if_no_parallel, +) from sklearn.utils.parallel import Parallel from sklearn.utils.validation import check_random_state -from sklearn.metrics import mean_squared_error - -from sklearn.tree._classes import SPARSE_SPLITTERS - -from unittest.mock import patch - # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] y = [-1, -1, -1, 1, 1, 1] diff --git a/sklearn/ensemble/tests/test_gradient_boosting.py b/sklearn/ensemble/tests/test_gradient_boosting.py index ad31b2ed732e9..6e8335402c78c 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting.py +++ b/sklearn/ensemble/tests/test_gradient_boosting.py @@ -3,38 +3,34 @@ """ import re import warnings + import numpy as np +import pytest from numpy.testing import assert_allclose - -from scipy.sparse import csr_matrix -from scipy.sparse import csc_matrix -from scipy.sparse import coo_matrix +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix from 
scipy.special import expit -import pytest - from sklearn import datasets from sklearn.base import clone from sklearn.datasets import make_classification, make_regression -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.ensemble import GradientBoostingRegressor +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor from sklearn.ensemble._gradient_boosting import predict_stages -from sklearn.preprocessing import scale +from sklearn.exceptions import DataConversionWarning, NotFittedError +from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error from sklearn.model_selection import train_test_split +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import scale +from sklearn.svm import NuSVR from sklearn.utils import check_random_state, tosequence from sklearn.utils._mocking import NoSampleWeightWrapper -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import skip_if_32bit from sklearn.utils._param_validation import InvalidParameterError -from sklearn.exceptions import DataConversionWarning -from sklearn.exceptions import NotFittedError -from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.pipeline import make_pipeline -from sklearn.linear_model import LinearRegression -from sklearn.svm import NuSVR - +from sklearn.utils._testing import ( + assert_array_almost_equal, + assert_array_equal, + skip_if_32bit, +) GRADIENT_BOOSTING_ESTIMATORS = [GradientBoostingClassifier, GradientBoostingRegressor] @@ -84,10 +80,15 @@ def test_classification_toy(loss, global_random_seed): def test_classification_synthetic(loss, global_random_seed): # Test GradientBoostingClassifier on synthetic dataset used by # Hastie et al. in ESLII - Figure 10.9 - X, y = datasets.make_hastie_10_2(n_samples=12000, random_state=global_random_seed) + # Note that Figure 10.9 reuses the dataset generated for figure 10.2 + # and should have 2_000 train data points and 10_000 test data points. + # Here we intentionally use a smaller variant to make the test run faster, + # but the conclusions are still the same, despite the smaller datasets. + X, y = datasets.make_hastie_10_2(n_samples=2000, random_state=global_random_seed) - X_train, X_test = X[:2000], X[2000:] - y_train, y_test = y[:2000], y[2000:] + split_idx = 500 + X_train, X_test = X[:split_idx], X[split_idx:] + y_train, y_test = y[:split_idx], y[split_idx:] # Increasing the number of trees should decrease the test error common_params = { @@ -96,13 +97,13 @@ def test_classification_synthetic(loss, global_random_seed): "loss": loss, "random_state": global_random_seed, } - gbrt_100_stumps = GradientBoostingClassifier(n_estimators=100, **common_params) - gbrt_100_stumps.fit(X_train, y_train) + gbrt_10_stumps = GradientBoostingClassifier(n_estimators=10, **common_params) + gbrt_10_stumps.fit(X_train, y_train) - gbrt_200_stumps = GradientBoostingClassifier(n_estimators=200, **common_params) - gbrt_200_stumps.fit(X_train, y_train) + gbrt_50_stumps = GradientBoostingClassifier(n_estimators=50, **common_params) + gbrt_50_stumps.fit(X_train, y_train) - assert gbrt_100_stumps.score(X_test, y_test) < gbrt_200_stumps.score(X_test, y_test) + assert gbrt_10_stumps.score(X_test, y_test) < gbrt_50_stumps.score(X_test, y_test) # Decision stumps are better suited for this dataset with a large number of # estimators. 
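As the updated comment explains, the synthetic Hastie et al. test now uses a deliberately smaller sample purely to run faster; the claim it checks (more depth-1 stumps should improve test accuracy here) is unchanged. A rough standalone sketch of that claim, with illustrative sizes that are not the test's own:

from sklearn.datasets import make_hastie_10_2
from sklearn.ensemble import GradientBoostingClassifier

X, y = make_hastie_10_2(n_samples=2000, random_state=0)
X_train, X_test, y_train, y_test = X[:500], X[500:], y[:500], y[500:]

common = dict(max_depth=1, learning_rate=1.0, random_state=0)
few = GradientBoostingClassifier(n_estimators=10, **common).fit(X_train, y_train)
many = GradientBoostingClassifier(n_estimators=50, **common).fit(X_train, y_train)

# The larger ensemble is expected to score at least as well on the held-out split.
print(few.score(X_test, y_test), many.score(X_test, y_test))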
@@ -674,9 +675,8 @@ def test_oob_multilcass_iris(): def test_verbose_output(): # Check verbose=1 does not cause error. - from io import StringIO - import sys + from io import StringIO old_stdout = sys.stdout sys.stdout = StringIO() @@ -706,8 +706,8 @@ def test_verbose_output(): def test_more_verbose_output(): # Check verbose=2 does not cause error. - from io import StringIO import sys + from io import StringIO old_stdout = sys.stdout sys.stdout = StringIO() diff --git a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py index e710be9504be3..df92c68801da2 100644 --- a/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py +++ b/sklearn/ensemble/tests/test_gradient_boosting_loss_functions.py @@ -2,22 +2,25 @@ Testing for the gradient boosting loss functions and initial estimators. """ from itertools import product + import numpy as np -from numpy.testing import assert_allclose import pytest +from numpy.testing import assert_allclose from pytest import approx -from sklearn.utils import check_random_state +from sklearn.ensemble._gb_losses import ( + LOSS_FUNCTIONS, + BinomialDeviance, + ExponentialLoss, + HuberLossFunction, + LeastAbsoluteError, + LeastSquaresError, + MultinomialDeviance, + QuantileLossFunction, + RegressionLossFunction, +) from sklearn.metrics import mean_pinball_loss -from sklearn.ensemble._gb_losses import RegressionLossFunction -from sklearn.ensemble._gb_losses import LeastSquaresError -from sklearn.ensemble._gb_losses import LeastAbsoluteError -from sklearn.ensemble._gb_losses import HuberLossFunction -from sklearn.ensemble._gb_losses import QuantileLossFunction -from sklearn.ensemble._gb_losses import BinomialDeviance -from sklearn.ensemble._gb_losses import MultinomialDeviance -from sklearn.ensemble._gb_losses import ExponentialLoss -from sklearn.ensemble._gb_losses import LOSS_FUNCTIONS +from sklearn.utils import check_random_state def test_binomial_deviance(): diff --git a/sklearn/ensemble/tests/test_iforest.py b/sklearn/ensemble/tests/test_iforest.py index 7650dd5c14ce4..854ebdb701014 100644 --- a/sklearn/ensemble/tests/test_iforest.py +++ b/sklearn/ensemble/tests/test_iforest.py @@ -6,27 +6,25 @@ # Alexandre Gramfort # License: BSD 3 clause -import pytest import warnings +from unittest.mock import Mock, patch import numpy as np +import pytest +from scipy.sparse import csc_matrix, csr_matrix -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import assert_allclose - -from sklearn.model_selection import ParameterGrid +from sklearn.datasets import load_diabetes, load_iris, make_classification from sklearn.ensemble import IsolationForest from sklearn.ensemble._iforest import _average_path_length -from sklearn.model_selection import train_test_split -from sklearn.datasets import load_diabetes, load_iris, make_classification -from sklearn.utils import check_random_state from sklearn.metrics import roc_auc_score - -from scipy.sparse import csc_matrix, csr_matrix -from unittest.mock import Mock, patch - +from sklearn.model_selection import ParameterGrid, train_test_split +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) # load iris & diabetes dataset iris = load_iris() diff --git 
a/sklearn/ensemble/tests/test_stacking.py b/sklearn/ensemble/tests/test_stacking.py index 2c04171fcd0f4..006b9cdb9e966 100644 --- a/sklearn/ensemble/tests/test_stacking.py +++ b/sklearn/ensemble/tests/test_stacking.py @@ -3,55 +3,47 @@ # Authors: Guillaume Lemaitre # License: BSD 3 clause -import pytest +from unittest.mock import Mock + import numpy as np -from numpy.testing import assert_array_equal +import pytest import scipy.sparse as sparse +from numpy.testing import assert_array_equal -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.base import RegressorMixin -from sklearn.base import clone - -from sklearn.exceptions import ConvergenceWarning - -from sklearn.datasets import load_iris -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_breast_cancer -from sklearn.datasets import make_regression -from sklearn.datasets import make_classification -from sklearn.datasets import make_multilabel_classification - -from sklearn.dummy import DummyClassifier -from sklearn.dummy import DummyRegressor -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import Ridge -from sklearn.linear_model import RidgeClassifier -from sklearn.svm import LinearSVC -from sklearn.svm import LinearSVR -from sklearn.svm import SVC -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import RandomForestRegressor +from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, clone +from sklearn.datasets import ( + load_breast_cancer, + load_diabetes, + load_iris, + make_classification, + make_multilabel_classification, + make_regression, +) +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import ( + RandomForestClassifier, + RandomForestRegressor, + StackingClassifier, + StackingRegressor, +) +from sklearn.exceptions import ConvergenceWarning, NotFittedError +from sklearn.linear_model import ( + LinearRegression, + LogisticRegression, + Ridge, + RidgeClassifier, +) +from sklearn.model_selection import KFold, StratifiedKFold, train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.neural_network import MLPClassifier from sklearn.preprocessing import scale - -from sklearn.ensemble import StackingClassifier -from sklearn.ensemble import StackingRegressor - -from sklearn.model_selection import train_test_split -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import KFold - +from sklearn.svm import SVC, LinearSVC, LinearSVR from sklearn.utils._mocking import CheckingClassifier -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import ignore_warnings - -from sklearn.exceptions import NotFittedError - -from unittest.mock import Mock +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + ignore_warnings, +) diabetes = load_diabetes() X_diabetes, y_diabetes = diabetes.data, diabetes.target diff --git a/sklearn/ensemble/tests/test_voting.py b/sklearn/ensemble/tests/test_voting.py index 56db8b3c7fbf5..52734fc031fde 100644 --- a/sklearn/ensemble/tests/test_voting.py +++ b/sklearn/ensemble/tests/test_voting.py @@ -1,30 +1,34 @@ """Testing for the VotingClassifier and VotingRegressor""" -import pytest import re + import numpy as np +import pytest -from sklearn.utils._testing import assert_almost_equal, assert_array_equal -from 
sklearn.utils._testing import assert_array_almost_equal -from sklearn.exceptions import NotFittedError -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import LogisticRegression -from sklearn.naive_bayes import GaussianNB -from sklearn.ensemble import RandomForestClassifier -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import VotingClassifier, VotingRegressor -from sklearn.tree import DecisionTreeClassifier -from sklearn.tree import DecisionTreeRegressor -from sklearn.model_selection import GridSearchCV from sklearn import datasets -from sklearn.model_selection import cross_val_score, train_test_split +from sklearn.base import BaseEstimator, ClassifierMixin, clone from sklearn.datasets import make_multilabel_classification -from sklearn.svm import SVC +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import ( + RandomForestClassifier, + RandomForestRegressor, + VotingClassifier, + VotingRegressor, +) +from sklearn.exceptions import NotFittedError +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.multiclass import OneVsRestClassifier +from sklearn.naive_bayes import GaussianNB from sklearn.neighbors import KNeighborsClassifier -from sklearn.base import BaseEstimator, ClassifierMixin, clone -from sklearn.dummy import DummyRegressor from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) # Load datasets iris = datasets.load_iris() diff --git a/sklearn/ensemble/tests/test_weight_boosting.py b/sklearn/ensemble/tests/test_weight_boosting.py index a5b0f7a49ce47..07d03be9721c0 100755 --- a/sklearn/ensemble/tests/test_weight_boosting.py +++ b/sklearn/ensemble/tests/test_weight_boosting.py @@ -1,33 +1,28 @@ """Testing for the boost module (sklearn.ensemble.boost).""" -import numpy as np -import pytest import re -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import coo_matrix -from scipy.sparse import dok_matrix -from scipy.sparse import lil_matrix - -from sklearn.utils._testing import assert_array_equal, assert_array_less -from sklearn.utils._testing import assert_array_almost_equal +import numpy as np +import pytest +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix, dok_matrix, lil_matrix -from sklearn.base import BaseEstimator -from sklearn.base import clone +from sklearn import datasets +from sklearn.base import BaseEstimator, clone from sklearn.dummy import DummyClassifier, DummyRegressor -from sklearn.linear_model import LinearRegression -from sklearn.model_selection import train_test_split -from sklearn.model_selection import GridSearchCV -from sklearn.ensemble import AdaBoostClassifier -from sklearn.ensemble import AdaBoostRegressor +from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor from sklearn.ensemble._weight_boosting import _samme_proba +from sklearn.linear_model import LinearRegression +from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.svm import SVC, SVR from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils import shuffle from sklearn.utils._mocking import NoSampleWeightWrapper -from sklearn import datasets - +from sklearn.utils._testing import ( + 
assert_allclose, + assert_array_almost_equal, + assert_array_equal, + assert_array_less, +) # Common random state rng = np.random.RandomState(0) @@ -481,9 +476,9 @@ def test_multidimensional_X(): """ rng = np.random.RandomState(0) - X = rng.randn(50, 3, 3) - yc = rng.choice([0, 1], 50) - yr = rng.randn(50) + X = rng.randn(51, 3, 3) + yc = rng.choice([0, 1], 51) + yr = rng.randn(51) boost = AdaBoostClassifier(DummyClassifier(strategy="most_frequent")) boost.fit(X, yc) @@ -669,3 +664,45 @@ def test_deprecated_base_estimator_parameters_can_be_set(): with pytest.warns(FutureWarning, match="Parameter 'base_estimator' of"): clf.set_params(base_estimator__max_depth=2) + + +@pytest.mark.parametrize("algorithm", ["SAMME", "SAMME.R"]) +def test_adaboost_decision_function(algorithm, global_random_seed): + """Check that the decision function respects the symmetric constraint for weak + learners. + + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/26520 + """ + n_classes = 3 + X, y = datasets.make_classification( + n_classes=n_classes, n_clusters_per_class=1, random_state=global_random_seed + ) + clf = AdaBoostClassifier( + n_estimators=1, random_state=global_random_seed, algorithm=algorithm + ).fit(X, y) + + y_score = clf.decision_function(X) + assert_allclose(y_score.sum(axis=1), 0, atol=1e-8) + + if algorithm == "SAMME": + # With a single learner, we expect to have a decision function in + # {1, - 1 / (n_classes - 1)}. + assert set(np.unique(y_score)) == {1, -1 / (n_classes - 1)} + + # We can assert the same for staged_decision_function since we have a single learner + for y_score in clf.staged_decision_function(X): + assert_allclose(y_score.sum(axis=1), 0, atol=1e-8) + + if algorithm == "SAMME": + # With a single learner, we expect to have a decision function in + # {1, - 1 / (n_classes - 1)}. + assert set(np.unique(y_score)) == {1, -1 / (n_classes - 1)} + + clf.set_params(n_estimators=5).fit(X, y) + + y_score = clf.decision_function(X) + assert_allclose(y_score.sum(axis=1), 0, atol=1e-8) + + for y_score in clf.staged_decision_function(X): + assert_allclose(y_score.sum(axis=1), 0, atol=1e-8) diff --git a/sklearn/experimental/enable_halving_search_cv.py b/sklearn/experimental/enable_halving_search_cv.py index f6937b0d14c01..dd399ef35b6f7 100644 --- a/sklearn/experimental/enable_halving_search_cv.py +++ b/sklearn/experimental/enable_halving_search_cv.py @@ -19,13 +19,12 @@ flake8 to ignore the import, which appears as unused. """ +from .. import model_selection from ..model_selection._search_successive_halving import ( - HalvingRandomSearchCV, HalvingGridSearchCV, + HalvingRandomSearchCV, ) -from .. import model_selection - # use settattr to avoid mypy errors when monkeypatching setattr(model_selection, "HalvingRandomSearchCV", HalvingRandomSearchCV) setattr(model_selection, "HalvingGridSearchCV", HalvingGridSearchCV) diff --git a/sklearn/experimental/enable_hist_gradient_boosting.py b/sklearn/experimental/enable_hist_gradient_boosting.py index f0416ac013e96..d287400c7999f 100644 --- a/sklearn/experimental/enable_hist_gradient_boosting.py +++ b/sklearn/experimental/enable_hist_gradient_boosting.py @@ -12,7 +12,6 @@ import warnings - warnings.warn( "Since version 1.0, " "it is not needed to import enable_hist_gradient_boosting anymore. 
" diff --git a/sklearn/experimental/enable_iterative_imputer.py b/sklearn/experimental/enable_iterative_imputer.py index 9ef9f6a0dbdf0..0b906961ca184 100644 --- a/sklearn/experimental/enable_iterative_imputer.py +++ b/sklearn/experimental/enable_iterative_imputer.py @@ -12,8 +12,8 @@ >>> from sklearn.impute import IterativeImputer """ -from ..impute._iterative import IterativeImputer from .. import impute +from ..impute._iterative import IterativeImputer # use settattr to avoid mypy errors when monkeypatching setattr(impute, "IterativeImputer", IterativeImputer) diff --git a/sklearn/feature_extraction/__init__.py b/sklearn/feature_extraction/__init__.py index a9c1496181b3b..f4db85303f4b6 100644 --- a/sklearn/feature_extraction/__init__.py +++ b/sklearn/feature_extraction/__init__.py @@ -4,10 +4,10 @@ images. """ +from . import text from ._dict_vectorizer import DictVectorizer from ._hash import FeatureHasher -from .image import img_to_graph, grid_to_graph -from . import text +from .image import grid_to_graph, img_to_graph __all__ = [ "DictVectorizer", diff --git a/sklearn/feature_extraction/_dict_vectorizer.py b/sklearn/feature_extraction/_dict_vectorizer.py index 60e2cb3b7ad84..0b9ea2f202cb0 100644 --- a/sklearn/feature_extraction/_dict_vectorizer.py +++ b/sklearn/feature_extraction/_dict_vectorizer.py @@ -3,15 +3,14 @@ # License: BSD 3 clause from array import array -from collections.abc import Mapping, Iterable -from operator import itemgetter +from collections.abc import Iterable, Mapping from numbers import Number +from operator import itemgetter import numpy as np import scipy.sparse as sp -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context +from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils import check_array from ..utils.validation import check_is_fitted @@ -43,6 +42,9 @@ class DictVectorizer(TransformerMixin, BaseEstimator): Features that do not occur in a sample (mapping) will have a zero value in the resulting array/matrix. + For an efficiency comparision of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. Parameters diff --git a/sklearn/feature_extraction/_hash.py b/sklearn/feature_extraction/_hash.py index e1b5e5f2561fe..2019552ae65bb 100644 --- a/sklearn/feature_extraction/_hash.py +++ b/sklearn/feature_extraction/_hash.py @@ -1,16 +1,15 @@ # Author: Lars Buitinck # License: BSD 3 clause -from numbers import Integral from itertools import chain +from numbers import Integral import numpy as np import scipy.sparse as sp -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context -from ._hashing_fast import transform as _hashing_transform +from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils._param_validation import Interval, StrOptions +from ._hashing_fast import transform as _hashing_transform def _iteritems(d): @@ -35,6 +34,9 @@ class FeatureHasher(TransformerMixin, BaseEstimator): where memory is tight, e.g. when running prediction code on embedded devices. + For an efficiency comparision of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. .. 
versionadded:: 0.13 diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py index beea3e23e0adc..a2a23b9ec4f3d 100644 --- a/sklearn/feature_extraction/image.py +++ b/sklearn/feature_extraction/image.py @@ -11,15 +11,14 @@ from itertools import product from numbers import Integral, Number, Real + import numpy as np -from scipy import sparse from numpy.lib.stride_tricks import as_strided +from scipy import sparse -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context +from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils import check_array, check_random_state -from ..utils._param_validation import Hidden, Interval, validate_params -from ..utils._param_validation import RealNotInt +from ..utils._param_validation import Hidden, Interval, RealNotInt, validate_params __all__ = [ "PatchExtractor", @@ -77,7 +76,7 @@ def _mask_edges_weights(mask, edges, weights=None): """Apply a mask to edges (weighted or not)""" inds = np.arange(mask.size) inds = inds[mask.ravel()] - ind_mask = np.logical_and(np.in1d(edges[0], inds), np.in1d(edges[1], inds)) + ind_mask = np.logical_and(np.isin(edges[0], inds), np.isin(edges[1], inds)) edges = edges[:, ind_mask] if weights is not None: weights = weights[ind_mask] @@ -146,7 +145,8 @@ def _to_graph( "mask": [None, np.ndarray], "return_as": [type], "dtype": "no_validation", # validation delegated to numpy - } + }, + prefer_skip_nested_validation=True, ) def img_to_graph(img, *, mask=None, return_as=sparse.coo_matrix, dtype=None): """Graph of the pixel-to-pixel gradient connections. @@ -197,7 +197,8 @@ def img_to_graph(img, *, mask=None, return_as=sparse.coo_matrix, dtype=None): "mask": [None, np.ndarray], "return_as": [type], "dtype": "no_validation", # validation delegated to numpy - } + }, + prefer_skip_nested_validation=True, ) def grid_to_graph( n_x, n_y, n_z=1, *, mask=None, return_as=sparse.coo_matrix, dtype=int @@ -260,9 +261,9 @@ def _compute_n_patches(i_h, i_w, p_h, p_w, max_patches=None): p_w : int The width of a patch max_patches : int or float, default=None - The maximum number of patches to extract. If max_patches is a float + The maximum number of patches to extract. If `max_patches` is a float between 0 and 1, it is taken to be a proportion of the total number - of patches. + of patches. If `max_patches` is None, all possible patches are extracted. """ n_h = i_h - p_h + 1 n_w = i_w - p_w + 1 @@ -350,7 +351,8 @@ def _extract_patches(arr, patch_shape=8, extraction_step=1): None, ], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def extract_patches_2d(image, patch_size, *, max_patches=None, random_state=None): """Reshape a 2D image into a collection of patches. @@ -450,7 +452,10 @@ def extract_patches_2d(image, patch_size, *, max_patches=None, random_state=None return patches -@validate_params({"patches": [np.ndarray], "image_size": [tuple, Hidden(list)]}) +@validate_params( + {"patches": [np.ndarray], "image_size": [tuple, Hidden(list)]}, + prefer_skip_nested_validation=True, +) def reconstruct_from_patches_2d(patches, image_size): """Reconstruct the image from all of its patches. 
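[Editor's aside, not part of the patch] The `max_patches` docstring clarified above distinguishes a float proportion from `None`. A minimal sketch of that behaviour using only the public `extract_patches_2d` helper touched in this hunk; the printed shapes follow from the `(i_h - p_h + 1) * (i_w - p_w + 1)` count described in `_compute_n_patches`:

import numpy as np
from sklearn.feature_extraction.image import extract_patches_2d

image = np.arange(64, dtype=float).reshape(8, 8)

# max_patches=None (default): every possible patch is extracted.
all_patches = extract_patches_2d(image, patch_size=(2, 2))
# max_patches as a float in (0, 1): a proportion of all possible patches.
some_patches = extract_patches_2d(
    image, patch_size=(2, 2), max_patches=0.5, random_state=0
)

print(all_patches.shape)   # (49, 2, 2) -> (8 - 2 + 1) ** 2 patches
print(some_patches.shape)  # (24, 2, 2) -> int(0.5 * 49) randomly sampled patches
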
diff --git a/sklearn/feature_extraction/tests/test_dict_vectorizer.py b/sklearn/feature_extraction/tests/test_dict_vectorizer.py index c8b9aaa8b5c8a..3066d7669546b 100644 --- a/sklearn/feature_extraction/tests/test_dict_vectorizer.py +++ b/sklearn/feature_extraction/tests/test_dict_vectorizer.py @@ -3,12 +3,11 @@ # License: BSD 3 clause from random import Random -import numpy as np -import scipy.sparse as sp -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose +import numpy as np import pytest +import scipy.sparse as sp +from numpy.testing import assert_allclose, assert_array_equal from sklearn.feature_extraction import DictVectorizer from sklearn.feature_selection import SelectKBest, chi2 @@ -31,7 +30,9 @@ def test_dictvectorizer(sparse, dtype, sort, iterable): if sparse: # CSR matrices can't be compared for equality - assert_array_equal(X.A, v.transform(iter(D) if iterable else D).A) + assert_array_equal( + X.toarray(), v.transform(iter(D) if iterable else D).toarray() + ) else: assert_array_equal(X, v.transform(iter(D) if iterable else D)) diff --git a/sklearn/feature_extraction/tests/test_feature_hasher.py b/sklearn/feature_extraction/tests/test_feature_hasher.py index b074620f8c029..276d0d48b0770 100644 --- a/sklearn/feature_extraction/tests/test_feature_hasher.py +++ b/sklearn/feature_extraction/tests/test_feature_hasher.py @@ -1,6 +1,6 @@ import numpy as np -from numpy.testing import assert_array_equal import pytest +from numpy.testing import assert_array_equal from sklearn.feature_extraction import FeatureHasher from sklearn.feature_extraction._hashing_fast import transform as _hashing_transform @@ -125,7 +125,7 @@ def test_hash_empty_input(): feature_hasher = FeatureHasher(n_features=n_features, input_type="string") X = feature_hasher.transform(raw_X) - assert_array_equal(X.A, np.zeros((len(raw_X), n_features))) + assert_array_equal(X.toarray(), np.zeros((len(raw_X), n_features))) def test_hasher_zeros(): diff --git a/sklearn/feature_extraction/tests/test_image.py b/sklearn/feature_extraction/tests/test_image.py index 5a89062e7de19..375652c848db6 100644 --- a/sklearn/feature_extraction/tests/test_image.py +++ b/sklearn/feature_extraction/tests/test_image.py @@ -3,17 +3,17 @@ # License: BSD 3 clause import numpy as np +import pytest from scipy import ndimage from scipy.sparse.csgraph import connected_components -import pytest from sklearn.feature_extraction.image import ( - img_to_graph, - grid_to_graph, - extract_patches_2d, - reconstruct_from_patches_2d, PatchExtractor, _extract_patches, + extract_patches_2d, + grid_to_graph, + img_to_graph, + reconstruct_from_patches_2d, ) diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 80a42aaea5af0..fc35053b40251 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -1,43 +1,37 @@ -from collections.abc import Mapping +import pickle import re +import warnings +from collections import defaultdict +from collections.abc import Mapping +from functools import partial +from io import StringIO +import numpy as np import pytest -import warnings +from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy import sparse -from sklearn.feature_extraction.text import strip_tags -from sklearn.feature_extraction.text import strip_accents_unicode -from sklearn.feature_extraction.text import strip_accents_ascii - -from sklearn.feature_extraction.text import HashingVectorizer -from 
sklearn.feature_extraction.text import CountVectorizer -from sklearn.feature_extraction.text import TfidfTransformer -from sklearn.feature_extraction.text import TfidfVectorizer - -from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS - -from sklearn.model_selection import train_test_split -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import GridSearchCV +from sklearn.base import clone +from sklearn.feature_extraction.text import ( + ENGLISH_STOP_WORDS, + CountVectorizer, + HashingVectorizer, + TfidfTransformer, + TfidfVectorizer, + strip_accents_ascii, + strip_accents_unicode, + strip_tags, +) +from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split from sklearn.pipeline import Pipeline from sklearn.svm import LinearSVC - -from sklearn.base import clone - -import numpy as np -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_equal from sklearn.utils import IS_PYPY from sklearn.utils._testing import ( + assert_allclose_dense_sparse, assert_almost_equal, fails_if_pypy, - assert_allclose_dense_sparse, skip_if_32bit, ) -from collections import defaultdict -from functools import partial -import pickle -from io import StringIO JUNK_FOOD_DOCS = ( "the pizza pizza beer copyright", diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 3201e3a0d51bb..b2ef28de75766 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -12,29 +12,26 @@ """ import array +import re +import unicodedata +import warnings from collections import defaultdict from collections.abc import Mapping from functools import partial from numbers import Integral from operator import itemgetter -import re -import unicodedata -import warnings import numpy as np import scipy.sparse as sp -from ..base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin -from ..base import _fit_context +from ..base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin, _fit_context +from ..exceptions import NotFittedError from ..preprocessing import normalize +from ..utils import _IS_32BIT +from ..utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions +from ..utils.validation import FLOAT_DTYPES, check_array, check_is_fitted from ._hash import FeatureHasher from ._stop_words import ENGLISH_STOP_WORDS -from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES -from ..utils import _IS_32BIT -from ..exceptions import NotFittedError -from ..utils._param_validation import StrOptions, Interval, HasMethods -from ..utils._param_validation import RealNotInt - __all__ = [ "HashingVectorizer", @@ -605,6 +602,9 @@ class HashingVectorizer( The hash function employed is the signed 32-bit version of Murmurhash3. + For an efficiency comparision of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. Parameters @@ -636,7 +636,7 @@ class HashingVectorizer( 'ascii' is a fast method that only works on characters that have a direct ASCII mapping. 'unicode' is a slightly slower method that works on any character. - None (default) does nothing. + None (default) means no character normalization is performed. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`. 
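[Editor's aside, not part of the patch] The reworded `strip_accents` description above ("None (default) means no character normalization is performed") is easiest to see with a small sketch. It assumes only the public `CountVectorizer` API, which receives the same docstring change further down:

from sklearn.feature_extraction.text import CountVectorizer

docs = ["café Café CAFE"]

default = CountVectorizer().fit(docs)                        # strip_accents=None
folded = CountVectorizer(strip_accents="unicode").fit(docs)  # NFKD folding

print(sorted(default.vocabulary_))  # ['cafe', 'café'] -- accents kept, only lowercased
print(sorted(folded.vocabulary_))   # ['cafe']          -- folding merges the variants
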
@@ -920,7 +920,7 @@ def _more_tags(self): def _document_frequency(X): """Count the number of non-zero values for each feature in sparse X.""" - if sp.isspmatrix_csr(X): + if sp.issparse(X) and X.format == "csr": return np.bincount(X.indices, minlength=X.shape[1]) else: return np.diff(X.indptr) @@ -936,6 +936,9 @@ class CountVectorizer(_VectorizerMixin, BaseEstimator): that does some kind of feature selection then the number of features will be equal to the vocabulary size found by analyzing the data. + For an efficiency comparision of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. Parameters @@ -967,7 +970,7 @@ class CountVectorizer(_VectorizerMixin, BaseEstimator): 'ascii' is a fast method that only works on characters that have a direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. - None (default) does nothing. + None (default) means no character normalization is performed. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`. @@ -1548,7 +1551,7 @@ class TfidfTransformer( similarity between two vectors is their dot product when l2 norm has been applied. - 'l1': Sum of absolute values of vector elements is 1. - See :func:`preprocessing.normalize`. + See :func:`~sklearn.preprocessing.normalize`. - None: No normalization. use_idf : bool, default=True @@ -1758,6 +1761,12 @@ class TfidfVectorizer(CountVectorizer): Equivalent to :class:`CountVectorizer` followed by :class:`TfidfTransformer`. + For an example of usage, see + :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py`. + + For an efficiency comparision of the different feature extractors, see + :ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`. + Read more in the :ref:`User Guide `. Parameters @@ -1789,7 +1798,7 @@ class TfidfVectorizer(CountVectorizer): 'ascii' is a fast method that only works on characters that have a direct ASCII mapping. 'unicode' is a slightly slower method that works on any characters. - None (default) does nothing. + None (default) means no character normalization is performed. Both 'ascii' and 'unicode' use NFKD normalization from :func:`unicodedata.normalize`. @@ -1884,7 +1893,8 @@ class TfidfVectorizer(CountVectorizer): binary : bool, default=False If True, all non-zero term counts are set to 1. This does not mean outputs will have only 0/1 values, only that the tf term in tf-idf - is binary. (Set idf and normalization to False to get 0/1 outputs). + is binary. (Set `binary` to True, `use_idf` to False and + `norm` to None to get 0/1 outputs). dtype : dtype, default=float64 Type of the matrix returned by fit_transform() or transform(). @@ -1896,7 +1906,7 @@ class TfidfVectorizer(CountVectorizer): similarity between two vectors is their dot product when l2 norm has been applied. - 'l1': Sum of absolute values of vector elements is 1. - See :func:`preprocessing.normalize`. + See :func:`~sklearn.preprocessing.normalize`. - None: No normalization. use_idf : bool, default=True diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py index ce5fbc10ee459..4fbc631155078 100644 --- a/sklearn/feature_selection/__init__.py +++ b/sklearn/feature_selection/__init__.py @@ -4,31 +4,25 @@ recursive feature elimination algorithm. 
""" -from ._univariate_selection import chi2 -from ._univariate_selection import f_classif -from ._univariate_selection import f_oneway -from ._univariate_selection import f_regression -from ._univariate_selection import r_regression -from ._univariate_selection import SelectPercentile -from ._univariate_selection import SelectKBest -from ._univariate_selection import SelectFpr -from ._univariate_selection import SelectFdr -from ._univariate_selection import SelectFwe -from ._univariate_selection import GenericUnivariateSelect - -from ._variance_threshold import VarianceThreshold - -from ._rfe import RFE -from ._rfe import RFECV - +from ._base import SelectorMixin from ._from_model import SelectFromModel - +from ._mutual_info import mutual_info_classif, mutual_info_regression +from ._rfe import RFE, RFECV from ._sequential import SequentialFeatureSelector - -from ._mutual_info import mutual_info_regression, mutual_info_classif - -from ._base import SelectorMixin - +from ._univariate_selection import ( + GenericUnivariateSelect, + SelectFdr, + SelectFpr, + SelectFwe, + SelectKBest, + SelectPercentile, + chi2, + f_classif, + f_oneway, + f_regression, + r_regression, +) +from ._variance_threshold import VarianceThreshold __all__ = [ "GenericUnivariateSelect", diff --git a/sklearn/feature_selection/_base.py b/sklearn/feature_selection/_base.py index 100af272038ad..9ede37c98c75b 100644 --- a/sklearn/feature_selection/_base.py +++ b/sklearn/feature_selection/_base.py @@ -8,16 +8,16 @@ from operator import attrgetter import numpy as np -from scipy.sparse import issparse, csc_matrix +from scipy.sparse import csc_matrix, issparse from ..base import TransformerMixin from ..utils import ( + _safe_indexing, check_array, safe_sqr, ) -from ..utils._tags import _safe_tags -from ..utils import _safe_indexing from ..utils._set_output import _get_output_config +from ..utils._tags import _safe_tags from ..utils.validation import _check_feature_names_in, check_is_fitted diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py index 47f98d89e8abe..d3a287007bd49 100644 --- a/sklearn/feature_selection/_from_model.py +++ b/sklearn/feature_selection/_from_model.py @@ -2,20 +2,17 @@ # License: BSD 3 clause from copy import deepcopy - -import numpy as np from numbers import Integral, Real -from ._base import SelectorMixin -from ._base import _get_feature_importances -from ..base import BaseEstimator, clone, MetaEstimatorMixin -from ..base import _fit_context -from ..utils._tags import _safe_tags -from ..utils.validation import check_is_fitted, check_scalar, _num_features -from ..utils._param_validation import HasMethods, Interval, Options +import numpy as np +from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone from ..exceptions import NotFittedError +from ..utils._param_validation import HasMethods, Interval, Options +from ..utils._tags import _safe_tags from ..utils.metaestimators import available_if +from ..utils.validation import _num_features, check_is_fitted, check_scalar +from ._base import SelectorMixin, _get_feature_importances def _calculate_threshold(estimator, importances, threshold): diff --git a/sklearn/feature_selection/_mutual_info.py b/sklearn/feature_selection/_mutual_info.py index 9cacfc3890784..bd62495ac28a3 100644 --- a/sklearn/feature_selection/_mutual_info.py +++ b/sklearn/feature_selection/_mutual_info.py @@ -1,18 +1,19 @@ # Author: Nikolay Mayorov # License: 3-clause BSD -import numpy as np from numbers import Integral + +import 
numpy as np from scipy.sparse import issparse from scipy.special import digamma from ..metrics.cluster import mutual_info_score -from ..neighbors import NearestNeighbors, KDTree +from ..neighbors import KDTree, NearestNeighbors from ..preprocessing import scale from ..utils import check_random_state -from ..utils.validation import check_array, check_X_y -from ..utils.multiclass import check_classification_targets from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils.multiclass import check_classification_targets +from ..utils.validation import check_array, check_X_y def _compute_mi_cc(x, y, n_neighbors): @@ -279,15 +280,12 @@ def _estimate_mi( rng = check_random_state(random_state) if np.any(continuous_mask): - if copy: - X = X.copy() - + X = X.astype(np.float64, copy=copy) X[:, continuous_mask] = scale( X[:, continuous_mask], with_mean=False, copy=False ) # Add small noise to continuous features as advised in Kraskov et. al. - X = X.astype(np.float64, copy=False) means = np.maximum(1, np.mean(np.abs(X[:, continuous_mask]), axis=0)) X[:, continuous_mask] += ( 1e-10 @@ -319,7 +317,8 @@ def _estimate_mi( "n_neighbors": [Interval(Integral, 1, None, closed="left")], "copy": ["boolean"], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def mutual_info_regression( X, y, *, discrete_features="auto", n_neighbors=3, copy=True, random_state=None @@ -408,7 +407,8 @@ def mutual_info_regression( "n_neighbors": [Interval(Integral, 1, None, closed="left")], "copy": ["boolean"], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def mutual_info_classif( X, y, *, discrete_features="auto", n_neighbors=3, copy=True, random_state=None diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py index 932d66449ae22..11cf083992653 100644 --- a/sklearn/feature_selection/_rfe.py +++ b/sklearn/feature_selection/_rfe.py @@ -6,28 +6,21 @@ """Recursive feature elimination for feature ranking""" -import numpy as np from numbers import Integral -from joblib import effective_n_jobs +import numpy as np +from joblib import effective_n_jobs -from ..utils.metaestimators import available_if -from ..utils.metaestimators import _safe_split -from ..utils._param_validation import HasMethods, Interval -from ..utils._param_validation import RealNotInt -from ..utils._tags import _safe_tags -from ..utils.validation import check_is_fitted -from ..utils.parallel import delayed, Parallel -from ..base import BaseEstimator -from ..base import MetaEstimatorMixin -from ..base import clone -from ..base import is_classifier -from ..base import _fit_context +from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone, is_classifier +from ..metrics import check_scoring from ..model_selection import check_cv from ..model_selection._validation import _score -from ..metrics import check_scoring -from ._base import SelectorMixin -from ._base import _get_feature_importances +from ..utils._param_validation import HasMethods, Interval, RealNotInt +from ..utils._tags import _safe_tags +from ..utils.metaestimators import _safe_split, available_if +from ..utils.parallel import Parallel, delayed +from ..utils.validation import check_is_fitted +from ._base import SelectorMixin, _get_feature_importances def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer): diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py index 0fbe91273053b..f124fff8fed3e 100644 --- 
a/sklearn/feature_selection/_sequential.py +++ b/sklearn/feature_selection/_sequential.py @@ -5,15 +5,13 @@ import numpy as np -from ._base import SelectorMixin -from ..base import BaseEstimator, MetaEstimatorMixin, clone, is_classifier -from ..base import _fit_context -from ..utils._param_validation import HasMethods, Interval, StrOptions -from ..utils._param_validation import RealNotInt +from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone, is_classifier +from ..metrics import get_scorer_names +from ..model_selection import check_cv, cross_val_score +from ..utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions from ..utils._tags import _safe_tags from ..utils.validation import check_is_fitted -from ..model_selection import cross_val_score, check_cv -from ..metrics import get_scorer_names +from ._base import SelectorMixin class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator): @@ -85,9 +83,11 @@ class SequentialFeatureSelector(SelectorMixin, MetaEstimatorMixin, BaseEstimator - An iterable yielding (train, test) splits as arrays of indices. For integer/None inputs, if the estimator is a classifier and ``y`` is - either binary or multiclass, :class:`StratifiedKFold` is used. In all - other cases, :class:`KFold` is used. These splitters are instantiated - with `shuffle=False` so the splits will be the same across calls. + either binary or multiclass, + :class:`~sklearn.model_selection.StratifiedKFold` is used. In all other + cases, :class:`~sklearn.model_selection.KFold` is used. These splitters + are instantiated with `shuffle=False` so the splits will be the same + across calls. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py index f4355c39f88cd..25e84518413dc 100644 --- a/sklearn/feature_selection/_univariate_selection.py +++ b/sklearn/feature_selection/_univariate_selection.py @@ -5,20 +5,19 @@ # License: BSD 3 clause -import numpy as np import warnings - from numbers import Integral, Real + +import numpy as np from scipy import special, stats from scipy.sparse import issparse -from ..base import BaseEstimator -from ..base import _fit_context +from ..base import BaseEstimator, _fit_context from ..preprocessing import LabelBinarizer -from ..utils import as_float_array, check_array, check_X_y, safe_sqr, safe_mask -from ..utils.extmath import safe_sparse_dot, row_norms -from ..utils.validation import check_is_fitted +from ..utils import as_float_array, check_array, check_X_y, safe_mask, safe_sqr from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils.extmath import row_norms, safe_sparse_dot +from ..utils.validation import check_is_fitted from ._base import SelectorMixin @@ -122,7 +121,8 @@ def f_oneway(*args): { "X": ["array-like", "sparse matrix"], "y": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def f_classif(X, y): """Compute the ANOVA F-value for the provided sample. @@ -178,7 +178,8 @@ def _chisquare(f_obs, f_exp): { "X": ["array-like", "sparse matrix"], "y": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def chi2(X, y): """Compute chi-squared stats between each non-negative feature and class. 
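[Editor's aside, not part of the patch] The recurring mechanical change in this file (and in `_mutual_info.py` above) is adding `prefer_skip_nested_validation=True` to the `@validate_params` decorators. Below is a toy sketch of that decorator pattern using the same private `sklearn.utils._param_validation` helpers the diff imports; `count_neighbors` is a made-up function, and my reading of the flag is that it lets scikit-learn skip re-validating parameters in nested calls because the decorated function already validates its own:

from numbers import Integral
from sklearn.utils._param_validation import Interval, validate_params

@validate_params(
    {"n_neighbors": [Interval(Integral, 1, None, closed="left")]},
    prefer_skip_nested_validation=True,
)
def count_neighbors(n_neighbors=3):
    # Hypothetical helper: the decorator rejects invalid values before the body runs.
    return n_neighbors

print(count_neighbors(n_neighbors=5))  # 5
try:
    count_neighbors(n_neighbors=0)     # outside [1, inf) -> rejected by the decorator
except Exception as exc:
    print(type(exc).__name__)          # prints the validation error's class name
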
@@ -258,7 +259,8 @@ def chi2(X, y): "y": ["array-like"], "center": ["boolean"], "force_finite": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def r_regression(X, y, *, center=True, force_finite=True): """Compute Pearson's r for each features and the target. @@ -349,7 +351,8 @@ def r_regression(X, y, *, center=True, force_finite=True): "y": ["array-like"], "center": ["boolean"], "force_finite": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def f_regression(X, y, *, center=True, force_finite=True): """Univariate linear regression tests returning F-statistic and p-values. diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py index 073a22c6ad92b..f97c75db1e34b 100644 --- a/sklearn/feature_selection/_variance_threshold.py +++ b/sklearn/feature_selection/_variance_threshold.py @@ -3,12 +3,12 @@ from numbers import Real import numpy as np -from ..base import BaseEstimator -from ..base import _fit_context -from ._base import SelectorMixin + +from ..base import BaseEstimator, _fit_context +from ..utils._param_validation import Interval from ..utils.sparsefuncs import mean_variance_axis, min_max_axis from ..utils.validation import check_is_fitted -from ..utils._param_validation import Interval +from ._base import SelectorMixin class VarianceThreshold(SelectorMixin, BaseEstimator): diff --git a/sklearn/feature_selection/tests/test_base.py b/sklearn/feature_selection/tests/test_base.py index 9869a1c03e677..bf883797ddabd 100644 --- a/sklearn/feature_selection/tests/test_base.py +++ b/sklearn/feature_selection/tests/test_base.py @@ -1,8 +1,7 @@ import numpy as np import pytest -from scipy import sparse as sp - from numpy.testing import assert_array_equal +from scipy import sparse as sp from sklearn.base import BaseEstimator from sklearn.feature_selection._base import SelectorMixin diff --git a/sklearn/feature_selection/tests/test_chi2.py b/sklearn/feature_selection/tests/test_chi2.py index d7d830459e455..4fdc652a998a9 100644 --- a/sklearn/feature_selection/tests/test_chi2.py +++ b/sklearn/feature_selection/tests/test_chi2.py @@ -7,13 +7,12 @@ import numpy as np import pytest -from scipy.sparse import coo_matrix, csr_matrix import scipy.stats +from scipy.sparse import coo_matrix, csr_matrix from sklearn.feature_selection import SelectKBest, chi2 from sklearn.feature_selection._univariate_selection import _chisquare -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal +from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal # Feature 0 is highly informative for class 1; # feature 1 is the same everywhere; diff --git a/sklearn/feature_selection/tests/test_feature_select.py b/sklearn/feature_selection/tests/test_feature_select.py index ff51243bb1378..b182aca270e06 100644 --- a/sklearn/feature_selection/tests/test_feature_select.py +++ b/sklearn/feature_selection/tests/test_feature_select.py @@ -3,35 +3,36 @@ """ import itertools import warnings -import numpy as np -from numpy.testing import assert_allclose -from scipy import stats, sparse +import numpy as np import pytest +from numpy.testing import assert_allclose +from scipy import sparse, stats -from sklearn.utils._testing import assert_almost_equal, _convert_container -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils import safe_mask - -from 
sklearn.datasets import make_classification, make_regression, load_iris +from sklearn.datasets import load_iris, make_classification, make_regression from sklearn.feature_selection import ( + GenericUnivariateSelect, + SelectFdr, + SelectFpr, + SelectFwe, + SelectKBest, + SelectPercentile, chi2, f_classif, f_oneway, f_regression, - GenericUnivariateSelect, mutual_info_classif, mutual_info_regression, r_regression, - SelectPercentile, - SelectKBest, - SelectFpr, - SelectFdr, - SelectFwe, ) - +from sklearn.utils import safe_mask +from sklearn.utils._testing import ( + _convert_container, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) ############################################################################## # Test the score functions diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index 7b408201bc7f5..aa802136c2f39 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -1,34 +1,36 @@ import re -import pytest -import numpy as np import warnings from unittest.mock import Mock -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import skip_if_32bit -from sklearn.utils._testing import MinimalClassifier +import numpy as np +import pytest from sklearn import datasets +from sklearn.base import BaseEstimator from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression from sklearn.datasets import make_friedman1 +from sklearn.decomposition import PCA +from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier from sklearn.exceptions import NotFittedError +from sklearn.feature_selection import SelectFromModel from sklearn.linear_model import ( - LogisticRegression, - SGDClassifier, - Lasso, - LassoCV, ElasticNet, ElasticNetCV, + Lasso, + LassoCV, + LogisticRegression, + PassiveAggressiveClassifier, + SGDClassifier, ) -from sklearn.svm import LinearSVC -from sklearn.feature_selection import SelectFromModel -from sklearn.ensemble import RandomForestClassifier, HistGradientBoostingClassifier -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.base import BaseEstimator from sklearn.pipeline import make_pipeline -from sklearn.decomposition import PCA +from sklearn.svm import LinearSVC +from sklearn.utils._testing import ( + MinimalClassifier, + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + skip_if_32bit, +) class NaNTag(BaseEstimator): diff --git a/sklearn/feature_selection/tests/test_mutual_info.py b/sklearn/feature_selection/tests/test_mutual_info.py index f39e4a5738b21..349147f66e36c 100644 --- a/sklearn/feature_selection/tests/test_mutual_info.py +++ b/sklearn/feature_selection/tests/test_mutual_info.py @@ -2,13 +2,13 @@ import pytest from scipy.sparse import csr_matrix +from sklearn.feature_selection import mutual_info_classif, mutual_info_regression +from sklearn.feature_selection._mutual_info import _compute_mi from sklearn.utils import check_random_state from sklearn.utils._testing import ( - assert_array_equal, assert_allclose, + assert_array_equal, ) -from sklearn.feature_selection._mutual_info import _compute_mi -from sklearn.feature_selection import mutual_info_regression, mutual_info_classif def test_compute_mi_dd(): @@ -236,3 +236,18 @@ def 
test_mutual_information_symmetry_classif_regression(correlated, global_rando ) assert mi_classif == pytest.approx(mi_regression) + + +def test_mutual_info_regression_X_int_dtype(global_random_seed): + """Check that results agree when X is integer dtype and float dtype. + + Non-regression test for Issue #26696. + """ + rng = np.random.RandomState(global_random_seed) + X = rng.randint(100, size=(100, 10)) + X_float = X.astype(np.float64, copy=True) + y = rng.randint(100, size=100) + + expected = mutual_info_regression(X_float, y, random_state=global_random_seed) + result = mutual_info_regression(X, y, random_state=global_random_seed) + assert_allclose(result, expected) diff --git a/sklearn/feature_selection/tests/test_rfe.py b/sklearn/feature_selection/tests/test_rfe.py index fa7aeea19be6c..0f141f3461d7f 100644 --- a/sklearn/feature_selection/tests/test_rfe.py +++ b/sklearn/feature_selection/tests/test_rfe.py @@ -4,31 +4,26 @@ from operator import attrgetter -import pytest import numpy as np -from numpy.testing import assert_array_almost_equal, assert_array_equal, assert_allclose +import pytest +from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal from scipy import sparse from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.cross_decomposition import PLSCanonical, PLSRegression, CCA -from sklearn.feature_selection import RFE, RFECV +from sklearn.compose import TransformedTargetRegressor +from sklearn.cross_decomposition import CCA, PLSCanonical, PLSRegression from sklearn.datasets import load_iris, make_friedman1 -from sklearn.metrics import zero_one_loss -from sklearn.svm import SVC, SVR, LinearSVR -from sklearn.linear_model import LogisticRegression from sklearn.ensemble import RandomForestClassifier -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import GroupKFold -from sklearn.compose import TransformedTargetRegressor +from sklearn.feature_selection import RFE, RFECV +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import get_scorer, make_scorer, zero_one_loss +from sklearn.model_selection import GroupKFold, cross_val_score from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler - +from sklearn.svm import SVC, SVR, LinearSVR from sklearn.utils import check_random_state from sklearn.utils._testing import ignore_warnings -from sklearn.metrics import make_scorer -from sklearn.metrics import get_scorer - class MockClassifier: """ @@ -278,8 +273,8 @@ def test_rfecv_mockclassifier(): def test_rfecv_verbose_output(): # Check verbose=1 is producing an output. 
- from io import StringIO import sys + from io import StringIO sys.stdout = StringIO() diff --git a/sklearn/feature_selection/tests/test_sequential.py b/sklearn/feature_selection/tests/test_sequential.py index a1ea1d4677dd4..a515bf22cdda3 100644 --- a/sklearn/feature_selection/tests/test_sequential.py +++ b/sklearn/feature_selection/tests/test_sequential.py @@ -1,17 +1,17 @@ +import numpy as np import pytest import scipy -import numpy as np from numpy.testing import assert_array_equal -from sklearn.preprocessing import StandardScaler -from sklearn.pipeline import make_pipeline +from sklearn.cluster import KMeans +from sklearn.datasets import make_blobs, make_classification, make_regression +from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.feature_selection import SequentialFeatureSelector -from sklearn.datasets import make_regression, make_blobs, make_classification from sklearn.linear_model import LinearRegression -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.model_selection import cross_val_score, LeaveOneGroupOut -from sklearn.cluster import KMeans +from sklearn.model_selection import LeaveOneGroupOut, cross_val_score from sklearn.neighbors import KNeighborsClassifier +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler def test_bad_n_features_to_select(): diff --git a/sklearn/feature_selection/tests/test_variance_threshold.py b/sklearn/feature_selection/tests/test_variance_threshold.py index 4bce46556a666..190d016952980 100644 --- a/sklearn/feature_selection/tests/test_variance_threshold.py +++ b/sklearn/feature_selection/tests/test_variance_threshold.py @@ -1,11 +1,9 @@ import numpy as np import pytest - -from sklearn.utils._testing import assert_array_equal - from scipy.sparse import bsr_matrix, csc_matrix, csr_matrix from sklearn.feature_selection import VarianceThreshold +from sklearn.utils._testing import assert_array_equal data = [[0, 1, 2, 3, 4], [0, 2, 2, 3, 5], [1, 1, 2, 4, 0]] diff --git a/sklearn/gaussian_process/__init__.py b/sklearn/gaussian_process/__init__.py index 719208b7951be..bc0d902b45b18 100644 --- a/sklearn/gaussian_process/__init__.py +++ b/sklearn/gaussian_process/__init__.py @@ -8,9 +8,8 @@ based regression and classification. """ -from ._gpr import GaussianProcessRegressor -from ._gpc import GaussianProcessClassifier from . 
import kernels - +from ._gpc import GaussianProcessClassifier +from ._gpr import GaussianProcessRegressor __all__ = ["GaussianProcessRegressor", "GaussianProcessClassifier", "kernels"] diff --git a/sklearn/gaussian_process/_gpc.py b/sklearn/gaussian_process/_gpc.py index 50a8739372972..013815795a853 100644 --- a/sklearn/gaussian_process/_gpc.py +++ b/sklearn/gaussian_process/_gpc.py @@ -8,20 +8,19 @@ from operator import itemgetter import numpy as np -from scipy.linalg import cholesky, cho_solve, solve import scipy.optimize +from scipy.linalg import cho_solve, cholesky, solve from scipy.special import erf, expit -from ..base import BaseEstimator, ClassifierMixin, clone -from ..base import _fit_context -from .kernels import Kernel, RBF, CompoundKernel, ConstantKernel as C -from ..utils.validation import check_is_fitted +from ..base import BaseEstimator, ClassifierMixin, _fit_context, clone +from ..multiclass import OneVsOneClassifier, OneVsRestClassifier +from ..preprocessing import LabelEncoder from ..utils import check_random_state -from ..utils.optimize import _check_optimize_result from ..utils._param_validation import Interval, StrOptions -from ..preprocessing import LabelEncoder -from ..multiclass import OneVsRestClassifier, OneVsOneClassifier - +from ..utils.optimize import _check_optimize_result +from ..utils.validation import check_is_fitted +from .kernels import RBF, CompoundKernel, Kernel +from .kernels import ConstantKernel as C # Values required for approximating the logistic sigmoid by # error functions. coefs are obtained via: diff --git a/sklearn/gaussian_process/_gpr.py b/sklearn/gaussian_process/_gpr.py index 49fcab40c25f8..d3723016be127 100644 --- a/sklearn/gaussian_process/_gpr.py +++ b/sklearn/gaussian_process/_gpr.py @@ -9,17 +9,16 @@ from operator import itemgetter import numpy as np -from scipy.linalg import cholesky, cho_solve, solve_triangular import scipy.optimize +from scipy.linalg import cho_solve, cholesky, solve_triangular -from ..base import BaseEstimator, RegressorMixin, clone -from ..base import MultiOutputMixin -from ..base import _fit_context -from .kernels import Kernel, RBF, ConstantKernel as C +from ..base import BaseEstimator, MultiOutputMixin, RegressorMixin, _fit_context, clone from ..preprocessing._data import _handle_zeros_in_scale from ..utils import check_random_state -from ..utils.optimize import _check_optimize_result from ..utils._param_validation import Interval, StrOptions +from ..utils.optimize import _check_optimize_result +from .kernels import RBF, Kernel +from .kernels import ConstantKernel as C GPR_CHOLESKY_LOWER = True @@ -39,6 +38,10 @@ class GaussianProcessRegressor(MultiOutputMixin, RegressorMixin, BaseEstimator): externally for other ways of selecting hyperparameters, e.g., via Markov chain Monte Carlo. + To learn the difference between a point-estimate approach vs. a more + Bayesian modelling approach, refer to the example entitled + :ref:`sphx_glr_auto_examples_gaussian_process_plot_compare_gpr_krr.py`. + Read more in the :ref:`User Guide `. .. versionadded:: 0.18 diff --git a/sklearn/gaussian_process/kernels.py b/sklearn/gaussian_process/kernels.py index 1e0866afb6a4d..95db7b13c33ff 100644 --- a/sklearn/gaussian_process/kernels.py +++ b/sklearn/gaussian_process/kernels.py @@ -19,21 +19,20 @@ # Note: this module is strongly inspired by the kernel module of the george # package. 
+import math +import warnings from abc import ABCMeta, abstractmethod from collections import namedtuple -import math from inspect import signature import numpy as np -from scipy.special import kv, gamma -from scipy.spatial.distance import pdist, cdist, squareform +from scipy.spatial.distance import cdist, pdist, squareform +from scipy.special import gamma, kv -from ..metrics.pairwise import pairwise_kernels from ..base import clone -from ..utils.validation import _num_samples from ..exceptions import ConvergenceWarning - -import warnings +from ..metrics.pairwise import pairwise_kernels +from ..utils.validation import _num_samples def _check_length_scale(X, length_scale): diff --git a/sklearn/gaussian_process/tests/_mini_sequence_kernel.py b/sklearn/gaussian_process/tests/_mini_sequence_kernel.py index ad81890680168..4667329aff9b8 100644 --- a/sklearn/gaussian_process/tests/_mini_sequence_kernel.py +++ b/sklearn/gaussian_process/tests/_mini_sequence_kernel.py @@ -1,8 +1,12 @@ -from sklearn.gaussian_process.kernels import Kernel, Hyperparameter -from sklearn.gaussian_process.kernels import GenericKernelMixin -from sklearn.gaussian_process.kernels import StationaryKernelMixin import numpy as np + from sklearn.base import clone +from sklearn.gaussian_process.kernels import ( + GenericKernelMixin, + Hyperparameter, + Kernel, + StationaryKernelMixin, +) class MiniSeqKernel(GenericKernelMixin, StationaryKernelMixin, Kernel): diff --git a/sklearn/gaussian_process/tests/test_gpc.py b/sklearn/gaussian_process/tests/test_gpc.py index aefdb2e8ff0e2..842159f13ac04 100644 --- a/sklearn/gaussian_process/tests/test_gpc.py +++ b/sklearn/gaussian_process/tests/test_gpc.py @@ -4,22 +4,22 @@ # License: BSD 3 clause import warnings -import numpy as np - -from scipy.optimize import approx_fprime +import numpy as np import pytest +from scipy.optimize import approx_fprime +from sklearn.exceptions import ConvergenceWarning from sklearn.gaussian_process import GaussianProcessClassifier from sklearn.gaussian_process.kernels import ( RBF, CompoundKernel, - ConstantKernel as C, WhiteKernel, ) +from sklearn.gaussian_process.kernels import ( + ConstantKernel as C, +) from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel -from sklearn.exceptions import ConvergenceWarning - from sklearn.utils._testing import assert_almost_equal, assert_array_equal diff --git a/sklearn/gaussian_process/tests/test_gpr.py b/sklearn/gaussian_process/tests/test_gpr.py index 2de35d4659ce6..d890dc05d9f02 100644 --- a/sklearn/gaussian_process/tests/test_gpr.py +++ b/sklearn/gaussian_process/tests/test_gpr.py @@ -4,29 +4,31 @@ # Modified by: Pete Green # License: BSD 3 clause -import warnings -import sys import re -import numpy as np - -from scipy.optimize import approx_fprime +import sys +import warnings +import numpy as np import pytest +from scipy.optimize import approx_fprime +from sklearn.exceptions import ConvergenceWarning from sklearn.gaussian_process import GaussianProcessRegressor from sklearn.gaussian_process.kernels import ( RBF, - ConstantKernel as C, + DotProduct, + ExpSineSquared, WhiteKernel, ) -from sklearn.gaussian_process.kernels import DotProduct, ExpSineSquared +from sklearn.gaussian_process.kernels import ( + ConstantKernel as C, +) from sklearn.gaussian_process.tests._mini_sequence_kernel import MiniSeqKernel -from sklearn.exceptions import ConvergenceWarning from sklearn.utils._testing import ( - assert_array_less, + assert_allclose, assert_almost_equal, assert_array_almost_equal, - 
assert_allclose, + assert_array_less, ) diff --git a/sklearn/gaussian_process/tests/test_kernels.py b/sklearn/gaussian_process/tests/test_kernels.py index 56ab9c8b6c2bf..8733f94c94e06 100644 --- a/sklearn/gaussian_process/tests/test_kernels.py +++ b/sklearn/gaussian_process/tests/test_kernels.py @@ -3,40 +3,38 @@ # Author: Jan Hendrik Metzen # License: BSD 3 clause -import pytest -import numpy as np from inspect import signature -from sklearn.gaussian_process.kernels import _approx_fprime +import numpy as np +import pytest -from sklearn.metrics.pairwise import ( - PAIRWISE_KERNEL_FUNCTIONS, - euclidean_distances, - pairwise_kernels, -) +from sklearn.base import clone from sklearn.gaussian_process.kernels import ( RBF, + CompoundKernel, + ConstantKernel, + DotProduct, + Exponentiation, + ExpSineSquared, + KernelOperator, Matern, + PairwiseKernel, RationalQuadratic, - ExpSineSquared, - DotProduct, - ConstantKernel, WhiteKernel, - PairwiseKernel, - KernelOperator, - Exponentiation, - CompoundKernel, + _approx_fprime, +) +from sklearn.metrics.pairwise import ( + PAIRWISE_KERNEL_FUNCTIONS, + euclidean_distances, + pairwise_kernels, ) -from sklearn.base import clone - from sklearn.utils._testing import ( + assert_allclose, assert_almost_equal, - assert_array_equal, assert_array_almost_equal, - assert_allclose, + assert_array_equal, ) - X = np.random.RandomState(0).normal(0, 1, (5, 2)) Y = np.random.RandomState(0).normal(0, 1, (6, 2)) diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 37fc43731514a..e182a6ca73e61 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -10,17 +10,13 @@ import numpy.ma as ma from scipy import sparse as sp -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context -from ..utils._param_validation import StrOptions, MissingValues +from ..base import BaseEstimator, TransformerMixin, _fit_context +from ..utils import _is_pandas_na, is_scalar_nan +from ..utils._mask import _get_mask +from ..utils._param_validation import MissingValues, StrOptions from ..utils.fixes import _mode from ..utils.sparsefuncs import _get_median -from ..utils.validation import check_is_fitted -from ..utils.validation import FLOAT_DTYPES -from ..utils.validation import _check_feature_names_in -from ..utils._mask import _get_mask -from ..utils import _is_pandas_na -from ..utils import is_scalar_nan +from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted def _check_inputs_dtype(X, missing_values): @@ -260,6 +256,9 @@ class SimpleImputer(_BaseImputer): [[ 7. 2. 3. ] [ 4. 3.5 6. ] [10. 3.5 9. ]] + + For a more detailed example see + :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. """ _parameter_constraints: dict = { @@ -699,8 +698,10 @@ class MissingIndicator(TransformerMixin, BaseEstimator): """Binary indicators for missing values. Note that this component typically should not be used in a vanilla - :class:`Pipeline` consisting of transformers and a classifier, but rather - could be added using a :class:`FeatureUnion` or :class:`ColumnTransformer`. + :class:`~sklearn.pipeline.Pipeline` consisting of transformers and a + classifier, but rather could be added using a + :class:`~sklearn.pipeline.FeatureUnion` or + :class:`~sklearn.compose.ColumnTransformer`. Read more in the :ref:`User Guide `. 
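[Editor's aside, not part of the patch] The reworded `MissingIndicator` docstring above recommends combining it with other transformers through a `FeatureUnion` or `ColumnTransformer` rather than using it as a lone pipeline step. A minimal sketch of the `FeatureUnion` route; the `SimpleImputer(strategy="mean")` choice is only for illustration:

import numpy as np
from sklearn.impute import MissingIndicator, SimpleImputer
from sklearn.pipeline import FeatureUnion

X = np.array([[1.0, np.nan], [2.0, 3.0], [np.nan, 5.0]])

# Imputed values and missing-value indicators are emitted side by side, so a
# downstream estimator sees both the filled-in features and the mask.
union = FeatureUnion(
    [
        ("imputed", SimpleImputer(strategy="mean")),
        ("indicators", MissingIndicator()),
    ]
)
print(union.fit_transform(X).shape)  # (3, 4): 2 imputed columns + 2 indicator columns

A similar effect is available through `SimpleImputer(add_indicator=True)`, which is what the new `test_imputation_adds_missing_indicator_if_add_indicator_is_true` test further down exercises.
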
diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index f977e5bc23e6c..e08f1bd773a34 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -1,31 +1,25 @@ -from time import time +import warnings from collections import namedtuple from numbers import Integral, Real -import warnings +from time import time -from scipy import stats import numpy as np +from scipy import stats -from ..base import clone -from ..base import _fit_context +from ..base import _fit_context, clone from ..exceptions import ConvergenceWarning from ..preprocessing import normalize from ..utils import ( + _safe_assign, + _safe_indexing, check_array, check_random_state, is_scalar_nan, - _safe_assign, - _safe_indexing, ) -from ..utils.validation import FLOAT_DTYPES, check_is_fitted -from ..utils.validation import _check_feature_names_in from ..utils._mask import _get_mask from ..utils._param_validation import HasMethods, Interval, StrOptions - -from ._base import _BaseImputer -from ._base import SimpleImputer -from ._base import _check_inputs_dtype - +from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted +from ._base import SimpleImputer, _BaseImputer, _check_inputs_dtype _ImputerTriplet = namedtuple( "_ImputerTriplet", ["feat_idx", "neighbor_feat_idx", "estimator"] @@ -279,6 +273,10 @@ class IterativeImputer(_BaseImputer): array([[ 6.9584..., 2. , 3. ], [ 4. , 2.6000..., 6. ], [10. , 4.9999..., 9. ]]) + + For a more detailed example see + :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py` or + :ref:`sphx_glr_auto_examples_impute_plot_iterative_imputer_variants_comparison.py`. """ _parameter_constraints: dict = { diff --git a/sklearn/impute/_knn.py b/sklearn/impute/_knn.py index 915f8cbdb3fcb..7da7785369c0d 100644 --- a/sklearn/impute/_knn.py +++ b/sklearn/impute/_knn.py @@ -3,19 +3,18 @@ # License: BSD 3 clause from numbers import Integral + import numpy as np -from ._base import _BaseImputer from ..base import _fit_context -from ..utils.validation import FLOAT_DTYPES from ..metrics import pairwise_distances_chunked from ..metrics.pairwise import _NAN_METRICS from ..neighbors._base import _get_weights from ..utils import is_scalar_nan from ..utils._mask import _get_mask -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_feature_names_in from ..utils._param_validation import Hidden, Interval, StrOptions +from ..utils.validation import FLOAT_DTYPES, _check_feature_names_in, check_is_fitted +from ._base import _BaseImputer class KNNImputer(_BaseImputer): @@ -122,6 +121,9 @@ class KNNImputer(_BaseImputer): [3. , 4. , 3. ], [5.5, 6. , 5. ], [8. , 8. , 7. ]]) + + For a more detailed example see + :ref:`sphx_glr_auto_examples_impute_plot_missing_values.py`. """ _parameter_constraints: dict = { @@ -283,7 +285,12 @@ def transform(self, X): Xc[:, ~valid_mask] = 0 else: Xc = X[:, valid_mask] - return Xc + + # Even if there are no missing values in X, we still concatenate Xc + # with the missing value indicator matrix, X_indicator. + # This is to ensure that the output maintains consistency in terms + # of columns, regardless of whether missing values exist in X or not. 
+ return super()._concatenate_indicator(Xc, X_indicator) row_missing_idx = np.flatnonzero(mask.any(axis=1)) diff --git a/sklearn/impute/tests/test_base.py b/sklearn/impute/tests/test_base.py index fedfdebb20a1f..0c1bd83f7ca9e 100644 --- a/sklearn/impute/tests/test_base.py +++ b/sklearn/impute/tests/test_base.py @@ -1,12 +1,10 @@ -import pytest - import numpy as np - -from sklearn.utils._mask import _get_mask -from sklearn.utils._testing import _convert_container, assert_allclose +import pytest from sklearn.impute._base import _BaseImputer from sklearn.impute._iterative import _assign_where +from sklearn.utils._mask import _get_mask +from sklearn.utils._testing import _convert_container, assert_allclose @pytest.fixture diff --git a/sklearn/impute/tests/test_common.py b/sklearn/impute/tests/test_common.py index 00521ca090dc5..32ac789375b9b 100644 --- a/sklearn/impute/tests/test_common.py +++ b/sklearn/impute/tests/test_common.py @@ -1,17 +1,14 @@ -import pytest - import numpy as np +import pytest from scipy import sparse -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_array_equal - from sklearn.experimental import enable_iterative_imputer # noqa - -from sklearn.impute import IterativeImputer -from sklearn.impute import KNNImputer -from sklearn.impute import SimpleImputer +from sklearn.impute import IterativeImputer, KNNImputer, SimpleImputer +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + assert_array_equal, +) def imputers(): @@ -184,3 +181,39 @@ def test_keep_empty_features(imputer, keep_empty_features): assert X_imputed.shape == X.shape else: assert X_imputed.shape == (X.shape[0], X.shape[1] - 1) + + +@pytest.mark.parametrize("imputer", imputers(), ids=lambda x: x.__class__.__name__) +@pytest.mark.parametrize("missing_value_test", [np.nan, 1]) +def test_imputation_adds_missing_indicator_if_add_indicator_is_true( + imputer, missing_value_test +): + """Check that missing indicator always exists when add_indicator=True. + + Non-regression test for gh-26590. + """ + X_train = np.array([[0, np.nan], [1, 2]]) + + # Test data where missing_value_test variable can be set to np.nan or 1. 
+ X_test = np.array([[0, missing_value_test], [1, 2]]) + + imputer.set_params(add_indicator=True) + imputer.fit(X_train) + + X_test_imputed_with_indicator = imputer.transform(X_test) + assert X_test_imputed_with_indicator.shape == (2, 3) + + imputer.set_params(add_indicator=False) + imputer.fit(X_train) + X_test_imputed_without_indicator = imputer.transform(X_test) + assert X_test_imputed_without_indicator.shape == (2, 2) + + assert_allclose( + X_test_imputed_with_indicator[:, :-1], X_test_imputed_without_indicator + ) + if np.isnan(missing_value_test): + expected_missing_indicator = [1, 0] + else: + expected_missing_indicator = [0, 0] + + assert_allclose(X_test_imputed_with_indicator[:, -1], expected_missing_indicator) diff --git a/sklearn/impute/tests/test_impute.py b/sklearn/impute/tests/test_impute.py index 24b070d21ef06..57f9a3d5159e0 100644 --- a/sklearn/impute/tests/test_impute.py +++ b/sklearn/impute/tests/test_impute.py @@ -1,33 +1,31 @@ -import pytest +import io import warnings import numpy as np +import pytest from scipy import sparse from scipy.stats import kstest -import io - -from sklearn.utils._testing import _convert_container -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal +from sklearn import tree +from sklearn.datasets import load_diabetes +from sklearn.dummy import DummyRegressor +from sklearn.exceptions import ConvergenceWarning # make IterativeImputer available from sklearn.experimental import enable_iterative_imputer # noqa - -from sklearn.datasets import load_diabetes -from sklearn.impute import MissingIndicator -from sklearn.impute import SimpleImputer, IterativeImputer, KNNImputer -from sklearn.dummy import DummyRegressor -from sklearn.linear_model import BayesianRidge, ARDRegression, RidgeCV -from sklearn.pipeline import Pipeline -from sklearn.pipeline import make_union +from sklearn.impute import IterativeImputer, KNNImputer, MissingIndicator, SimpleImputer +from sklearn.impute._base import _most_frequent +from sklearn.linear_model import ARDRegression, BayesianRidge, RidgeCV from sklearn.model_selection import GridSearchCV -from sklearn import tree +from sklearn.pipeline import Pipeline, make_union from sklearn.random_projection import _sparse_random_matrix -from sklearn.exceptions import ConvergenceWarning -from sklearn.impute._base import _most_frequent +from sklearn.utils._testing import ( + _convert_container, + assert_allclose, + assert_allclose_dense_sparse, + assert_array_almost_equal, + assert_array_equal, +) def _assert_array_equal_and_same_dtype(x, y): @@ -261,7 +259,7 @@ def test_imputation_median_special_cases(): @pytest.mark.parametrize("dtype", [None, object, str]) def test_imputation_mean_median_error_invalid_type(strategy, dtype): X = np.array([["a", "b", 3], [4, "e", 6], ["g", "h", 9]], dtype=dtype) - msg = "non-numeric data:\ncould not convert string to float: '" + msg = "non-numeric data:\ncould not convert string to float:" with pytest.raises(ValueError, match=msg): imputer = SimpleImputer(strategy=strategy) imputer.fit_transform(X) @@ -274,7 +272,7 @@ def test_imputation_mean_median_error_invalid_type_list_pandas(strategy, type): if type == "dataframe": pd = pytest.importorskip("pandas") X = pd.DataFrame(X) - msg = "non-numeric data:\ncould not convert string to float: '" + msg = "non-numeric data:\ncould not convert string to float:" with 
pytest.raises(ValueError, match=msg): imputer = SimpleImputer(strategy=strategy) imputer.fit_transform(X) @@ -1671,7 +1669,7 @@ def test_simple_imputer_constant_keep_empty_features(array_type, keep_empty_feat X_imputed = getattr(imputer, method)(X) assert X_imputed.shape == X.shape constant_feature = ( - X_imputed[:, 0].A if array_type == "sparse" else X_imputed[:, 0] + X_imputed[:, 0].toarray() if array_type == "sparse" else X_imputed[:, 0] ) assert_array_equal(constant_feature, fill_value) @@ -1692,7 +1690,7 @@ def test_simple_imputer_keep_empty_features(strategy, array_type, keep_empty_fea if keep_empty_features: assert X_imputed.shape == X.shape constant_feature = ( - X_imputed[:, 0].A if array_type == "sparse" else X_imputed[:, 0] + X_imputed[:, 0].toarray() if array_type == "sparse" else X_imputed[:, 0] ) assert_array_equal(constant_feature, 0) else: diff --git a/sklearn/impute/tests/test_knn.py b/sklearn/impute/tests/test_knn.py index 80ee1d0c2b574..141c2ea90dbd9 100644 --- a/sklearn/impute/tests/test_knn.py +++ b/sklearn/impute/tests/test_knn.py @@ -3,8 +3,7 @@ from sklearn import config_context from sklearn.impute import KNNImputer -from sklearn.metrics.pairwise import nan_euclidean_distances -from sklearn.metrics.pairwise import pairwise_distances +from sklearn.metrics.pairwise import nan_euclidean_distances, pairwise_distances from sklearn.neighbors import KNeighborsRegressor from sklearn.utils._testing import assert_allclose diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index f73ffe8cff26f..f8e08785e8358 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -1,13 +1,11 @@ """The :mod:`sklearn.inspection` module includes tools for model inspection.""" +from ._partial_dependence import partial_dependence from ._permutation_importance import permutation_importance from ._plot.decision_boundary import DecisionBoundaryDisplay - -from ._partial_dependence import partial_dependence from ._plot.partial_dependence import PartialDependenceDisplay - __all__ = [ "partial_dependence", "permutation_importance", diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index e3af7dda1e505..d54adc90444fc 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -11,18 +11,23 @@ from scipy import sparse from scipy.stats.mstats import mquantiles -from ._pd_utils import _check_feature_names, _get_feature_index from ..base import is_classifier, is_regressor -from ..utils.extmath import cartesian -from ..utils import check_array -from ..utils import check_matplotlib_support # noqa -from ..utils import _safe_indexing -from ..utils import _safe_assign -from ..utils import _determine_key_type -from ..utils import _get_column_indices -from ..utils.validation import _check_sample_weight -from ..utils.validation import check_is_fitted -from ..utils import Bunch +from ..ensemble import RandomForestRegressor +from ..ensemble._gb import BaseGradientBoosting +from ..ensemble._hist_gradient_boosting.gradient_boosting import ( + BaseHistGradientBoosting, +) +from ..exceptions import NotFittedError +from ..tree import DecisionTreeRegressor +from ..utils import ( + Bunch, + _determine_key_type, + _get_column_indices, + _safe_assign, + _safe_indexing, + check_array, + check_matplotlib_support, # noqa +) from ..utils._param_validation import ( HasMethods, Integral, @@ -30,14 +35,9 @@ StrOptions, validate_params, ) -from ..tree import DecisionTreeRegressor -from 
..ensemble import RandomForestRegressor -from ..exceptions import NotFittedError -from ..ensemble._gb import BaseGradientBoosting -from ..ensemble._hist_gradient_boosting.gradient_boosting import ( - BaseHistGradientBoosting, -) - +from ..utils.extmath import cartesian +from ..utils.validation import _check_sample_weight, check_is_fitted +from ._pd_utils import _check_feature_names, _get_feature_index __all__ = [ "partial_dependence", @@ -367,7 +367,8 @@ def _partial_dependence_brute( "grid_resolution": [Interval(Integral, 1, None, closed="left")], "method": [StrOptions({"auto", "recursion", "brute"})], "kind": [StrOptions({"average", "individual", "both"})], - } + }, + prefer_skip_nested_validation=True, ) def partial_dependence( estimator, diff --git a/sklearn/inspection/_permutation_importance.py b/sklearn/inspection/_permutation_importance.py index 9330589a04794..a347fd63fae7a 100644 --- a/sklearn/inspection/_permutation_importance.py +++ b/sklearn/inspection/_permutation_importance.py @@ -1,15 +1,13 @@ """Permutation importance for estimators.""" import numbers + import numpy as np from ..ensemble._bagging import _generate_indices from ..metrics import check_scoring, get_scorer_names from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer from ..model_selection._validation import _aggregate_score_dicts -from ..utils import Bunch, _safe_indexing -from ..utils import check_random_state -from ..utils import check_array -from ..utils.parallel import delayed, Parallel +from ..utils import Bunch, _safe_indexing, check_array, check_random_state from ..utils._param_validation import ( HasMethods, Integral, @@ -18,6 +16,7 @@ StrOptions, validate_params, ) +from ..utils.parallel import Parallel, delayed def _weights_scorer(scorer, estimator, X, y, sample_weight): @@ -128,7 +127,8 @@ def _create_importances_bunch(baseline_score, permuted_score): Interval(Integral, 1, None, closed="left"), Interval(RealNotInt, 0, 1, closed="right"), ], - } + }, + prefer_skip_nested_validation=True, ) def permutation_importance( estimator, diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index 22b4590d9bc3c..c9d2a52b6e9ab 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -2,14 +2,13 @@ import numpy as np -from ...preprocessing import LabelEncoder -from ...utils import check_matplotlib_support -from ...utils import _safe_indexing from ...base import is_regressor +from ...preprocessing import LabelEncoder +from ...utils import _safe_indexing, check_matplotlib_support from ...utils.validation import ( - check_is_fitted, _is_arraylike_not_scalar, _num_features, + check_is_fitted, ) @@ -275,10 +274,10 @@ def from_estimator( See Also -------- DecisionBoundaryDisplay : Decision boundary visualization. - ConfusionMatrixDisplay.from_estimator : Plot the confusion matrix - given an estimator, the data, and the label. - ConfusionMatrixDisplay.from_predictions : Plot the confusion matrix - given the true and predicted labels. + sklearn.metrics.ConfusionMatrixDisplay.from_estimator : Plot the + confusion matrix given an estimator, the data, and the label. + sklearn.metrics.ConfusionMatrixDisplay.from_predictions : Plot the + confusion matrix given the true and predicted labels. 
Examples -------- diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py index 48e151cefedbe..7414433ed3f56 100644 --- a/sklearn/inspection/_plot/partial_dependence.py +++ b/sklearn/inspection/_plot/partial_dependence.py @@ -6,16 +6,18 @@ from scipy import sparse from scipy.stats.mstats import mquantiles -from .. import partial_dependence -from .._pd_utils import _check_feature_names, _get_feature_index from ...base import is_regressor -from ...utils import Bunch -from ...utils import check_array -from ...utils import check_matplotlib_support # noqa -from ...utils import check_random_state -from ...utils import _safe_indexing -from ...utils.parallel import delayed, Parallel +from ...utils import ( + Bunch, + _safe_indexing, + check_array, + check_matplotlib_support, # noqa + check_random_state, +) from ...utils._encode import _unique +from ...utils.parallel import Parallel, delayed +from .. import partial_dependence +from .._pd_utils import _check_feature_names, _get_feature_index class PartialDependenceDisplay: @@ -84,8 +86,9 @@ class PartialDependenceDisplay: .. note:: The fast ``method='recursion'`` option is only available for - ``kind='average'``. Plotting individual dependencies requires using - the slower ``method='brute'`` option. + `kind='average'` and `sample_weights=None`. Computing individual + dependencies and doing weighted averages requires using the slower + `method='brute'`. .. versionadded:: 0.24 Add `kind` parameter with `'average'`, `'individual'`, and `'both'` @@ -245,6 +248,7 @@ def from_estimator( X, features, *, + sample_weight=None, categorical_features=None, feature_names=None, target=None, @@ -335,6 +339,14 @@ def from_estimator( with `kind='average'`). Each tuple must be of size 2. If any entry is a string, then it must be in ``feature_names``. + sample_weight : array-like of shape (n_samples,), default=None + Sample weights are used to calculate weighted means when averaging the + model output. If `None`, then samples are equally weighted. If + `sample_weight` is not `None`, then `method` will be set to `'brute'`. + Note that `sample_weight` is ignored for `kind='individual'`. + + .. versionadded:: 1.3 + categorical_features : array-like of shape (n_features,) or shape \ (n_categorical_features,), dtype={bool, int, str}, default=None Indicates the categorical features. @@ -407,7 +419,8 @@ def from_estimator( computationally intensive. - `'auto'`: the `'recursion'` is used for estimators that support it, - and `'brute'` is used otherwise. + and `'brute'` is used otherwise. If `sample_weight` is not `None`, + then `'brute'` is used regardless of the estimator. Please see :ref:`this note ` for differences between the `'brute'` and `'recursion'` method. @@ -462,9 +475,10 @@ def from_estimator( - ``kind='average'`` results in the traditional PD plot; - ``kind='individual'`` results in the ICE plot. - Note that the fast ``method='recursion'`` option is only available for - ``kind='average'``. Plotting individual dependencies requires using the - slower ``method='brute'`` option. + Note that the fast `method='recursion'` option is only available for + `kind='average'` and `sample_weights=None`. Computing individual + dependencies and doing weighted averages requires using the slower + `method='brute'`. 
centered : bool, default=False If `True`, the ICE and PD lines will start at the origin of the @@ -691,6 +705,7 @@ def from_estimator( estimator, X, fxs, + sample_weight=sample_weight, feature_names=feature_names, categorical_features=categorical_features, response_method=response_method, diff --git a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py index 73cfe187d7f6e..47c21e4521c35 100644 --- a/sklearn/inspection/_plot/tests/test_boundary_decision_display.py +++ b/sklearn/inspection/_plot/tests/test_boundary_decision_display.py @@ -1,21 +1,19 @@ import warnings -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.datasets import make_classification -from sklearn.linear_model import LogisticRegression -from sklearn.datasets import load_iris -from sklearn.datasets import make_multilabel_classification -from sklearn.tree import DecisionTreeRegressor -from sklearn.tree import DecisionTreeClassifier - +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.datasets import ( + load_iris, + make_classification, + make_multilabel_classification, +) from sklearn.inspection import DecisionBoundaryDisplay from sklearn.inspection._plot.decision_boundary import _check_boundary_response_method - +from sklearn.linear_model import LogisticRegression +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved pytestmark = pytest.mark.filterwarnings( diff --git a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py index acda2d001144e..e98fdebaeaf03 100644 --- a/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py +++ b/sklearn/inspection/_plot/tests/test_plot_partial_dependence.py @@ -1,22 +1,21 @@ import numpy as np -from scipy.stats.mstats import mquantiles - import pytest from numpy.testing import assert_allclose +from scipy.stats.mstats import mquantiles -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_iris -from sklearn.datasets import make_classification, make_regression -from sklearn.ensemble import GradientBoostingRegressor -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.linear_model import LinearRegression -from sklearn.utils._testing import _convert_container from sklearn.compose import make_column_transformer -from sklearn.preprocessing import OneHotEncoder -from sklearn.pipeline import make_pipeline - +from sklearn.datasets import ( + load_diabetes, + load_iris, + make_classification, + make_regression, +) +from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor from sklearn.inspection import PartialDependenceDisplay - +from sklearn.linear_model import LinearRegression +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import OneHotEncoder +from sklearn.utils._testing import _convert_container # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved pytestmark = pytest.mark.filterwarnings( @@ -1087,3 +1086,34 @@ def test_partial_dependence_display_kind_centered_interaction( ) assert all([ln._y[0] == 0.0 for ln in disp.lines_.ravel() if ln is not None]) + + +def test_partial_dependence_display_with_constant_sample_weight( + pyplot, + clf_diabetes, + diabetes, +): + """Check that 
the utilization of a constant sample weight maintains the + standard behavior. + """ + disp = PartialDependenceDisplay.from_estimator( + clf_diabetes, + diabetes.data, + [0, 1], + kind="average", + method="brute", + ) + + sample_weight = np.ones_like(diabetes.target) + disp_sw = PartialDependenceDisplay.from_estimator( + clf_diabetes, + diabetes.data, + [0, 1], + sample_weight=sample_weight, + kind="average", + method="brute", + ) + + assert np.array_equal( + disp.pd_results[0]["average"], disp_sw.pd_results[0]["average"] + ) diff --git a/sklearn/inspection/tests/test_partial_dependence.py b/sklearn/inspection/tests/test_partial_dependence.py index 4e93985f4d02a..0336dc4b827fe 100644 --- a/sklearn/inspection/tests/test_partial_dependence.py +++ b/sklearn/inspection/tests/test_partial_dependence.py @@ -7,41 +7,39 @@ import pytest import sklearn +from sklearn.base import BaseEstimator, ClassifierMixin, clone, is_regressor +from sklearn.cluster import KMeans +from sklearn.compose import make_column_transformer +from sklearn.datasets import load_iris, make_classification, make_regression +from sklearn.dummy import DummyClassifier +from sklearn.ensemble import ( + GradientBoostingClassifier, + GradientBoostingRegressor, + HistGradientBoostingClassifier, + HistGradientBoostingRegressor, + RandomForestRegressor, +) +from sklearn.exceptions import NotFittedError from sklearn.inspection import partial_dependence from sklearn.inspection._partial_dependence import ( _grid_from_X, _partial_dependence_brute, _partial_dependence_recursion, ) -from sklearn.ensemble import GradientBoostingClassifier -from sklearn.ensemble import GradientBoostingRegressor -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model import MultiTaskLasso -from sklearn.tree import DecisionTreeRegressor -from sklearn.datasets import load_iris -from sklearn.datasets import make_classification, make_regression -from sklearn.cluster import KMeans -from sklearn.compose import make_column_transformer +from sklearn.linear_model import LinearRegression, LogisticRegression, MultiTaskLasso from sklearn.metrics import r2_score -from sklearn.preprocessing import PolynomialFeatures -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import RobustScaler -from sklearn.preprocessing import scale from sklearn.pipeline import make_pipeline -from sklearn.dummy import DummyClassifier -from sklearn.base import BaseEstimator, ClassifierMixin, clone -from sklearn.base import is_regressor -from sklearn.exceptions import NotFittedError -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_equal +from sklearn.preprocessing import ( + PolynomialFeatures, + RobustScaler, + StandardScaler, + scale, +) +from sklearn.tree import DecisionTreeRegressor +from sklearn.tree.tests.test_tree import assert_is_subtree from sklearn.utils import _IS_32BIT +from sklearn.utils._testing import assert_allclose, assert_array_equal from sklearn.utils.validation import check_random_state -from sklearn.tree.tests.test_tree import assert_is_subtree - # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/inspection/tests/test_pd_utils.py b/sklearn/inspection/tests/test_pd_utils.py index 5f461ad498f5b..5dea3834a77a7 100644 --- 
a/sklearn/inspection/tests/test_pd_utils.py +++ b/sklearn/inspection/tests/test_pd_utils.py @@ -1,9 +1,8 @@ import numpy as np import pytest -from sklearn.utils._testing import _convert_container - from sklearn.inspection._pd_utils import _check_feature_names, _get_feature_index +from sklearn.utils._testing import _convert_container @pytest.mark.parametrize( diff --git a/sklearn/inspection/tests/test_permutation_importance.py b/sklearn/inspection/tests/test_permutation_importance.py index 307d17188e852..b1a680646afe1 100644 --- a/sklearn/inspection/tests/test_permutation_importance.py +++ b/sklearn/inspection/tests/test_permutation_importance.py @@ -1,31 +1,27 @@ -import pytest import numpy as np - +import pytest from numpy.testing import assert_allclose from sklearn.compose import ColumnTransformer -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_iris -from sklearn.datasets import make_classification -from sklearn.datasets import make_regression +from sklearn.datasets import ( + load_diabetes, + load_iris, + make_classification, + make_regression, +) from sklearn.dummy import DummyClassifier -from sklearn.ensemble import RandomForestRegressor -from sklearn.ensemble import RandomForestClassifier -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import LogisticRegression +from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor from sklearn.impute import SimpleImputer from sklearn.inspection import permutation_importance -from sklearn.model_selection import train_test_split +from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.metrics import ( get_scorer, mean_squared_error, r2_score, ) +from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import KBinsDiscretizer -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import scale +from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, StandardScaler, scale from sklearn.utils import parallel_backend from sklearn.utils._testing import _convert_container diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index a1cf95b95591b..4e5f7f7b0034f 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -3,20 +3,19 @@ # Nelle Varoquaux # License: BSD 3 clause +import math +import warnings +from numbers import Real + import numpy as np from scipy import interpolate from scipy.stats import spearmanr -from numbers import Real -import warnings -import math -from .base import BaseEstimator, TransformerMixin, RegressorMixin -from .base import _fit_context +from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique +from .base import BaseEstimator, RegressorMixin, TransformerMixin, _fit_context from .utils import check_array, check_consistent_length -from .utils.validation import _check_sample_weight, check_is_fitted from .utils._param_validation import Interval, StrOptions -from ._isotonic import _inplace_contiguous_isotonic_regression, _make_unique - +from .utils.validation import _check_sample_weight, check_is_fitted __all__ = ["check_increasing", "isotonic_regression", "IsotonicRegression"] diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py index 7f190a2b66823..dfe8e33fdf4c2 100644 --- a/sklearn/kernel_approximation.py +++ b/sklearn/kernel_approximation.py @@ -8,8 +8,8 @@ # License: BSD 3 clause -from numbers import Integral, Real import 
warnings +from numbers import Integral, Real import numpy as np import scipy.sparse as sp @@ -20,20 +20,21 @@ except ImportError: # scipy < 1.4 from scipy.fftpack import fft, ifft -from .base import BaseEstimator -from .base import TransformerMixin -from .base import ClassNamePrefixFeaturesOutMixin -from .base import _fit_context -from .utils import check_random_state -from .utils import deprecated +from .base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from .metrics.pairwise import KERNEL_PARAMS, PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels +from .utils import check_random_state, deprecated +from .utils._param_validation import Interval, StrOptions from .utils.extmath import safe_sparse_dot -from .utils.validation import check_is_fitted -from .utils.validation import _check_feature_names_in -from .metrics.pairwise import pairwise_kernels, KERNEL_PARAMS -from .utils.validation import check_non_negative -from .utils._param_validation import Interval -from .utils._param_validation import StrOptions -from .metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS +from .utils.validation import ( + _check_feature_names_in, + check_is_fitted, + check_non_negative, +) class PolynomialCountSketch( @@ -362,7 +363,7 @@ def fit(self, X, y=None): X = self._validate_data(X, accept_sparse="csr") random_state = check_random_state(self.random_state) n_features = X.shape[1] - sparse = sp.isspmatrix(X) + sparse = sp.issparse(X) if self.gamma == "scale": # var = E[X^2] - E[X]^2 if sparse X_var = (X.multiply(X)).mean() - (X.mean()) ** 2 if sparse else X.var() diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py index a7bfeefaef651..8842b31693394 100644 --- a/sklearn/kernel_ridge.py +++ b/sklearn/kernel_ridge.py @@ -7,12 +7,11 @@ import numpy as np -from .base import BaseEstimator, RegressorMixin, MultiOutputMixin -from .base import _fit_context -from .utils._param_validation import Interval, StrOptions -from .metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels +from .base import BaseEstimator, MultiOutputMixin, RegressorMixin, _fit_context from .linear_model._ridge import _solve_cholesky_kernel -from .utils.validation import check_is_fitted, _check_sample_weight +from .metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels +from .utils._param_validation import Interval, StrOptions +from .utils.validation import _check_sample_weight, check_is_fitted class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator): @@ -52,7 +51,7 @@ class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator): kernel : str or callable, default="linear" Kernel mapping used internally. This parameter is directly passed to - :class:`~sklearn.metrics.pairwise.pairwise_kernel`. + :class:`~sklearn.metrics.pairwise.pairwise_kernels`. If `kernel` is a string, it must be one of the metrics in `pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed". If `kernel` is "precomputed", X is assumed to be a kernel matrix. diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index d5a14756c41a9..45c99d4d36df1 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -7,46 +7,44 @@ # complete documentation. 
from ._base import LinearRegression -from ._bayes import BayesianRidge, ARDRegression -from ._least_angle import ( - Lars, - LassoLars, - lars_path, - lars_path_gram, - LarsCV, - LassoLarsCV, - LassoLarsIC, -) +from ._bayes import ARDRegression, BayesianRidge from ._coordinate_descent import ( - Lasso, ElasticNet, - LassoCV, ElasticNetCV, - lasso_path, - enet_path, - MultiTaskLasso, + Lasso, + LassoCV, MultiTaskElasticNet, MultiTaskElasticNetCV, + MultiTaskLasso, MultiTaskLassoCV, + enet_path, + lasso_path, ) -from ._glm import PoissonRegressor, GammaRegressor, TweedieRegressor +from ._glm import GammaRegressor, PoissonRegressor, TweedieRegressor from ._huber import HuberRegressor -from ._sgd_fast import Hinge, Log, ModifiedHuber, SquaredLoss, Huber -from ._stochastic_gradient import SGDClassifier, SGDRegressor, SGDOneClassSVM -from ._ridge import Ridge, RidgeCV, RidgeClassifier, RidgeClassifierCV, ridge_regression +from ._least_angle import ( + Lars, + LarsCV, + LassoLars, + LassoLarsCV, + LassoLarsIC, + lars_path, + lars_path_gram, +) from ._logistic import LogisticRegression, LogisticRegressionCV from ._omp import ( - orthogonal_mp, - orthogonal_mp_gram, OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV, + orthogonal_mp, + orthogonal_mp_gram, ) -from ._passive_aggressive import PassiveAggressiveClassifier -from ._passive_aggressive import PassiveAggressiveRegressor +from ._passive_aggressive import PassiveAggressiveClassifier, PassiveAggressiveRegressor from ._perceptron import Perceptron - from ._quantile import QuantileRegressor from ._ransac import RANSACRegressor +from ._ridge import Ridge, RidgeClassifier, RidgeClassifierCV, RidgeCV, ridge_regression +from ._sgd_fast import Hinge, Huber, Log, ModifiedHuber, SquaredLoss +from ._stochastic_gradient import SGDClassifier, SGDOneClassSVM, SGDRegressor from ._theil_sen import TheilSenRegressor __all__ = [ diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py index 92c067c850225..f3ef92f6c4ecc 100644 --- a/sklearn/linear_model/_base.py +++ b/sklearn/linear_model/_base.py @@ -14,33 +14,37 @@ # Maria Telenczuk # License: BSD 3 clause -from abc import ABCMeta, abstractmethod import numbers import warnings +from abc import ABCMeta, abstractmethod +from numbers import Integral import numpy as np import scipy.sparse as sp -from scipy import linalg -from scipy import optimize -from scipy import sparse +from scipy import linalg, optimize, sparse from scipy.sparse.linalg import lsqr from scipy.special import expit -from numbers import Integral -from ..base import BaseEstimator, ClassifierMixin, RegressorMixin, MultiOutputMixin -from ..base import _fit_context +from ..base import ( + BaseEstimator, + ClassifierMixin, + MultiOutputMixin, + RegressorMixin, + _fit_context, +) from ..preprocessing._data import _is_constant_feature -from ..utils import check_array -from ..utils.validation import FLOAT_DTYPES -from ..utils import check_random_state -from ..utils.extmath import safe_sparse_dot -from ..utils.extmath import _incremental_mean_and_var -from ..utils.sparsefuncs import mean_variance_axis, inplace_column_scale +from ..utils import check_array, check_random_state from ..utils._array_api import get_namespace -from ..utils._seq_dataset import ArrayDataset32, CSRDataset32 -from ..utils._seq_dataset import ArrayDataset64, CSRDataset64 -from ..utils.validation import check_is_fitted, _check_sample_weight -from ..utils.parallel import delayed, Parallel +from ..utils._seq_dataset import ( + ArrayDataset32, + ArrayDataset64, + 
CSRDataset32, + CSRDataset64, +) +from ..utils.extmath import _incremental_mean_and_var, safe_sparse_dot +from ..utils.parallel import Parallel, delayed +from ..utils.sparsefuncs import inplace_column_scale, mean_variance_axis +from ..utils.validation import FLOAT_DTYPES, _check_sample_weight, check_is_fitted # TODO: bayesian_ridge_regression and bayesian_regression_ard # should be squashed into its respective objects. @@ -835,7 +839,7 @@ def _pre_fit( """ n_samples, n_features = X.shape - if sparse.isspmatrix(X): + if sparse.issparse(X): # copy is not needed here as X is not modified inplace when X is sparse precompute = False X, y, X_offset, y_offset, X_scale = _preprocess_data( diff --git a/sklearn/linear_model/_bayes.py b/sklearn/linear_model/_bayes.py index 37dc3b81511f5..7b64e91f18c17 100644 --- a/sklearn/linear_model/_bayes.py +++ b/sklearn/linear_model/_bayes.py @@ -8,16 +8,16 @@ import warnings from math import log from numbers import Integral, Real + import numpy as np from scipy import linalg +from scipy.linalg import pinvh -from ._base import LinearModel, _preprocess_data, _rescale_data -from ..base import RegressorMixin -from ..base import _fit_context +from ..base import RegressorMixin, _fit_context +from ..utils._param_validation import Hidden, Interval, StrOptions from ..utils.extmath import fast_logdet -from scipy.linalg import pinvh from ..utils.validation import _check_sample_weight -from ..utils._param_validation import Interval, Hidden, StrOptions +from ._base import LinearModel, _preprocess_data, _rescale_data # TODO(1.5) Remove diff --git a/sklearn/linear_model/_cd_fast.pyx b/sklearn/linear_model/_cd_fast.pyx index 3b0b2251abf69..38ba169ae8015 100644 --- a/sklearn/linear_model/_cd_fast.pyx +++ b/sklearn/linear_model/_cd_fast.pyx @@ -32,7 +32,9 @@ cdef enum: # Max value for our rand_r replacement (near the bottom). # We don't use RAND_MAX because it's different across platforms and # particularly tiny on Windows/MSVC. - RAND_R_MAX = 0x7FFFFFFF + # It corresponds to the maximum representable value for + # 32-bit signed integers (i.e. 2^31 - 1). 
+ RAND_R_MAX = 2147483647 cdef inline UINT32_t rand_int(UINT32_t end, UINT32_t* random_state) noexcept nogil: diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py index 829c0ab6149f1..7c26bf553a334 100644 --- a/sklearn/linear_model/_coordinate_descent.py +++ b/sklearn/linear_model/_coordinate_descent.py @@ -5,36 +5,34 @@ # # License: BSD 3 clause +import numbers import sys import warnings -import numbers from abc import ABC, abstractmethod from functools import partial from numbers import Integral, Real import numpy as np -from scipy import sparse from joblib import effective_n_jobs +from scipy import sparse -from ._base import LinearModel, _pre_fit -from ..base import RegressorMixin, MultiOutputMixin -from ..base import _fit_context -from ._base import _preprocess_data +from ..base import MultiOutputMixin, RegressorMixin, _fit_context +from ..model_selection import check_cv from ..utils import check_array, check_scalar -from ..utils.validation import check_random_state from ..utils._param_validation import Interval, StrOptions -from ..model_selection import check_cv from ..utils.extmath import safe_sparse_dot +from ..utils.parallel import Parallel, delayed from ..utils.validation import ( _check_sample_weight, check_consistent_length, check_is_fitted, + check_random_state, column_or_1d, ) -from ..utils.parallel import delayed, Parallel # mypy error: Module 'sklearn.linear_model' has no attribute '_cd_fast' from . import _cd_fast as cd_fast # type: ignore +from ._base import LinearModel, _pre_fit, _preprocess_data def _set_order(X, y, order="C"): @@ -139,7 +137,7 @@ def _alpha_grid( sparse_center = False if Xy is None: - X_sparse = sparse.isspmatrix(X) + X_sparse = sparse.issparse(X) sparse_center = X_sparse and fit_intercept X = check_array( X, accept_sparse="csc", copy=(copy_X and fit_intercept and not X_sparse) @@ -526,7 +524,7 @@ def enet_path( raise ValueError("positive=True is not allowed for multi-output (y.ndim != 1)") # MultiTaskElasticNet does not support sparse matrices - if not multi_output and sparse.isspmatrix(X): + if not multi_output and sparse.issparse(X): if X_offset_param is not None: # As sparse matrices are not actually centered we need this to be passed to # the CD solver. @@ -587,7 +585,7 @@ def enet_path( # account for n_samples scaling in objectives between here and cd_fast l1_reg = alpha * l1_ratio * n_samples l2_reg = alpha * (1.0 - l1_ratio) * n_samples - if not multi_output and sparse.isspmatrix(X): + if not multi_output and sparse.issparse(X): model = cd_fast.sparse_enet_coordinate_descent( w=coef_, alpha=l1_reg, @@ -1067,7 +1065,7 @@ def _decision_function(self, X): The predicted decision function. 
""" check_is_fitted(self) - if sparse.isspmatrix(X): + if sparse.issparse(X): return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_ else: return super()._decision_function(X) @@ -1512,7 +1510,7 @@ def fit(self, X, y, sample_weight=None): check_y_params = dict( copy=False, dtype=[np.float64, np.float32], ensure_2d=False ) - if isinstance(X, np.ndarray) or sparse.isspmatrix(X): + if isinstance(X, np.ndarray) or sparse.issparse(X): # Keep a reference to X reference_to_old_X = X # Let us not impose fortran ordering so far: it is @@ -1529,7 +1527,7 @@ def fit(self, X, y, sample_weight=None): X, y = self._validate_data( X, y, validate_separately=(check_X_params, check_y_params) ) - if sparse.isspmatrix(X): + if sparse.issparse(X): if hasattr(reference_to_old_X, "data") and not np.may_share_memory( reference_to_old_X.data, X.data ): @@ -1564,7 +1562,7 @@ def fit(self, X, y, sample_weight=None): ) y = column_or_1d(y, warn=True) else: - if sparse.isspmatrix(X): + if sparse.issparse(X): raise TypeError("X should be dense but a sparse matrix waspassed") elif y.ndim == 1: raise ValueError( @@ -1787,7 +1785,7 @@ class LassoCV(RegressorMixin, LinearModelCV): - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. - For int/None inputs, :class:`KFold` is used. + For int/None inputs, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. @@ -2006,7 +2004,7 @@ class ElasticNetCV(RegressorMixin, LinearModelCV): - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. - For int/None inputs, :class:`KFold` is used. + For int/None inputs, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. @@ -2646,7 +2644,7 @@ class MultiTaskElasticNetCV(RegressorMixin, LinearModelCV): - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. - For int/None inputs, :class:`KFold` is used. + For int/None inputs, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. @@ -2880,7 +2878,7 @@ class MultiTaskLassoCV(RegressorMixin, LinearModelCV): - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. - For int/None inputs, :class:`KFold` is used. + For int/None inputs, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. 
diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py index fea9c4d4cf6ba..1b82bbd77bcf9 100644 --- a/sklearn/linear_model/_glm/__init__.py +++ b/sklearn/linear_model/_glm/__init__.py @@ -1,10 +1,10 @@ # License: BSD 3 clause from .glm import ( - _GeneralizedLinearRegressor, - PoissonRegressor, GammaRegressor, + PoissonRegressor, TweedieRegressor, + _GeneralizedLinearRegressor, ) __all__ = [ diff --git a/sklearn/linear_model/_glm/_newton_solver.py b/sklearn/linear_model/_glm/_newton_solver.py index 68d08d2e7a21b..fa9b431fd2377 100644 --- a/sklearn/linear_model/_glm/_newton_solver.py +++ b/sklearn/linear_model/_glm/_newton_solver.py @@ -375,6 +375,7 @@ def solve(self, X, y, sample_weight): self.iteration = 1 self.converged = False + self.use_fallback_lbfgs_solve = False while self.iteration <= self.max_iter and not self.converged: if self.verbose: diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py index b1bc460f24dff..3dc0bbdc66bff 100644 --- a/sklearn/linear_model/_glm/glm.py +++ b/sklearn/linear_model/_glm/glm.py @@ -11,7 +11,6 @@ import numpy as np import scipy.optimize -from ._newton_solver import NewtonCholeskySolver, NewtonSolver from ..._loss.loss import ( HalfGammaLoss, HalfPoissonLoss, @@ -19,14 +18,14 @@ HalfTweedieLoss, HalfTweedieLossIdentity, ) -from ...base import BaseEstimator, RegressorMixin -from ...base import _fit_context +from ...base import BaseEstimator, RegressorMixin, _fit_context from ...utils import check_array from ...utils._openmp_helpers import _openmp_effective_n_threads from ...utils._param_validation import Hidden, Interval, StrOptions from ...utils.optimize import _check_optimize_result from ...utils.validation import _check_sample_weight, check_is_fitted from .._linear_loss import LinearModelLoss +from ._newton_solver import NewtonCholeskySolver, NewtonSolver class _GeneralizedLinearRegressor(RegressorMixin, BaseEstimator): diff --git a/sklearn/linear_model/_glm/tests/test_glm.py b/sklearn/linear_model/_glm/tests/test_glm.py index c92ef5f99ca8a..5256a5f370272 100644 --- a/sklearn/linear_model/_glm/tests/test_glm.py +++ b/sklearn/linear_model/_glm/tests/test_glm.py @@ -2,22 +2,22 @@ # # License: BSD 3 clause -from functools import partial import itertools import warnings +from functools import partial import numpy as np -from numpy.testing import assert_allclose import pytest import scipy +from numpy.testing import assert_allclose from scipy import linalg from scipy.optimize import minimize, root -from sklearn.base import clone from sklearn._loss import HalfBinomialLoss, HalfPoissonLoss, HalfTweedieLoss from sklearn._loss.link import IdentityLink, LogLink - +from sklearn.base import clone from sklearn.datasets import make_low_rank_matrix, make_regression +from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model import ( GammaRegressor, PoissonRegressor, @@ -27,11 +27,9 @@ from sklearn.linear_model._glm import _GeneralizedLinearRegressor from sklearn.linear_model._glm._newton_solver import NewtonCholeskySolver from sklearn.linear_model._linear_loss import LinearModelLoss -from sklearn.exceptions import ConvergenceWarning from sklearn.metrics import d2_tweedie_score, mean_poisson_deviance from sklearn.model_selection import train_test_split - SOLVERS = ["lbfgs", "newton-cholesky"] diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py index def2ae273d5c4..554f693061116 100644 --- a/sklearn/linear_model/_huber.py +++ b/sklearn/linear_model/_huber.py @@ 
-2,18 +2,17 @@ # License: BSD 3 clause from numbers import Integral, Real -import numpy as np +import numpy as np from scipy import optimize -from ..base import BaseEstimator, RegressorMixin -from ..base import _fit_context -from ._base import LinearModel +from ..base import BaseEstimator, RegressorMixin, _fit_context from ..utils import axis0_safe_slice from ..utils._param_validation import Interval -from ..utils.validation import _check_sample_weight from ..utils.extmath import safe_sparse_dot from ..utils.optimize import _check_optimize_result +from ..utils.validation import _check_sample_weight +from ._base import LinearModel def _huber_loss_and_gradient(w, X, y, epsilon, alpha, sample_weight=None): diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index e6c653eb80bb3..df85c9bbb6b52 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -8,27 +8,24 @@ # # License: BSD 3 clause -from math import log import sys import warnings - +from math import log from numbers import Integral, Real + import numpy as np -from scipy import linalg, interpolate +from scipy import interpolate, linalg from scipy.linalg.lapack import get_lapack_funcs -from ._base import LinearModel, LinearRegression -from ._base import _deprecate_normalize, _preprocess_data -from ..base import RegressorMixin, MultiOutputMixin -from ..base import _fit_context +from ..base import MultiOutputMixin, RegressorMixin, _fit_context +from ..exceptions import ConvergenceWarning +from ..model_selection import check_cv # mypy error: Module 'sklearn.utils' has no attribute 'arrayfuncs' -from ..utils import arrayfuncs, as_float_array # type: ignore -from ..utils import check_random_state +from ..utils import arrayfuncs, as_float_array, check_random_state # type: ignore from ..utils._param_validation import Hidden, Interval, StrOptions -from ..model_selection import check_cv -from ..exceptions import ConvergenceWarning -from ..utils.parallel import delayed, Parallel +from ..utils.parallel import Parallel, delayed +from ._base import LinearModel, LinearRegression, _deprecate_normalize, _preprocess_data SOLVE_TRIANGULAR_ARGS = {"check_finite": False} @@ -639,12 +636,6 @@ def _lars_path_solver( # The system is becoming too ill-conditioned. # We have degenerate vectors in our active set. # We'll 'drop for good' the last regressor added. - - # Note: this case is very rare. It is no longer triggered by - # the test suite. The `equality_tolerance` margin added in 0.16 - # to get early stopping to work consistently on all versions of - # Python including 32 bit Python under Windows seems to make it - # very difficult to trigger the 'drop for good' strategy. warnings.warn( "Regressors in active set degenerate. " "Dropping a regressor, after %i iterations, " @@ -652,7 +643,7 @@ def _lars_path_solver( "with an active set of %i regressors, and " "the smallest cholesky pivot element being %.3e." " Reduce max_iter or increase eps parameters." - % (n_iter, alpha, n_active, diag), + % (n_iter, alpha.item(), n_active, diag), ConvergenceWarning, ) @@ -680,7 +671,7 @@ def _lars_path_solver( "are small and the current value of alpha is no " "longer well controlled. %i iterations, alpha=%.3e, " "previous alpha=%.3e, with an active set of %i " - "regressors." % (n_iter, alpha, prev_alpha, n_active), + "regressors." 
% (n_iter, alpha.item(), prev_alpha.item(), n_active), ConvergenceWarning, ) break @@ -1543,7 +1534,7 @@ class LarsCV(Lars): - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. - For integer/None inputs, :class:`KFold` is used. + For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. @@ -1847,7 +1838,7 @@ class LassoLarsCV(LarsCV): - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. - For integer/None inputs, :class:`KFold` is used. + For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. diff --git a/sklearn/linear_model/_linear_loss.py b/sklearn/linear_model/_linear_loss.py index f70d78fb42871..92a203abc87ab 100644 --- a/sklearn/linear_model/_linear_loss.py +++ b/sklearn/linear_model/_linear_loss.py @@ -3,6 +3,7 @@ """ import numpy as np from scipy import sparse + from ..utils.extmath import squared_norm diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py index 30a0f40a0f2fd..6453cee2ac22e 100644 --- a/sklearn/linear_model/_logistic.py +++ b/sklearn/linear_model/_logistic.py @@ -11,35 +11,37 @@ # Arthur Mensch `) formulation. Dual formulation + is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features. tol : float, default=1e-4 @@ -1453,8 +1456,9 @@ class LogisticRegressionCV(LogisticRegression, LinearClassifierMixin, BaseEstima ``cv`` default value if None changed from 3-fold to 5-fold. dual : bool, default=False - Dual or primal formulation. Dual formulation is only implemented for - l2 penalty with liblinear solver. Prefer dual=False when + Dual (constrained) or primal (regularized, see also + :ref:`this equation `) formulation. Dual formulation + is only implemented for l2 penalty with liblinear solver. Prefer dual=False when n_samples > n_features. penalty : {'l1', 'l2', 'elasticnet'}, default='l2' diff --git a/sklearn/linear_model/_omp.py b/sklearn/linear_model/_omp.py index df451a99417b0..689d3df53b104 100644 --- a/sklearn/linear_model/_omp.py +++ b/sklearn/linear_model/_omp.py @@ -7,20 +7,18 @@ import warnings from math import sqrt - from numbers import Integral, Real + import numpy as np from scipy import linalg from scipy.linalg.lapack import get_lapack_funcs -from ._base import LinearModel, _pre_fit, _deprecate_normalize -from ..base import RegressorMixin, MultiOutputMixin -from ..base import _fit_context -from ..utils import as_float_array, check_array -from ..utils.parallel import delayed, Parallel -from ..utils._param_validation import Hidden, Interval, StrOptions -from ..utils._param_validation import validate_params +from ..base import MultiOutputMixin, RegressorMixin, _fit_context from ..model_selection import check_cv +from ..utils import as_float_array, check_array +from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params +from ..utils.parallel import Parallel, delayed +from ._base import LinearModel, _deprecate_normalize, _pre_fit premature = ( "Orthogonal matching pursuit ended prematurely due to linear" @@ -293,7 +291,8 @@ def _gram_omp( "copy_X": ["boolean"], "return_path": ["boolean"], "return_n_iter": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def orthogonal_mp( X, @@ -333,7 +332,7 @@ def orthogonal_mp( default) this value is set to 10% of n_features. 
tol : float, default=None - Maximum norm of the residual. If not None, overrides n_nonzero_coefs. + Maximum squared norm of the residual. If not None, overrides n_nonzero_coefs. precompute : 'auto' or bool, default=False Whether to perform precomputations. Improves performance when n_targets @@ -480,7 +479,8 @@ def orthogonal_mp_gram( default) this value is set to 10% of n_features. tol : float, default=None - Maximum norm of the residual. If not None, overrides n_nonzero_coefs. + Maximum squared norm of the residual. If not `None`, + overrides `n_nonzero_coefs`. norms_squared : array-like of shape (n_targets,), default=None Squared L2 norms of the lines of y. Required if tol is not None. @@ -612,7 +612,7 @@ class OrthogonalMatchingPursuit(MultiOutputMixin, RegressorMixin, LinearModel): default) this value is set to 10% of n_features. tol : float, default=None - Maximum norm of the residual. If not None, overrides n_nonzero_coefs. + Maximum squared norm of the residual. If not None, overrides n_nonzero_coefs. fit_intercept : bool, default=True Whether to calculate the intercept for this model. If set @@ -935,7 +935,7 @@ class OrthogonalMatchingPursuitCV(RegressorMixin, LinearModel): - :term:`CV splitter`, - An iterable yielding (train, test) splits as arrays of indices. - For integer/None inputs, :class:`KFold` is used. + For integer/None inputs, :class:`~sklearn.model_selection.KFold` is used. Refer :ref:`User Guide ` for the various cross-validation strategies that can be used here. diff --git a/sklearn/linear_model/_passive_aggressive.py b/sklearn/linear_model/_passive_aggressive.py index a9c81799c8ca3..d27cc928ca056 100644 --- a/sklearn/linear_model/_passive_aggressive.py +++ b/sklearn/linear_model/_passive_aggressive.py @@ -2,11 +2,9 @@ # License: BSD 3 clause from numbers import Real -from ._stochastic_gradient import BaseSGDClassifier -from ._stochastic_gradient import BaseSGDRegressor -from ._stochastic_gradient import DEFAULT_EPSILON from ..base import _fit_context from ..utils._param_validation import Interval, StrOptions +from ._stochastic_gradient import DEFAULT_EPSILON, BaseSGDClassifier, BaseSGDRegressor class PassiveAggressiveClassifier(BaseSGDClassifier): @@ -26,7 +24,7 @@ class PassiveAggressiveClassifier(BaseSGDClassifier): max_iter : int, default=1000 The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the ``fit`` method, and not the - :meth:`partial_fit` method. + :meth:`PassiveAggressive.partial_fit` method. .. 
versionadded:: 0.19 diff --git a/sklearn/linear_model/_perceptron.py b/sklearn/linear_model/_perceptron.py index 09b6ae48cb5e8..30e781983365e 100644 --- a/sklearn/linear_model/_perceptron.py +++ b/sklearn/linear_model/_perceptron.py @@ -2,8 +2,8 @@ # License: BSD 3 clause from numbers import Real +from ..utils._param_validation import Interval, StrOptions from ._stochastic_gradient import BaseSGDClassifier -from ..utils._param_validation import StrOptions, Interval class Perceptron(BaseSGDClassifier): diff --git a/sklearn/linear_model/_quantile.py b/sklearn/linear_model/_quantile.py index b4a5581386a5f..8bd59485c5062 100644 --- a/sklearn/linear_model/_quantile.py +++ b/sklearn/linear_model/_quantile.py @@ -8,14 +8,13 @@ from scipy import sparse from scipy.optimize import linprog -from ..base import BaseEstimator, RegressorMixin -from ..base import _fit_context -from ._base import LinearModel +from ..base import BaseEstimator, RegressorMixin, _fit_context from ..exceptions import ConvergenceWarning from ..utils import _safe_indexing -from ..utils.validation import _check_sample_weight -from ..utils.fixes import sp_version, parse_version from ..utils._param_validation import Hidden, Interval, StrOptions +from ..utils.fixes import parse_version, sp_version +from ..utils.validation import _check_sample_weight +from ._base import LinearModel class QuantileRegressor(LinearModel, RegressorMixin, BaseEstimator): diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py index 1c12ecc13a258..01d3207ab5736 100644 --- a/sklearn/linear_model/_ransac.py +++ b/sklearn/linear_model/_ransac.py @@ -2,22 +2,31 @@ # # License: BSD 3 clause -from numbers import Integral, Real import warnings +from numbers import Integral, Real import numpy as np -from ..base import BaseEstimator, MetaEstimatorMixin, RegressorMixin, clone -from ..base import MultiOutputMixin -from ..base import _fit_context -from ..utils import check_random_state, check_consistent_length +from ..base import ( + BaseEstimator, + MetaEstimatorMixin, + MultiOutputMixin, + RegressorMixin, + _fit_context, + clone, +) +from ..exceptions import ConvergenceWarning +from ..utils import check_consistent_length, check_random_state +from ..utils._param_validation import ( + HasMethods, + Interval, + Options, + RealNotInt, + StrOptions, +) from ..utils.random import sample_without_replacement -from ..utils.validation import check_is_fitted, _check_sample_weight +from ..utils.validation import _check_sample_weight, check_is_fitted, has_fit_parameter from ._base import LinearRegression -from ..utils.validation import has_fit_parameter -from ..utils._param_validation import Interval, Options, StrOptions, HasMethods -from ..utils._param_validation import RealNotInt -from ..exceptions import ConvergenceWarning _EPSILON = np.spacing(1) @@ -92,10 +101,11 @@ class RANSACRegressor( relative number `ceil(min_samples * X.shape[0])` for `min_samples < 1`. This is typically chosen as the minimal number of samples necessary to estimate the given `estimator`. By default a - ``sklearn.linear_model.LinearRegression()`` estimator is assumed and + :class:`~sklearn.linear_model.LinearRegression` estimator is assumed and `min_samples` is chosen as ``X.shape[1] + 1``. This parameter is highly dependent upon the model, so if a `estimator` other than - :class:`linear_model.LinearRegression` is used, the user must provide a value. + :class:`~sklearn.linear_model.LinearRegression` is used, the user must + provide a value. 
residual_threshold : float, default=None Maximum residual for a data sample to be classified as an inlier. diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py index 893b10d1d93ae..03af59376cc8d 100644 --- a/sklearn/linear_model/_ridge.py +++ b/sklearn/linear_model/_ridge.py @@ -9,40 +9,35 @@ # License: BSD 3 clause +import numbers +import warnings from abc import ABCMeta, abstractmethod from functools import partial from numbers import Integral, Real -import warnings import numpy as np -import numbers -from scipy import linalg -from scipy import sparse -from scipy import optimize +from scipy import linalg, optimize, sparse from scipy.sparse import linalg as sp_linalg -from ._base import LinearClassifierMixin, LinearModel -from ._base import _preprocess_data, _rescale_data -from ._sag import sag_solver -from ..base import MultiOutputMixin, RegressorMixin, is_classifier -from ..base import _fit_context -from ..utils.extmath import safe_sparse_dot -from ..utils.extmath import row_norms -from ..utils import check_array -from ..utils import check_consistent_length -from ..utils import check_scalar -from ..utils import compute_sample_weight -from ..utils import column_or_1d -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_sample_weight -from ..utils._param_validation import Interval -from ..utils._param_validation import StrOptions -from ..preprocessing import LabelBinarizer -from ..model_selection import GridSearchCV -from ..metrics import check_scoring -from ..metrics import get_scorer_names +from ..base import MultiOutputMixin, RegressorMixin, _fit_context, is_classifier from ..exceptions import ConvergenceWarning +from ..metrics import check_scoring, get_scorer_names +from ..model_selection import GridSearchCV +from ..preprocessing import LabelBinarizer +from ..utils import ( + check_array, + check_consistent_length, + check_scalar, + column_or_1d, + compute_sample_weight, +) +from ..utils._param_validation import Interval, StrOptions +from ..utils.extmath import row_norms, safe_sparse_dot +from ..utils.fixes import _sparse_linalg_cg from ..utils.sparsefuncs import mean_variance_axis +from ..utils.validation import _check_sample_weight, check_is_fitted +from ._base import LinearClassifierMixin, LinearModel, _preprocess_data, _rescale_data +from ._sag import sag_solver def _get_rescaled_operator(X, X_offset, sample_weight_sqrt): @@ -111,12 +106,7 @@ def _mv(x): C = sp_linalg.LinearOperator( (n_samples, n_samples), matvec=mv, dtype=X.dtype ) - # FIXME atol - try: - coef, info = sp_linalg.cg(C, y_column, tol=tol, atol="legacy") - except TypeError: - # old scipy - coef, info = sp_linalg.cg(C, y_column, tol=tol) + coef, info = _sparse_linalg_cg(C, y_column, rtol=tol) coefs[i] = X1.rmatvec(coef) else: # linear ridge @@ -125,14 +115,7 @@ def _mv(x): C = sp_linalg.LinearOperator( (n_features, n_features), matvec=mv, dtype=X.dtype ) - # FIXME atol - try: - coefs[i], info = sp_linalg.cg( - C, y_column, maxiter=max_iter, tol=tol, atol="legacy" - ) - except TypeError: - # old scipy - coefs[i], info = sp_linalg.cg(C, y_column, maxiter=max_iter, tol=tol) + coefs[i], info = _sparse_linalg_cg(C, y_column, maxiter=max_iter, rtol=tol) if info < 0: raise ValueError("Failed with error code %d" % info) diff --git a/sklearn/linear_model/_sag.py b/sklearn/linear_model/_sag.py index b7860edd43031..2626955ec2a7f 100644 --- a/sklearn/linear_model/_sag.py +++ b/sklearn/linear_model/_sag.py @@ -8,12 +8,12 @@ import numpy as np -from ._base import 
make_dataset -from ._sag_fast import sag32, sag64 from ..exceptions import ConvergenceWarning from ..utils import check_array -from ..utils.validation import _check_sample_weight from ..utils.extmath import row_norms +from ..utils.validation import _check_sample_weight +from ._base import make_dataset +from ._sag_fast import sag32, sag64 def get_auto_step_size( diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py index bc8f31016c6f8..8456b3456291a 100644 --- a/sklearn/linear_model/_stochastic_gradient.py +++ b/sklearn/linear_model/_stochastic_gradient.py @@ -6,39 +6,42 @@ Descent (SGD). """ -import numpy as np import warnings - from abc import ABCMeta, abstractmethod from numbers import Integral, Real -from ..base import clone, is_classifier -from ..base import _fit_context -from ._base import LinearClassifierMixin, SparseCoefMixin -from ._base import make_dataset -from ..base import BaseEstimator, RegressorMixin, OutlierMixin -from ..utils import check_random_state -from ..utils.metaestimators import available_if +import numpy as np + +from ..base import ( + BaseEstimator, + OutlierMixin, + RegressorMixin, + _fit_context, + clone, + is_classifier, +) +from ..exceptions import ConvergenceWarning +from ..model_selection import ShuffleSplit, StratifiedShuffleSplit +from ..utils import check_random_state, compute_class_weight +from ..utils._param_validation import Hidden, Interval, StrOptions from ..utils.extmath import safe_sparse_dot +from ..utils.metaestimators import available_if from ..utils.multiclass import _check_partial_fit_first_call -from ..utils.validation import check_is_fitted, _check_sample_weight -from ..utils._param_validation import Interval -from ..utils._param_validation import StrOptions -from ..utils._param_validation import Hidden -from ..utils.parallel import delayed, Parallel -from ..exceptions import ConvergenceWarning -from ..model_selection import StratifiedShuffleSplit, ShuffleSplit - -from ._sgd_fast import _plain_sgd32, _plain_sgd64 -from ..utils import compute_class_weight -from ._sgd_fast import Hinge -from ._sgd_fast import SquaredHinge -from ._sgd_fast import Log -from ._sgd_fast import ModifiedHuber -from ._sgd_fast import SquaredLoss -from ._sgd_fast import Huber -from ._sgd_fast import EpsilonInsensitive -from ._sgd_fast import SquaredEpsilonInsensitive +from ..utils.parallel import Parallel, delayed +from ..utils.validation import _check_sample_weight, check_is_fitted +from ._base import LinearClassifierMixin, SparseCoefMixin, make_dataset +from ._sgd_fast import ( + EpsilonInsensitive, + Hinge, + Huber, + Log, + ModifiedHuber, + SquaredEpsilonInsensitive, + SquaredHinge, + SquaredLoss, + _plain_sgd32, + _plain_sgd64, +) LEARNING_RATE_TYPES = { "constant": 1, diff --git a/sklearn/linear_model/_theil_sen.py b/sklearn/linear_model/_theil_sen.py index 72c2d897681c4..cc774e8783762 100644 --- a/sklearn/linear_model/_theil_sen.py +++ b/sklearn/linear_model/_theil_sen.py @@ -8,22 +8,21 @@ import warnings -from numbers import Integral, Real from itertools import combinations +from numbers import Integral, Real import numpy as np +from joblib import effective_n_jobs from scipy import linalg -from scipy.special import binom from scipy.linalg.lapack import get_lapack_funcs -from joblib import effective_n_jobs +from scipy.special import binom -from ._base import LinearModel -from ..base import RegressorMixin -from ..base import _fit_context +from ..base import RegressorMixin, _fit_context +from ..exceptions import 
ConvergenceWarning from ..utils import check_random_state from ..utils._param_validation import Interval -from ..utils.parallel import delayed, Parallel -from ..exceptions import ConvergenceWarning +from ..utils.parallel import Parallel, delayed +from ._base import LinearModel _EPSILON = np.finfo(np.double).eps diff --git a/sklearn/linear_model/tests/test_base.py b/sklearn/linear_model/tests/test_base.py index 92932042ca428..aea224d454b21 100644 --- a/sklearn/linear_model/tests/test_base.py +++ b/sklearn/linear_model/tests/test_base.py @@ -4,27 +4,26 @@ # # License: BSD 3 clause -import pytest import warnings import numpy as np -from scipy import sparse -from scipy import linalg - -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose +import pytest +from scipy import linalg, sparse +from sklearn.datasets import load_iris, make_regression, make_sparse_uncorrelated from sklearn.linear_model import LinearRegression -from sklearn.linear_model._base import _deprecate_normalize -from sklearn.linear_model._base import _preprocess_data -from sklearn.linear_model._base import _rescale_data -from sklearn.linear_model._base import make_dataset -from sklearn.datasets import make_sparse_uncorrelated -from sklearn.datasets import make_regression -from sklearn.datasets import load_iris -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import add_dummy_feature +from sklearn.linear_model._base import ( + _deprecate_normalize, + _preprocess_data, + _rescale_data, + make_dataset, +) +from sklearn.preprocessing import StandardScaler, add_dummy_feature +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, +) rtol = 1e-6 @@ -874,7 +873,7 @@ def test_linear_regression_sample_weight_consistency( # ::test_linear_regression_sample_weight_consistency pass else: - assert_allclose(reg.coef_, coef_0, rtol=1e-6) + assert_allclose(reg.coef_, coef_0, rtol=1e-5) if fit_intercept: assert_allclose(reg.intercept_, intercept_0) diff --git a/sklearn/linear_model/tests/test_bayes.py b/sklearn/linear_model/tests/test_bayes.py index b33e656335e1a..ab269ebf160fb 100644 --- a/sklearn/linear_model/tests/test_bayes.py +++ b/sklearn/linear_model/tests/test_bayes.py @@ -8,14 +8,14 @@ import numpy as np import pytest - -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_less -from sklearn.utils import check_random_state -from sklearn.linear_model import BayesianRidge, ARDRegression -from sklearn.linear_model import Ridge from sklearn import datasets +from sklearn.linear_model import ARDRegression, BayesianRidge, Ridge +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_less, +) from sklearn.utils.extmath import fast_logdet diabetes = datasets.load_diabetes() diff --git a/sklearn/linear_model/tests/test_coordinate_descent.py b/sklearn/linear_model/tests/test_coordinate_descent.py index a12a1a0ec792f..f5791ddb4e081 100644 --- a/sklearn/linear_model/tests/test_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_coordinate_descent.py @@ -2,45 +2,26 @@ # Alexandre Gramfort # License: BSD 3 clause -import numpy as np -import pytest import warnings -from scipy import interpolate, sparse from copy import deepcopy + import joblib 
+import numpy as np +import pytest +from scipy import interpolate, sparse -from sklearn.base import is_classifier -from sklearn.base import clone -from sklearn.datasets import load_diabetes -from sklearn.datasets import make_regression -from sklearn.model_selection import ( - GridSearchCV, - LeaveOneGroupOut, - train_test_split, -) -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import StandardScaler +from sklearn.base import clone, is_classifier +from sklearn.datasets import load_diabetes, make_regression from sklearn.exceptions import ConvergenceWarning -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings - -from sklearn.utils._testing import TempMemmap - from sklearn.linear_model import ( ElasticNet, ElasticNetCV, - enet_path, Lars, - lars_path, Lasso, LassoCV, LassoLars, LassoLarsCV, LassoLarsIC, - lasso_path, LinearRegression, MultiTaskElasticNet, MultiTaskElasticNetCV, @@ -51,10 +32,27 @@ RidgeClassifier, RidgeClassifierCV, RidgeCV, + enet_path, + lars_path, + lasso_path, ) - from sklearn.linear_model._coordinate_descent import _set_order +from sklearn.model_selection import ( + GridSearchCV, + LeaveOneGroupOut, + train_test_split, +) +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler from sklearn.utils import check_array +from sklearn.utils._testing import ( + TempMemmap, + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) @pytest.mark.parametrize("order", ["C", "F"]) @@ -86,12 +84,10 @@ def test_set_order_sparse(order, input_order): X = X.asformat(sparse_format) y = X.asformat(sparse_format) X2, y2 = _set_order(X, y, order=order) - if order == "C": - assert sparse.isspmatrix_csr(X2) - assert sparse.isspmatrix_csr(y2) - elif order == "F": - assert sparse.isspmatrix_csc(X2) - assert sparse.isspmatrix_csc(y2) + + format = "csc" if order == "F" else "csr" + assert sparse.issparse(X2) and X2.format == format + assert sparse.issparse(y2) and y2.format == format def test_lasso_zero(): @@ -274,8 +270,8 @@ def test_lasso_cv(): def test_lasso_cv_with_some_model_selection(): - from sklearn.model_selection import ShuffleSplit from sklearn import datasets + from sklearn.model_selection import ShuffleSplit diabetes = datasets.load_diabetes() X = diabetes.data diff --git a/sklearn/linear_model/tests/test_huber.py b/sklearn/linear_model/tests/test_huber.py index 88a5d096772b3..d2552d1b990fd 100644 --- a/sklearn/linear_model/tests/test_huber.py +++ b/sklearn/linear_model/tests/test_huber.py @@ -4,13 +4,14 @@ import numpy as np from scipy import optimize, sparse -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal - from sklearn.datasets import make_regression -from sklearn.linear_model import HuberRegressor, LinearRegression, SGDRegressor, Ridge +from sklearn.linear_model import HuberRegressor, LinearRegression, Ridge, SGDRegressor from sklearn.linear_model._huber import _huber_loss_and_gradient +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) def make_regression_with_outliers(n_samples=50, n_features=20): diff --git a/sklearn/linear_model/tests/test_least_angle.py 
b/sklearn/linear_model/tests/test_least_angle.py index ea47d529b2340..6177940974f28 100644 --- a/sklearn/linear_model/tests/test_least_angle.py +++ b/sklearn/linear_model/tests/test_least_angle.py @@ -3,20 +3,29 @@ import numpy as np import pytest from scipy import linalg + +from sklearn import datasets, linear_model from sklearn.base import clone +from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import ( + Lars, + LarsCV, + LassoLars, + LassoLarsCV, + LassoLarsIC, + lars_path, +) +from sklearn.linear_model._least_angle import _lars_path_residues from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import TempMemmap from sklearn.utils import check_random_state -from sklearn.exceptions import ConvergenceWarning -from sklearn import linear_model, datasets -from sklearn.linear_model._least_angle import _lars_path_residues -from sklearn.linear_model import LassoLarsIC, lars_path -from sklearn.linear_model import Lars, LassoLars, LarsCV, LassoLarsCV +from sklearn.utils._testing import ( + TempMemmap, + assert_allclose, + assert_array_almost_equal, + ignore_warnings, +) # TODO: use another dataset that has multiple drops diabetes = datasets.load_diabetes() @@ -59,8 +68,8 @@ def test_simple(): # Principle of Lars is to keep covariances tied and decreasing # also test verbose output - from io import StringIO import sys + from io import StringIO old_stdout = sys.stdout try: diff --git a/sklearn/linear_model/tests/test_linear_loss.py b/sklearn/linear_model/tests/test_linear_loss.py index 0c0053a103098..99b3cbdcf3f23 100644 --- a/sklearn/linear_model/tests/test_linear_loss.py +++ b/sklearn/linear_model/tests/test_linear_loss.py @@ -4,8 +4,8 @@ Note that correctness of losses (which compose LinearModelLoss) is already well covered in the _loss module. """ -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose from scipy import linalg, optimize, sparse @@ -18,7 +18,6 @@ from sklearn.linear_model._linear_loss import LinearModelLoss from sklearn.utils.extmath import squared_norm - # We do not need to test all losses, just what LinearModelLoss does on top of the # base losses. 
LOSSES = [HalfBinomialLoss, HalfMultinomialLoss, HalfPoissonLoss] diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index a470fe412ab36..85f5c2d52b745 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -2,37 +2,42 @@ import os import warnings from functools import partial -import numpy as np -from numpy.testing import assert_allclose, assert_almost_equal -from numpy.testing import assert_array_almost_equal, assert_array_equal -from scipy import sparse +import numpy as np import pytest +from numpy.testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) +from scipy import sparse from sklearn.base import clone from sklearn.datasets import load_iris, make_classification -from sklearn.metrics import log_loss -from sklearn.metrics import get_scorer -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import train_test_split -from sklearn.model_selection import cross_val_score -from sklearn.preprocessing import LabelEncoder, StandardScaler -from sklearn.utils import compute_class_weight, _IS_32BIT -from sklearn.utils._testing import ignore_warnings -from sklearn.utils import shuffle -from sklearn.linear_model import SGDClassifier -from sklearn.preprocessing import scale -from sklearn.utils._testing import skip_if_no_parallel -from sklearn.svm import l1_min_c - from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import SGDClassifier from sklearn.linear_model._logistic import ( - _log_reg_scoring_path, - _logistic_regression_path, LogisticRegression as LogisticRegressionDefault, +) +from sklearn.linear_model._logistic import ( LogisticRegressionCV as LogisticRegressionCVDefault, ) +from sklearn.linear_model._logistic import ( + _log_reg_scoring_path, + _logistic_regression_path, +) +from sklearn.metrics import get_scorer, log_loss +from sklearn.model_selection import ( + GridSearchCV, + StratifiedKFold, + cross_val_score, + train_test_split, +) +from sklearn.preprocessing import LabelEncoder, StandardScaler, scale +from sklearn.svm import l1_min_c +from sklearn.utils import _IS_32BIT, compute_class_weight, shuffle +from sklearn.utils._testing import ignore_warnings, skip_if_no_parallel pytestmark = pytest.mark.filterwarnings( "error::sklearn.exceptions.ConvergenceWarning:sklearn.*" @@ -1993,15 +1998,16 @@ def test_sample_weight_not_modified(multi_class, class_weight): @pytest.mark.parametrize("solver", SOLVERS) -def test_large_sparse_matrix(solver): +def test_large_sparse_matrix(solver, global_random_seed): # Solvers either accept large sparse matrices, or raise helpful error. # Non-regression test for pull-request #21093. 
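The `global_random_seed` argument added to the test signature above refers to scikit-learn's seed fixture: each run of the test receives an integer seed, and the set of seeds exercised is driven by the `SKLEARN_TESTS_GLOBAL_RANDOM_SEED` environment variable in the project's test configuration. A minimal sketch of the pattern (the test name and body here are hypothetical, not part of the suite):

import numpy as np

def test_reproducible_with_fixture_seed(global_random_seed):
    # Derive all randomness from the fixture-provided seed so a rerun with the
    # same seed reproduces the data exactly, while CI can sweep several seeds
    # by changing the environment variable.
    rng = np.random.RandomState(global_random_seed)
    X = rng.normal(size=(20, 5))
    assert X.shape == (20, 5)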
# generate sparse matrix with int64 indices - X = sparse.rand(20, 10, format="csr") + X = sparse.rand(20, 10, format="csr", random_state=global_random_seed) for attr in ["indices", "indptr"]: setattr(X, attr, getattr(X, attr).astype("int64")) - y = np.random.randint(2, size=X.shape[0]) + rng = np.random.RandomState(global_random_seed) + y = rng.randint(2, size=X.shape[0]) if solver in ["liblinear", "sag", "saga"]: msg = "Only sparse matrices with 32-bit integer indices" @@ -2057,3 +2063,29 @@ def test_liblinear_not_stuck(): with warnings.catch_warnings(): warnings.simplefilter("error", ConvergenceWarning) clf.fit(X_prep, y) + + +@pytest.mark.parametrize("solver", SOLVERS) +def test_zero_max_iter(solver): + # Make sure we can inspect the state of LogisticRegression right after + # initialization (before the first weight update). + X, y = load_iris(return_X_y=True) + y = y == 2 + with ignore_warnings(category=ConvergenceWarning): + clf = LogisticRegression(solver=solver, max_iter=0).fit(X, y) + if solver not in ["saga", "sag"]: + # XXX: sag and saga have n_iter_ = [1]... + assert clf.n_iter_ == 0 + + if solver != "lbfgs": + # XXX: lbfgs has already started to update the coefficients... + assert_allclose(clf.coef_, np.zeros_like(clf.coef_)) + assert_allclose( + clf.decision_function(X), + np.full(shape=X.shape[0], fill_value=clf.intercept_), + ) + assert_allclose( + clf.predict_proba(X), + np.full(shape=(X.shape[0], 2), fill_value=0.5), + ) + assert clf.score(X, y) < 0.7 diff --git a/sklearn/linear_model/tests/test_omp.py b/sklearn/linear_model/tests/test_omp.py index 599e2940f9403..47b5d01744b27 100644 --- a/sklearn/linear_model/tests/test_omp.py +++ b/sklearn/linear_model/tests/test_omp.py @@ -1,25 +1,26 @@ # Author: Vlad Niculae # License: BSD 3 clause -import numpy as np -import pytest import warnings -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings - +import numpy as np +import pytest +from sklearn.datasets import make_sparse_coded_signal from sklearn.linear_model import ( - orthogonal_mp, - orthogonal_mp_gram, + LinearRegression, OrthogonalMatchingPursuit, OrthogonalMatchingPursuitCV, - LinearRegression, + orthogonal_mp, + orthogonal_mp_gram, ) from sklearn.utils import check_random_state -from sklearn.datasets import make_sparse_coded_signal +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) n_samples, n_features, n_nonzero_coefs, n_targets = 25, 35, 5, 3 y, X, gamma = make_sparse_coded_signal( diff --git a/sklearn/linear_model/tests/test_passive_aggressive.py b/sklearn/linear_model/tests/test_passive_aggressive.py index 06b6bd5b84cb1..031376d034b32 100644 --- a/sklearn/linear_model/tests/test_passive_aggressive.py +++ b/sklearn/linear_model/tests/test_passive_aggressive.py @@ -1,16 +1,16 @@ import numpy as np -import scipy.sparse as sp - import pytest +import scipy.sparse as sp -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_almost_equal from sklearn.base import ClassifierMixin -from sklearn.utils import check_random_state from sklearn.datasets import load_iris -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.linear_model import PassiveAggressiveRegressor +from sklearn.linear_model import 
PassiveAggressiveClassifier, PassiveAggressiveRegressor +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) iris = load_iris() random_state = check_random_state(12) diff --git a/sklearn/linear_model/tests/test_perceptron.py b/sklearn/linear_model/tests/test_perceptron.py index 4c4f092c69d71..e2c947a887bde 100644 --- a/sklearn/linear_model/tests/test_perceptron.py +++ b/sklearn/linear_model/tests/test_perceptron.py @@ -1,12 +1,11 @@ import numpy as np -import scipy.sparse as sp import pytest +import scipy.sparse as sp -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils import check_random_state from sklearn.datasets import load_iris from sklearn.linear_model import Perceptron +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_allclose, assert_array_almost_equal iris = load_iris() random_state = check_random_state(12) diff --git a/sklearn/linear_model/tests/test_quantile.py b/sklearn/linear_model/tests/test_quantile.py index ed87e60ae0df4..da637813fbb62 100644 --- a/sklearn/linear_model/tests/test_quantile.py +++ b/sklearn/linear_model/tests/test_quantile.py @@ -5,15 +5,14 @@ import numpy as np import pytest from pytest import approx -from scipy.optimize import minimize from scipy import sparse +from scipy.optimize import minimize from sklearn.datasets import make_regression from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model import HuberRegressor, QuantileRegressor from sklearn.metrics import mean_pinball_loss -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import skip_if_32bit +from sklearn.utils._testing import assert_allclose, skip_if_32bit from sklearn.utils.fixes import parse_version, sp_version @@ -28,6 +27,10 @@ def default_solver(): return "highs" if sp_version >= parse_version("1.6.0") else "interior-point" +@pytest.mark.skipif( + parse_version(sp_version.base_version) >= parse_version("1.11"), + reason="interior-point solver is not available in SciPy 1.11", +) @pytest.mark.parametrize("solver", ["interior-point", "revised simplex"]) def test_incompatible_solver_for_sparse_input(X_y_data, solver): X, y = X_y_data @@ -237,6 +240,10 @@ def test_equivariance(quantile, default_solver): assert_allclose(model2.coef_, np.linalg.solve(A, model1.coef_), rtol=1e-5) +@pytest.mark.skipif( + parse_version(sp_version.base_version) >= parse_version("1.11"), + reason="interior-point solver is not available in SciPy 1.11", +) @pytest.mark.filterwarnings("ignore:`method='interior-point'` is deprecated") def test_linprog_failure(): """Test that linprog fails.""" @@ -280,6 +287,10 @@ def test_sparse_input(sparse_format, solver, fit_intercept, default_solver): # TODO (1.4): remove this test in 1.4 +@pytest.mark.skipif( + parse_version(sp_version.base_version) >= parse_version("1.11"), + reason="interior-point solver is not available in SciPy 1.11", +) def test_warning_new_default(X_y_data): """Check that we warn about the new default solver.""" X, y = X_y_data diff --git a/sklearn/linear_model/tests/test_ransac.py b/sklearn/linear_model/tests/test_ransac.py index b39c50340ee70..c4d50bedf87b8 100644 --- a/sklearn/linear_model/tests/test_ransac.py +++ b/sklearn/linear_model/tests/test_ransac.py @@ -1,18 +1,19 @@ import numpy as np import pytest +from numpy.testing import assert_array_almost_equal, assert_array_equal from 
scipy import sparse -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_equal - -from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_allclose from sklearn.datasets import make_regression -from sklearn.linear_model import LinearRegression, RANSACRegressor, Ridge -from sklearn.linear_model import OrthogonalMatchingPursuit -from sklearn.linear_model._ransac import _dynamic_max_trials from sklearn.exceptions import ConvergenceWarning - +from sklearn.linear_model import ( + LinearRegression, + OrthogonalMatchingPursuit, + RANSACRegressor, + Ridge, +) +from sklearn.linear_model._ransac import _dynamic_max_trials +from sklearn.utils import check_random_state +from sklearn.utils._testing import assert_allclose # Generate coordinates of line X = np.arange(-200, 200) diff --git a/sklearn/linear_model/tests/test_ridge.py b/sklearn/linear_model/tests/test_ridge.py index 4b9a0765b4caf..e7f2c911c72b3 100644 --- a/sklearn/linear_model/tests/test_ridge.py +++ b/sklearn/linear_model/tests/test_ridge.py @@ -1,52 +1,53 @@ -import numpy as np -import scipy.sparse as sp -from scipy import linalg +import warnings from itertools import product +import numpy as np import pytest -import warnings - -from sklearn.utils import _IS_32BIT -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings - -from sklearn.exceptions import ConvergenceWarning +import scipy.sparse as sp +from scipy import linalg from sklearn import datasets -from sklearn.metrics import mean_squared_error -from sklearn.metrics import make_scorer -from sklearn.metrics import get_scorer - -from sklearn.linear_model import LinearRegression -from sklearn.linear_model import ridge_regression -from sklearn.linear_model import Ridge -from sklearn.linear_model._ridge import _RidgeGCV -from sklearn.linear_model import RidgeCV -from sklearn.linear_model import RidgeClassifier -from sklearn.linear_model import RidgeClassifierCV -from sklearn.linear_model._ridge import _solve_cholesky -from sklearn.linear_model._ridge import _solve_cholesky_kernel -from sklearn.linear_model._ridge import _solve_svd -from sklearn.linear_model._ridge import _solve_lbfgs -from sklearn.linear_model._ridge import _check_gcv_mode -from sklearn.linear_model._ridge import _X_CenterStackOp -from sklearn.datasets import make_low_rank_matrix -from sklearn.datasets import make_regression -from sklearn.datasets import make_classification -from sklearn.datasets import make_multilabel_classification - -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import KFold -from sklearn.model_selection import GroupKFold -from sklearn.model_selection import cross_val_predict -from sklearn.model_selection import LeaveOneOut - +from sklearn.datasets import ( + make_classification, + make_low_rank_matrix, + make_multilabel_classification, + make_regression, +) +from sklearn.exceptions import ConvergenceWarning +from sklearn.linear_model import ( + LinearRegression, + Ridge, + RidgeClassifier, + RidgeClassifierCV, + RidgeCV, + ridge_regression, +) +from sklearn.linear_model._ridge import ( + _check_gcv_mode, + _RidgeGCV, + _solve_cholesky, + _solve_cholesky_kernel, + _solve_lbfgs, + _solve_svd, + _X_CenterStackOp, +) +from sklearn.metrics import get_scorer, make_scorer, 
mean_squared_error +from sklearn.model_selection import ( + GridSearchCV, + GroupKFold, + KFold, + LeaveOneOut, + cross_val_predict, +) from sklearn.preprocessing import minmax_scale -from sklearn.utils import check_random_state - +from sklearn.utils import _IS_32BIT, check_random_state +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) SOLVERS = ["svd", "sparse_cg", "cholesky", "lsqr", "sag", "saga"] SPARSE_SOLVERS_WITH_INTERCEPT = ("sparse_cg", "sag") @@ -1978,7 +1979,9 @@ def test_lbfgs_solver_error(): @pytest.mark.parametrize("sparseX", [False, True]) @pytest.mark.parametrize("data", ["tall", "wide"]) @pytest.mark.parametrize("solver", SOLVERS + ["lbfgs"]) -def test_ridge_sample_weight_consistency(fit_intercept, sparseX, data, solver): +def test_ridge_sample_weight_consistency( + fit_intercept, sparseX, data, solver, global_random_seed +): """Test that the impact of sample_weight is consistent. Note that this test is stricter than the common test @@ -1989,6 +1992,9 @@ def test_ridge_sample_weight_consistency(fit_intercept, sparseX, data, solver): if solver == "svd" or (solver in ("cholesky", "saga") and fit_intercept): pytest.skip("unsupported configuration") + # XXX: this test is quite sensitive to the seed used to generate the data: + # ideally we would like the test to pass for any global_random_seed but this is not + # the case at the moment. rng = np.random.RandomState(42) n_samples = 12 if data == "tall": @@ -2005,6 +2011,7 @@ def test_ridge_sample_weight_consistency(fit_intercept, sparseX, data, solver): alpha=1.0, solver=solver, positive=(solver == "lbfgs"), + random_state=global_random_seed, # for sag/saga tol=1e-12, ) diff --git a/sklearn/linear_model/tests/test_sag.py b/sklearn/linear_model/tests/test_sag.py index 3779c3d805d87..471d2b8658114 100644 --- a/sklearn/linear_model/tests/test_sag.py +++ b/sklearn/linear_model/tests/test_sag.py @@ -5,27 +5,28 @@ import math import re -import pytest + import numpy as np +import pytest import scipy.sparse as sp from scipy.special import logsumexp from sklearn._loss.loss import HalfMultinomialLoss +from sklearn.base import clone +from sklearn.datasets import load_iris, make_blobs, make_classification +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.linear_model._base import make_dataset from sklearn.linear_model._linear_loss import LinearModelLoss from sklearn.linear_model._sag import get_auto_step_size from sklearn.linear_model._sag_fast import _multinomial_grad_loss_all_samples -from sklearn.linear_model import LogisticRegression, Ridge -from sklearn.linear_model._base import make_dataset - +from sklearn.preprocessing import LabelBinarizer, LabelEncoder +from sklearn.utils import check_random_state, compute_class_weight +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, +) from sklearn.utils.extmath import row_norms -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils import compute_class_weight -from sklearn.utils import check_random_state -from sklearn.preprocessing import LabelEncoder, LabelBinarizer -from sklearn.datasets import make_blobs, load_iris, make_classification -from sklearn.base import clone iris = load_iris() diff --git a/sklearn/linear_model/tests/test_sgd.py b/sklearn/linear_model/tests/test_sgd.py 
index 9c921ddf2ebda..6edb76d50f738 100644 --- a/sklearn/linear_model/tests/test_sgd.py +++ b/sklearn/linear_model/tests/test_sgd.py @@ -1,29 +1,32 @@ import pickle +from unittest.mock import Mock import joblib -import pytest import numpy as np +import pytest import scipy.sparse as sp -from unittest.mock import Mock -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import ignore_warnings - -from sklearn import linear_model, datasets, metrics +from sklearn import datasets, linear_model, metrics from sklearn.base import clone, is_classifier -from sklearn.svm import OneClassSVM -from sklearn.preprocessing import LabelEncoder, scale, MinMaxScaler -from sklearn.preprocessing import StandardScaler -from sklearn.kernel_approximation import Nystroem -from sklearn.pipeline import make_pipeline from sklearn.exceptions import ConvergenceWarning -from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit +from sklearn.kernel_approximation import Nystroem from sklearn.linear_model import _sgd_fast as sgd_fast from sklearn.linear_model import _stochastic_gradient -from sklearn.model_selection import RandomizedSearchCV +from sklearn.model_selection import ( + RandomizedSearchCV, + ShuffleSplit, + StratifiedShuffleSplit, +) +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import LabelEncoder, MinMaxScaler, StandardScaler, scale +from sklearn.svm import OneClassSVM +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) def _update_kwargs(kwargs): diff --git a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py index 7434729819716..1842f5e77dda3 100644 --- a/sklearn/linear_model/tests/test_sparse_coordinate_descent.py +++ b/sklearn/linear_model/tests/test_sparse_coordinate_descent.py @@ -1,17 +1,17 @@ import numpy as np -from numpy.testing import assert_allclose import pytest import scipy.sparse as sp +from numpy.testing import assert_allclose from sklearn.datasets import make_regression -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import create_memmap_backed_data - -from sklearn.utils._testing import ignore_warnings from sklearn.exceptions import ConvergenceWarning - -from sklearn.linear_model import Lasso, ElasticNet, LassoCV, ElasticNetCV +from sklearn.linear_model import ElasticNet, ElasticNetCV, Lasso, LassoCV +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + create_memmap_backed_data, + ignore_warnings, +) def test_sparse_coef(): @@ -19,7 +19,7 @@ def test_sparse_coef(): clf = ElasticNet() clf.coef_ = [1, 2, 3] - assert sp.isspmatrix(clf.sparse_coef_) + assert sp.issparse(clf.sparse_coef_) assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_ diff --git a/sklearn/linear_model/tests/test_theil_sen.py b/sklearn/linear_model/tests/test_theil_sen.py index 27cafd2740076..c8415d02be80a 100644 --- a/sklearn/linear_model/tests/test_theil_sen.py +++ b/sklearn/linear_model/tests/test_theil_sen.py @@ -8,16 +8,24 @@ import re import sys from contextlib import contextmanager + import numpy as np import pytest -from numpy.testing import assert_array_equal, 
assert_array_less -from numpy.testing import assert_array_almost_equal +from numpy.testing import ( + assert_array_almost_equal, + assert_array_equal, + assert_array_less, +) from scipy.linalg import norm from scipy.optimize import fmin_bfgs + from sklearn.exceptions import ConvergenceWarning from sklearn.linear_model import LinearRegression, TheilSenRegressor -from sklearn.linear_model._theil_sen import _spatial_median, _breakdown_point -from sklearn.linear_model._theil_sen import _modified_weiszfeld_step +from sklearn.linear_model._theil_sen import ( + _breakdown_point, + _modified_weiszfeld_step, + _spatial_median, +) from sklearn.utils._testing import assert_almost_equal diff --git a/sklearn/manifold/__init__.py b/sklearn/manifold/__init__.py index ae708aa1fd65c..1e8d96c7cf94b 100644 --- a/sklearn/manifold/__init__.py +++ b/sklearn/manifold/__init__.py @@ -2,8 +2,8 @@ The :mod:`sklearn.manifold` module implements data embedding techniques. """ -from ._locally_linear import locally_linear_embedding, LocallyLinearEmbedding from ._isomap import Isomap +from ._locally_linear import LocallyLinearEmbedding, locally_linear_embedding from ._mds import MDS, smacof from ._spectral_embedding import SpectralEmbedding, spectral_embedding from ._t_sne import TSNE, trustworthiness diff --git a/sklearn/manifold/_isomap.py b/sklearn/manifold/_isomap.py index 0917ef7d207bc..c6e8bfdc42685 100644 --- a/sklearn/manifold/_isomap.py +++ b/sklearn/manifold/_isomap.py @@ -3,24 +3,25 @@ # Author: Jake Vanderplas -- # License: BSD 3 clause (C) 2011 import warnings - -import numpy as np from numbers import Integral, Real +import numpy as np from scipy.sparse import issparse -from scipy.sparse.csgraph import shortest_path -from scipy.sparse.csgraph import connected_components - -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context -from ..neighbors import NearestNeighbors, kneighbors_graph -from ..neighbors import radius_neighbors_graph -from ..utils.validation import check_is_fitted +from scipy.sparse.csgraph import connected_components, shortest_path + +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) from ..decomposition import KernelPCA +from ..metrics.pairwise import _VALID_METRICS +from ..neighbors import NearestNeighbors, kneighbors_graph, radius_neighbors_graph from ..preprocessing import KernelCenterer -from ..utils.graph import _fix_connected_components from ..utils._param_validation import Interval, StrOptions -from ..metrics.pairwise import _VALID_METRICS +from ..utils.graph import _fix_connected_components +from ..utils.validation import check_is_fitted class Isomap(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): @@ -93,7 +94,7 @@ class Isomap(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): .. versionadded:: 0.22 - p : int, default=2 + p : float, default=2 Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. 
When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance diff --git a/sklearn/manifold/_locally_linear.py b/sklearn/manifold/_locally_linear.py index 6f57b0627b8be..0547d2bee1402 100644 --- a/sklearn/manifold/_locally_linear.py +++ b/sklearn/manifold/_locally_linear.py @@ -7,25 +7,23 @@ from numbers import Integral, Real import numpy as np -from scipy.linalg import svd, qr, solve -from scipy.sparse import eye, csr_matrix +from scipy.linalg import eigh, qr, solve, svd +from scipy.sparse import csr_matrix, eye from scipy.sparse.linalg import eigsh -from scipy.linalg import eigh from ..base import ( BaseEstimator, - TransformerMixin, - _UnstableArchMixin, ClassNamePrefixFeaturesOutMixin, + TransformerMixin, _fit_context, + _UnstableArchMixin, ) -from ..utils import check_random_state, check_array +from ..neighbors import NearestNeighbors +from ..utils import check_array, check_random_state from ..utils._arpack import _init_arpack_v0 from ..utils._param_validation import Interval, StrOptions from ..utils.extmath import stable_cumsum -from ..utils.validation import check_is_fitted -from ..utils.validation import FLOAT_DTYPES -from ..neighbors import NearestNeighbors +from ..utils.validation import FLOAT_DTYPES, check_is_fitted def barycenter_weights(X, Y, indices, reg=1e-3): diff --git a/sklearn/manifold/_mds.py b/sklearn/manifold/_mds.py index 6b7a818b94ea8..e497c49a117be 100644 --- a/sklearn/manifold/_mds.py +++ b/sklearn/manifold/_mds.py @@ -5,20 +5,18 @@ # author: Nelle Varoquaux # License: BSD +import warnings from numbers import Integral, Real import numpy as np from joblib import effective_n_jobs -import warnings - -from ..base import BaseEstimator -from ..base import _fit_context -from ..metrics import euclidean_distances -from ..utils import check_random_state, check_array, check_symmetric +from ..base import BaseEstimator, _fit_context from ..isotonic import IsotonicRegression -from ..utils._param_validation import Interval, StrOptions, Hidden -from ..utils.parallel import delayed, Parallel +from ..metrics import euclidean_distances +from ..utils import check_array, check_random_state, check_symmetric +from ..utils._param_validation import Hidden, Interval, StrOptions +from ..utils.parallel import Parallel, delayed def _smacof_single( diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py index af965a1362b8f..e6b21de7951a3 100644 --- a/sklearn/manifold/_spectral_embedding.py +++ b/sklearn/manifold/_spectral_embedding.py @@ -5,29 +5,27 @@ # License: BSD 3 clause -from numbers import Integral, Real import warnings +from numbers import Integral, Real import numpy as np from scipy import sparse from scipy.linalg import eigh -from scipy.sparse.linalg import eigsh -from scipy.sparse.linalg import lobpcg from scipy.sparse.csgraph import connected_components from scipy.sparse.csgraph import laplacian as csgraph_laplacian +from scipy.sparse.linalg import eigsh, lobpcg -from ..base import BaseEstimator -from ..base import _fit_context +from ..base import BaseEstimator, _fit_context +from ..metrics.pairwise import rbf_kernel +from ..neighbors import NearestNeighbors, kneighbors_graph from ..utils import ( check_array, check_random_state, check_symmetric, ) from ..utils._arpack import _init_arpack_v0 -from ..utils.extmath import _deterministic_vector_sign_flip from ..utils._param_validation import Interval, StrOptions -from ..metrics.pairwise import rbf_kernel -from ..neighbors import kneighbors_graph, NearestNeighbors +from 
..utils.extmath import _deterministic_vector_sign_flip def _graph_connected_component(graph, node_id): @@ -87,7 +85,7 @@ def _graph_is_connected(graph): is_connected : bool True means the graph is fully connected and False means not. """ - if sparse.isspmatrix(graph): + if sparse.issparse(graph): # sparse graph, find all the connected components n_connected_components, _ = connected_components(graph) return n_connected_components == 1 @@ -120,7 +118,7 @@ def _set_diag(laplacian, value, norm_laplacian): """ n_nodes = laplacian.shape[0] # We need all entries in the diagonal to values - if not sparse.isspmatrix(laplacian): + if not sparse.issparse(laplacian): if norm_laplacian: laplacian.flat[:: n_nodes + 1] = value else: @@ -282,7 +280,7 @@ def spectral_embedding( if ( eigen_solver == "arpack" or eigen_solver != "lobpcg" - and (not sparse.isspmatrix(laplacian) or n_nodes < 5 * n_components) + and (not sparse.issparse(laplacian) or n_nodes < 5 * n_components) ): # lobpcg used with eigen_solver='amg' has bugs for low number of nodes # for details see the source code in scipy: @@ -373,7 +371,7 @@ def spectral_embedding( # see note above under arpack why lobpcg has problems with small # number of nodes # lobpcg will fallback to eigh, so we short circuit it - if sparse.isspmatrix(laplacian): + if sparse.issparse(laplacian): laplacian = laplacian.toarray() _, diffusion_map = eigh(laplacian, check_finite=False) embedding = diffusion_map.T[:n_components] diff --git a/sklearn/manifold/_t_sne.py b/sklearn/manifold/_t_sne.py index c372ddcca3c2e..76df3a2a449f5 100644 --- a/sklearn/manifold/_t_sne.py +++ b/sklearn/manifold/_t_sne.py @@ -8,28 +8,31 @@ # * Fast Optimization for t-SNE: # https://cseweb.ucsd.edu/~lvdmaaten/workshops/nips2010/papers/vandermaaten.pdf +from numbers import Integral, Real from time import time + import numpy as np from scipy import linalg -from scipy.spatial.distance import pdist -from scipy.spatial.distance import squareform from scipy.sparse import csr_matrix, issparse -from numbers import Integral, Real +from scipy.spatial.distance import pdist, squareform + +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from ..decomposition import PCA +from ..metrics.pairwise import _VALID_METRICS, pairwise_distances from ..neighbors import NearestNeighbors -from ..base import BaseEstimator, ClassNamePrefixFeaturesOutMixin, TransformerMixin -from ..base import _fit_context from ..utils import check_random_state from ..utils._openmp_helpers import _openmp_effective_n_threads -from ..utils.validation import check_non_negative from ..utils._param_validation import Interval, StrOptions -from ..decomposition import PCA -from ..metrics.pairwise import pairwise_distances, _VALID_METRICS +from ..utils.validation import check_non_negative # mypy error: Module 'sklearn.manifold' has no attribute '_utils' -from . import _utils # type: ignore - # mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne' -from . import _barnes_hut_tsne # type: ignore +from . 
import _barnes_hut_tsne, _utils # type: ignore MACHINE_EPSILON = np.finfo(np.double).eps diff --git a/sklearn/manifold/tests/test_isomap.py b/sklearn/manifold/tests/test_isomap.py index 3f8e9848ea3b6..6049e2e618b19 100644 --- a/sklearn/manifold/tests/test_isomap.py +++ b/sklearn/manifold/tests/test_isomap.py @@ -1,13 +1,11 @@ +import math from itertools import product + import numpy as np -import math import pytest +from scipy.sparse import rand as sparse_rand -from sklearn import datasets, clone -from sklearn import manifold -from sklearn import neighbors -from sklearn import pipeline -from sklearn import preprocessing +from sklearn import clone, datasets, manifold, neighbors, pipeline, preprocessing from sklearn.datasets import make_blobs from sklearn.metrics.pairwise import pairwise_distances from sklearn.utils._testing import ( @@ -15,7 +13,6 @@ assert_allclose_dense_sparse, assert_array_equal, ) -from scipy.sparse import rand as sparse_rand eigen_solvers = ["auto", "dense", "arpack"] path_methods = ["auto", "FW", "D"] diff --git a/sklearn/manifold/tests/test_locally_linear.py b/sklearn/manifold/tests/test_locally_linear.py index 7ebd5981c5df0..17e0e7644a453 100644 --- a/sklearn/manifold/tests/test_locally_linear.py +++ b/sklearn/manifold/tests/test_locally_linear.py @@ -1,17 +1,17 @@ from itertools import product import numpy as np -from sklearn.utils._testing import ( - assert_allclose, - assert_array_equal, -) -from scipy import linalg import pytest +from scipy import linalg -from sklearn import neighbors, manifold +from sklearn import manifold, neighbors from sklearn.datasets import make_blobs from sklearn.manifold._locally_linear import barycenter_kneighbors_graph -from sklearn.utils._testing import ignore_warnings +from sklearn.utils._testing import ( + assert_allclose, + assert_array_equal, + ignore_warnings, +) eigen_solvers = ["dense", "arpack"] @@ -119,7 +119,7 @@ def test_pipeline(): # check that LocallyLinearEmbedding works fine as a Pipeline # only checks that no error is raised. 
# TODO check that it actually does something useful - from sklearn import pipeline, datasets + from sklearn import datasets, pipeline X, y = datasets.make_blobs(random_state=0) clf = pipeline.Pipeline( diff --git a/sklearn/manifold/tests/test_mds.py b/sklearn/manifold/tests/test_mds.py index 0ddc4d4eecb5f..428f56e9870bf 100644 --- a/sklearn/manifold/tests/test_mds.py +++ b/sklearn/manifold/tests/test_mds.py @@ -1,7 +1,8 @@ from unittest.mock import Mock + import numpy as np -from numpy.testing import assert_array_almost_equal, assert_allclose import pytest +from numpy.testing import assert_allclose, assert_array_almost_equal from sklearn.manifold import _mds as mds from sklearn.metrics import euclidean_distances diff --git a/sklearn/manifold/tests/test_spectral_embedding.py b/sklearn/manifold/tests/test_spectral_embedding.py index 2dc51704e9788..e194d30529205 100644 --- a/sklearn/manifold/tests/test_spectral_embedding.py +++ b/sklearn/manifold/tests/test_spectral_embedding.py @@ -1,26 +1,24 @@ from unittest.mock import Mock -import pytest import numpy as np - +import pytest from scipy import sparse -from scipy.sparse import csgraph from scipy.linalg import eigh -from scipy.sparse.linalg import eigsh -from scipy.sparse.linalg import lobpcg +from scipy.sparse import csgraph +from scipy.sparse.linalg import eigsh, lobpcg -from sklearn.manifold import SpectralEmbedding, _spectral_embedding -from sklearn.manifold._spectral_embedding import _graph_is_connected -from sklearn.manifold._spectral_embedding import _graph_connected_component -from sklearn.manifold import spectral_embedding -from sklearn.metrics.pairwise import rbf_kernel -from sklearn.metrics import normalized_mutual_info_score, pairwise_distances -from sklearn.neighbors import NearestNeighbors from sklearn.cluster import KMeans from sklearn.datasets import make_blobs +from sklearn.manifold import SpectralEmbedding, _spectral_embedding, spectral_embedding +from sklearn.manifold._spectral_embedding import ( + _graph_connected_component, + _graph_is_connected, +) +from sklearn.metrics import normalized_mutual_info_score, pairwise_distances +from sklearn.metrics.pairwise import rbf_kernel +from sklearn.neighbors import NearestNeighbors +from sklearn.utils._testing import assert_array_almost_equal, assert_array_equal from sklearn.utils.extmath import _deterministic_vector_sign_flip -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal try: from pyamg import smoothed_aggregation_solver # noqa @@ -245,6 +243,9 @@ def test_spectral_embedding_callable_affinity(X, seed=36): @pytest.mark.filterwarnings( "ignore:scipy.linalg.pinv2 is deprecated:DeprecationWarning:pyamg.*" ) +@pytest.mark.filterwarnings( + "ignore:np.find_common_type is deprecated:DeprecationWarning:pyamg.*" +) @pytest.mark.skipif( not pyamg_available, reason="PyAMG is required for the tests in this function." ) @@ -302,6 +303,10 @@ def test_spectral_embedding_amg_solver(dtype, seed=36): @pytest.mark.skipif( not pyamg_available, reason="PyAMG is required for the tests in this function." 
) +# TODO: Remove when pyamg removes the use of np.find_common_type +@pytest.mark.filterwarnings( + "ignore:np.find_common_type is deprecated:DeprecationWarning:pyamg.*" +) @pytest.mark.parametrize("dtype", (np.float32, np.float64)) def test_spectral_embedding_amg_solver_failure(dtype, seed=36): # Non-regression test for amg solver failure (issue #13393 on github) @@ -466,6 +471,10 @@ def test_error_pyamg_not_available(): se_precomp.fit_transform(S) +# TODO: Remove when pyamg removes the use of np.find_common_type +@pytest.mark.filterwarnings( + "ignore:np.find_common_type is deprecated:DeprecationWarning:pyamg.*" +) @pytest.mark.parametrize("solver", ["arpack", "amg", "lobpcg"]) def test_spectral_eigen_tol_auto(monkeypatch, solver): """Test that `eigen_tol="auto"` is resolved correctly""" diff --git a/sklearn/manifold/tests/test_t_sne.py b/sklearn/manifold/tests/test_t_sne.py index 6bbcc15b1a95e..813d72a3de8ae 100644 --- a/sklearn/manifold/tests/test_t_sne.py +++ b/sklearn/manifold/tests/test_t_sne.py @@ -1,39 +1,45 @@ import sys from io import StringIO + import numpy as np -from numpy.testing import assert_allclose -import scipy.sparse as sp import pytest +import scipy.sparse as sp +from numpy.testing import assert_allclose +from scipy.optimize import check_grad +from scipy.spatial.distance import pdist, squareform from sklearn import config_context -from sklearn.neighbors import NearestNeighbors -from sklearn.neighbors import kneighbors_graph +from sklearn.datasets import make_blobs from sklearn.exceptions import EfficiencyWarning -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import skip_if_32bit -from sklearn.utils import check_random_state -from sklearn.manifold._t_sne import _joint_probabilities -from sklearn.manifold._t_sne import _joint_probabilities_nn -from sklearn.manifold._t_sne import _kl_divergence -from sklearn.manifold._t_sne import _kl_divergence_bh -from sklearn.manifold._t_sne import _gradient_descent -from sklearn.manifold._t_sne import trustworthiness -from sklearn.manifold import TSNE # mypy error: Module 'sklearn.manifold' has no attribute '_barnes_hut_tsne' -from sklearn.manifold import _barnes_hut_tsne # type: ignore +from sklearn.manifold import ( # type: ignore + TSNE, + _barnes_hut_tsne, +) +from sklearn.manifold._t_sne import ( + _gradient_descent, + _joint_probabilities, + _joint_probabilities_nn, + _kl_divergence, + _kl_divergence_bh, + trustworthiness, +) from sklearn.manifold._utils import _binary_search_perplexity -from sklearn.datasets import make_blobs -from scipy.optimize import check_grad -from scipy.spatial.distance import pdist -from scipy.spatial.distance import squareform -from sklearn.metrics.pairwise import pairwise_distances -from sklearn.metrics.pairwise import manhattan_distances -from sklearn.metrics.pairwise import cosine_distances - +from sklearn.metrics.pairwise import ( + cosine_distances, + manhattan_distances, + pairwise_distances, +) +from sklearn.neighbors import NearestNeighbors, kneighbors_graph +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, + skip_if_32bit, +) x = np.linspace(0, 1, 10) xx, yy = np.meshgrid(x, x) @@ -441,7 +447,7 @@ def test_sparse_precomputed_distance(): D_sparse = 
kneighbors_graph(X, n_neighbors=100, mode="distance", include_self=True) D = pairwise_distances(X) assert sp.issparse(D_sparse) - assert_almost_equal(D_sparse.A, D) + assert_almost_equal(D_sparse.toarray(), D) tsne = TSNE( metric="precomputed", random_state=0, init="random", learning_rate="auto" diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index e30d0451cc762..488c776ae9a86 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -4,96 +4,93 @@ """ -from ._ranking import auc -from ._ranking import average_precision_score -from ._ranking import coverage_error -from ._ranking import det_curve -from ._ranking import dcg_score -from ._ranking import label_ranking_average_precision_score -from ._ranking import label_ranking_loss -from ._ranking import ndcg_score -from ._ranking import precision_recall_curve -from ._ranking import roc_auc_score -from ._ranking import roc_curve -from ._ranking import top_k_accuracy_score - -from ._classification import accuracy_score -from ._classification import balanced_accuracy_score -from ._classification import class_likelihood_ratios -from ._classification import classification_report -from ._classification import cohen_kappa_score -from ._classification import confusion_matrix -from ._classification import f1_score -from ._classification import fbeta_score -from ._classification import hamming_loss -from ._classification import hinge_loss -from ._classification import jaccard_score -from ._classification import log_loss -from ._classification import matthews_corrcoef -from ._classification import precision_recall_fscore_support -from ._classification import precision_score -from ._classification import recall_score -from ._classification import zero_one_loss -from ._classification import brier_score_loss -from ._classification import multilabel_confusion_matrix - -from ._dist_metrics import DistanceMetric - from . 
import cluster -from .cluster import adjusted_mutual_info_score -from .cluster import adjusted_rand_score -from .cluster import rand_score -from .cluster import pair_confusion_matrix -from .cluster import completeness_score -from .cluster import consensus_score -from .cluster import homogeneity_completeness_v_measure -from .cluster import homogeneity_score -from .cluster import mutual_info_score -from .cluster import normalized_mutual_info_score -from .cluster import fowlkes_mallows_score -from .cluster import silhouette_samples -from .cluster import silhouette_score -from .cluster import calinski_harabasz_score -from .cluster import v_measure_score -from .cluster import davies_bouldin_score - -from .pairwise import euclidean_distances -from .pairwise import nan_euclidean_distances -from .pairwise import pairwise_distances -from .pairwise import pairwise_distances_argmin -from .pairwise import pairwise_distances_argmin_min -from .pairwise import pairwise_kernels -from .pairwise import pairwise_distances_chunked - -from ._regression import explained_variance_score -from ._regression import max_error -from ._regression import mean_absolute_error -from ._regression import mean_squared_error -from ._regression import mean_squared_log_error -from ._regression import median_absolute_error -from ._regression import mean_absolute_percentage_error -from ._regression import mean_pinball_loss -from ._regression import r2_score -from ._regression import mean_tweedie_deviance -from ._regression import mean_poisson_deviance -from ._regression import mean_gamma_deviance -from ._regression import d2_tweedie_score -from ._regression import d2_pinball_score -from ._regression import d2_absolute_error_score - - -from ._scorer import check_scoring -from ._scorer import make_scorer -from ._scorer import get_scorer -from ._scorer import get_scorer_names - - +from ._classification import ( + accuracy_score, + balanced_accuracy_score, + brier_score_loss, + class_likelihood_ratios, + classification_report, + cohen_kappa_score, + confusion_matrix, + f1_score, + fbeta_score, + hamming_loss, + hinge_loss, + jaccard_score, + log_loss, + matthews_corrcoef, + multilabel_confusion_matrix, + precision_recall_fscore_support, + precision_score, + recall_score, + zero_one_loss, +) +from ._dist_metrics import DistanceMetric +from ._plot.confusion_matrix import ConfusionMatrixDisplay from ._plot.det_curve import DetCurveDisplay -from ._plot.roc_curve import RocCurveDisplay from ._plot.precision_recall_curve import PrecisionRecallDisplay -from ._plot.confusion_matrix import ConfusionMatrixDisplay from ._plot.regression import PredictionErrorDisplay - +from ._plot.roc_curve import RocCurveDisplay +from ._ranking import ( + auc, + average_precision_score, + coverage_error, + dcg_score, + det_curve, + label_ranking_average_precision_score, + label_ranking_loss, + ndcg_score, + precision_recall_curve, + roc_auc_score, + roc_curve, + top_k_accuracy_score, +) +from ._regression import ( + d2_absolute_error_score, + d2_pinball_score, + d2_tweedie_score, + explained_variance_score, + max_error, + mean_absolute_error, + mean_absolute_percentage_error, + mean_gamma_deviance, + mean_pinball_loss, + mean_poisson_deviance, + mean_squared_error, + mean_squared_log_error, + mean_tweedie_deviance, + median_absolute_error, + r2_score, +) +from ._scorer import check_scoring, get_scorer, get_scorer_names, make_scorer +from .cluster import ( + adjusted_mutual_info_score, + adjusted_rand_score, + calinski_harabasz_score, + completeness_score, + 
consensus_score, + davies_bouldin_score, + fowlkes_mallows_score, + homogeneity_completeness_v_measure, + homogeneity_score, + mutual_info_score, + normalized_mutual_info_score, + pair_confusion_matrix, + rand_score, + silhouette_samples, + silhouette_score, + v_measure_score, +) +from .pairwise import ( + euclidean_distances, + nan_euclidean_distances, + pairwise_distances, + pairwise_distances_argmin, + pairwise_distances_argmin_min, + pairwise_distances_chunked, + pairwise_kernels, +) __all__ = [ "accuracy_score", diff --git a/sklearn/metrics/_classification.py b/sklearn/metrics/_classification.py index 1a0f3cebb6806..0819abb463e2b 100644 --- a/sklearn/metrics/_classification.py +++ b/sklearn/metrics/_classification.py @@ -23,27 +23,26 @@ # License: BSD 3 clause -from numbers import Integral, Real import warnings -import numpy as np +from numbers import Integral, Real -from scipy.sparse import coo_matrix -from scipy.sparse import csr_matrix +import numpy as np +from scipy.sparse import coo_matrix, csr_matrix from scipy.special import xlogy -from ..preprocessing import LabelBinarizer -from ..preprocessing import LabelEncoder -from ..utils import assert_all_finite -from ..utils import check_array -from ..utils import check_consistent_length -from ..utils import column_or_1d +from ..exceptions import UndefinedMetricWarning +from ..preprocessing import LabelBinarizer, LabelEncoder +from ..utils import ( + assert_all_finite, + check_array, + check_consistent_length, + column_or_1d, +) +from ..utils._param_validation import Interval, Options, StrOptions, validate_params from ..utils.extmath import _nanaverage -from ..utils.multiclass import unique_labels -from ..utils.multiclass import type_of_target -from ..utils.validation import _check_pos_label_consistency, _num_samples +from ..utils.multiclass import type_of_target, unique_labels from ..utils.sparsefuncs import count_nonzero -from ..utils._param_validation import StrOptions, Options, Interval, validate_params -from ..exceptions import UndefinedMetricWarning +from ..utils.validation import _check_pos_label_consistency, _num_samples def _check_zero_division(zero_division): @@ -148,7 +147,8 @@ def _weighted_sum(sample_score, sample_weight, normalize=False): "y_pred": ["array-like", "sparse matrix"], "normalize": ["boolean"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None): """Accuracy classification score. @@ -235,7 +235,8 @@ def accuracy_score(y_true, y_pred, *, normalize=True, sample_weight=None): "labels": ["array-like", None], "sample_weight": ["array-like", None], "normalize": [StrOptions({"true", "pred", "all"}), None], - } + }, + prefer_skip_nested_validation=True, ) def confusion_matrix( y_true, y_pred, *, labels=None, sample_weight=None, normalize=None @@ -398,7 +399,8 @@ def confusion_matrix( "sample_weight": ["array-like", None], "labels": ["array-like", None], "samplewise": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def multilabel_confusion_matrix( y_true, y_pred, *, sample_weight=None, labels=None, samplewise=False @@ -616,7 +618,8 @@ def multilabel_confusion_matrix( "labels": ["array-like", None], "weights": [StrOptions({"linear", "quadratic"}), None], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def cohen_kappa_score(y1, y2, *, labels=None, weights=None, sample_weight=None): r"""Compute Cohen's kappa: a statistic that measures inter-annotator agreement. 
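The `_classification.py` hunks above attach `prefer_skip_nested_validation=True` to every `@validate_params` decorator. A minimal sketch of that call pattern, assuming the internal `sklearn.utils._param_validation.validate_params` helper behaves as this diff shows; the decorated toy metric below is hypothetical and only illustrates where the new keyword goes:

from sklearn.utils._param_validation import validate_params

@validate_params(
    {"y_true": ["array-like"], "y_pred": ["array-like"]},
    prefer_skip_nested_validation=True,
)
def toy_accuracy(y_true, y_pred):
    # The flag tells validate_params that functions called inside this body
    # do not need to re-run their own parameter validation.
    return sum(a == b for a, b in zip(y_true, y_pred)) / len(y_true)

print(toy_accuracy([0, 1, 1], [0, 1, 0]))  # 0.666...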
@@ -710,7 +713,8 @@ class labels [2]_. Options(Real, {0, 1}), StrOptions({"warn"}), ], - } + }, + prefer_skip_nested_validation=True, ) def jaccard_score( y_true, @@ -888,7 +892,8 @@ def jaccard_score( "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def matthews_corrcoef(y_true, y_pred, *, sample_weight=None): """Compute the Matthews correlation coefficient (MCC). @@ -984,7 +989,8 @@ def matthews_corrcoef(y_true, y_pred, *, sample_weight=None): "y_pred": ["array-like", "sparse matrix"], "normalize": ["boolean"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None): """Zero-one classification loss. @@ -1073,10 +1079,12 @@ def zero_one_loss(y_true, y_pred, *, normalize=True, sample_weight=None): ], "sample_weight": ["array-like", None], "zero_division": [ - Options(Real, {0.0, 1.0, np.nan}), + Options(Real, {0.0, 1.0}), + "nan", StrOptions({"warn"}), ], - } + }, + prefer_skip_nested_validation=True, ) def f1_score( y_true, @@ -1253,10 +1261,12 @@ def f1_score( ], "sample_weight": ["array-like", None], "zero_division": [ - Options(Real, {0.0, 1.0, np.nan}), + Options(Real, {0.0, 1.0}), + "nan", StrOptions({"warn"}), ], - } + }, + prefer_skip_nested_validation=True, ) def fbeta_score( y_true, @@ -1534,10 +1544,12 @@ def _check_set_wise_labels(y_true, y_pred, average, labels, pos_label): "warn_for": [list, tuple, set], "sample_weight": ["array-like", None], "zero_division": [ - Options(Real, {0.0, 1.0, np.nan}), + Options(Real, {0.0, 1.0}), + "nan", StrOptions({"warn"}), ], - } + }, + prefer_skip_nested_validation=True, ) def precision_recall_fscore_support( y_true, @@ -1787,7 +1799,8 @@ def precision_recall_fscore_support( "labels": ["array-like", None], "sample_weight": ["array-like", None], "raise_warning": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def class_likelihood_ratios( y_true, @@ -1969,10 +1982,12 @@ class after being classified as negative. This is the case when the ], "sample_weight": ["array-like", None], "zero_division": [ - Options(Real, {0.0, 1.0, np.nan}), + Options(Real, {0.0, 1.0}), + "nan", StrOptions({"warn"}), ], - } + }, + prefer_skip_nested_validation=True, ) def precision_score( y_true, @@ -2138,10 +2153,12 @@ def precision_score( ], "sample_weight": ["array-like", None], "zero_division": [ - Options(Real, {0.0, 1.0, np.nan}), + Options(Real, {0.0, 1.0}), + "nan", StrOptions({"warn"}), ], - } + }, + prefer_skip_nested_validation=True, ) def recall_score( y_true, @@ -2303,7 +2320,8 @@ def recall_score( "y_pred": ["array-like"], "sample_weight": ["array-like", None], "adjusted": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def balanced_accuracy_score(y_true, y_pred, *, sample_weight=None, adjusted=False): """Compute the balanced accuracy. 
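The constraint change above (from `Options(Real, {0.0, 1.0, np.nan})` to `Options(Real, {0.0, 1.0})` plus a dedicated `"nan"` constraint) affects how `zero_division=np.nan` is validated, not what it does. A short sketch of the behaviour it permits, assuming a scikit-learn version that accepts `np.nan` here:

import numpy as np
from sklearn.metrics import f1_score

y_true = [0, 0, 0, 0]
y_pred = [0, 0, 0, 0]  # no positive labels and no positive predictions

# Precision and recall are undefined; with zero_division=np.nan the score is
# expected to be nan instead of being clamped to 0.0 or 1.0.
print(f1_score(y_true, y_pred, zero_division=np.nan))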
@@ -2399,10 +2417,12 @@ def balanced_accuracy_score(y_true, y_pred, *, sample_weight=None, adjusted=Fals "digits": [Interval(Integral, 0, None, closed="left")], "output_dict": ["boolean"], "zero_division": [ - Options(Real, {0.0, 1.0, np.nan}), + Options(Real, {0.0, 1.0}), + "nan", StrOptions({"warn"}), ], - } + }, + prefer_skip_nested_validation=True, ) def classification_report( y_true, @@ -2632,7 +2652,8 @@ class 2 1.00 0.67 0.80 3 "y_true": ["array-like", "sparse matrix"], "y_pred": ["array-like", "sparse matrix"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def hamming_loss(y_true, y_pred, *, sample_weight=None): """Compute the average Hamming loss. @@ -2736,7 +2757,8 @@ def hamming_loss(y_true, y_pred, *, sample_weight=None): "normalize": ["boolean"], "sample_weight": ["array-like", None], "labels": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def log_loss( y_true, y_pred, *, eps="auto", normalize=True, sample_weight=None, labels=None @@ -2768,7 +2790,7 @@ def log_loss( the probabilities provided are assumed to be that of the positive class. The labels in ``y_pred`` are assumed to be ordered alphabetically, as done by - :class:`preprocessing.LabelBinarizer`. + :class:`~sklearn.preprocessing.LabelBinarizer`. eps : float or "auto", default="auto" Log loss is undefined for p=0 or p=1, so probabilities are @@ -2916,7 +2938,8 @@ def log_loss( "pred_decision": ["array-like"], "labels": ["array-like", None], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None): """Average hinge loss (non-regularized). @@ -3067,7 +3090,8 @@ def hinge_loss(y_true, pred_decision, *, labels=None, sample_weight=None): "y_prob": ["array-like"], "sample_weight": ["array-like", None], "pos_label": [Real, str, "boolean", None], - } + }, + prefer_skip_nested_validation=True, ) def brier_score_loss(y_true, y_prob, *, sample_weight=None, pos_label=None): """Compute the Brier score loss. diff --git a/sklearn/metrics/_dist_metrics.pyx.tp b/sklearn/metrics/_dist_metrics.pyx.tp index bc54e51a7511a..8eda9b69ec525 100644 --- a/sklearn/metrics/_dist_metrics.pyx.tp +++ b/sklearn/metrics/_dist_metrics.pyx.tp @@ -65,6 +65,118 @@ def get_valid_metric_ids(L): if (val.__name__ in L) or (val in L)] cdef class DistanceMetric: + """Uniform interface for fast distance metric functions. + + The `DistanceMetric` class provides a convenient way to compute pairwise distances + between samples. It supports various distance metrics, such as Euclidean distance, + Manhattan distance, and more. + + The `pairwise` method can be used to compute pairwise distances between samples in + the input arrays. It returns a distance matrix representing the distances between + all pairs of samples. + + The :meth:`get_metric` method allows you to retrieve a specific metric using its + string identifier. + + Examples + -------- + >>> from sklearn.metrics import DistanceMetric + >>> dist = DistanceMetric.get_metric('euclidean') + >>> X = [[1, 2], [3, 4], [5, 6]] + >>> Y = [[7, 8], [9, 10]] + >>> dist.pairwise(X,Y) + array([[7.81..., 10.63...] + [5.65..., 8.48...] 
+ [1.41..., 4.24...]]) + + Available Metrics + + The following lists the string metric identifiers and the associated + distance metric classes: + + **Metrics intended for real-valued vector spaces:** + + ============== ==================== ======== =============================== + identifier class name args distance function + -------------- -------------------- -------- ------------------------------- + "euclidean" EuclideanDistance - ``sqrt(sum((x - y)^2))`` + "manhattan" ManhattanDistance - ``sum(|x - y|)`` + "chebyshev" ChebyshevDistance - ``max(|x - y|)`` + "minkowski" MinkowskiDistance p, w ``sum(w * |x - y|^p)^(1/p)`` + "seuclidean" SEuclideanDistance V ``sqrt(sum((x - y)^2 / V))`` + "mahalanobis" MahalanobisDistance V or VI ``sqrt((x - y)' V^-1 (x - y))`` + ============== ==================== ======== =============================== + + **Metrics intended for two-dimensional vector spaces:** Note that the haversine + distance metric requires data in the form of [latitude, longitude] and both + inputs and outputs are in units of radians. + + ============ ================== =============================================================== + identifier class name distance function + ------------ ------------------ --------------------------------------------------------------- + "haversine" HaversineDistance ``2 arcsin(sqrt(sin^2(0.5*dx) + cos(x1)cos(x2)sin^2(0.5*dy)))`` + ============ ================== =============================================================== + + + **Metrics intended for integer-valued vector spaces:** Though intended + for integer-valued vectors, these are also valid metrics in the case of + real-valued vectors. + + ============= ==================== ======================================== + identifier class name distance function + ------------- -------------------- ---------------------------------------- + "hamming" HammingDistance ``N_unequal(x, y) / N_tot`` + "canberra" CanberraDistance ``sum(|x - y| / (|x| + |y|))`` + "braycurtis" BrayCurtisDistance ``sum(|x - y|) / (sum(|x|) + sum(|y|))`` + ============= ==================== ======================================== + + **Metrics intended for boolean-valued vector spaces:** Any nonzero entry + is evaluated to "True". 
In the listings below, the following + abbreviations are used: + + - N : number of dimensions + - NTT : number of dims in which both values are True + - NTF : number of dims in which the first value is True, second is False + - NFT : number of dims in which the first value is False, second is True + - NFF : number of dims in which both values are False + - NNEQ : number of non-equal dimensions, NNEQ = NTF + NFT + - NNZ : number of nonzero dimensions, NNZ = NTF + NFT + NTT + + ================= ======================= =============================== + identifier class name distance function + ----------------- ----------------------- ------------------------------- + "jaccard" JaccardDistance NNEQ / NNZ + "matching" MatchingDistance NNEQ / N + "dice" DiceDistance NNEQ / (NTT + NNZ) + "kulsinski" KulsinskiDistance (NNEQ + N - NTT) / (NNEQ + N) + "rogerstanimoto" RogersTanimotoDistance 2 * NNEQ / (N + NNEQ) + "russellrao" RussellRaoDistance (N - NTT) / N + "sokalmichener" SokalMichenerDistance 2 * NNEQ / (N + NNEQ) + "sokalsneath" SokalSneathDistance NNEQ / (NNEQ + 0.5 * NTT) + ================= ======================= =============================== + + **User-defined distance:** + + =========== =============== ======= + identifier class name args + ----------- --------------- ------- + "pyfunc" PyFuncDistance func + =========== =============== ======= + + Here ``func`` is a function which takes two one-dimensional numpy + arrays, and returns a distance. Note that in order to be used within + the BallTree, the distance must be a true metric: + i.e. it must satisfy the following properties + + 1) Non-negativity: d(x, y) >= 0 + 2) Identity: d(x, y) = 0 if and only if x == y + 3) Symmetry: d(x, y) = d(y, x) + 4) Triangle Inequality: d(x, y) + d(y, z) >= d(x, z) + + Because of the Python object overhead involved in calling the python + function, this will be fairly slow, but it will have the same + scaling as other distances. + """ @classmethod def get_metric(cls, metric, dtype=np.float64, **kwargs): """Get the given distance metric from the string identifier. @@ -74,11 +186,24 @@ cdef class DistanceMetric: Parameters ---------- metric : str or class name - The distance metric to use + The string identifier or class name of the desired distance metric. + See the documentation of the `DistanceMetric` class for a list of + available metrics. + dtype : {np.float32, np.float64}, default=np.float64 - The dtype of the data on which the metric will be applied + The data type of the input on which the metric will be applied. + This affects the precision of the computed distances. + By default, it is set to `np.float64`. + **kwargs - additional arguments will be passed to the requested metric + Additional keyword arguments that will be passed to the requested metric. + These arguments can be used to customize the behavior of the specific + metric. + + Returns + ------- + metric_obj : instance of the requested metric + An instance of the requested distance metric class. """ if dtype == np.float32: specialized_class = DistanceMetric32 @@ -1271,19 +1396,27 @@ cdef class MinkowskiDistance{{name_suffix}}(DistanceMetric{{name_suffix}}): Parameters ---------- - p : int + p : float The order of the p-norm of the difference (see above). + + .. versionchanged:: 1.4.0 + Minkowski distance allows `p` to be `0 < p < 1`. - Minkowski Distance requires p >= 1 and finite. For p = infinity, - use ChebyshevDistance. + Minkowski Distance requires p > 0 and finite. 
+ When :math:`p \in (0,1)`, it isn't a true metric but is permissible when + the triangular inequality isn't necessary. + For p = infinity, use ChebyshevDistance. Note that for p=1, ManhattanDistance is more efficient, and for p=2, EuclideanDistance is more efficient. + """ def __init__(self, p, w=None): - if p < 1: - raise ValueError("p must be greater than 1") + if p <= 0: + raise ValueError("p must be greater than 0") elif np.isinf(p): raise ValueError("MinkowskiDistance requires finite p. " "For p=inf, use ChebyshevDistance.") diff --git a/sklearn/metrics/_pairwise_distances_reduction/__init__.py b/sklearn/metrics/_pairwise_distances_reduction/__init__.py index baa1c9de03952..68972de0a1a51 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/__init__.py +++ b/sklearn/metrics/_pairwise_distances_reduction/__init__.py @@ -87,10 +87,10 @@ from ._dispatcher import ( - BaseDistancesReductionDispatcher, ArgKmin, - RadiusNeighbors, ArgKminClassMode, + BaseDistancesReductionDispatcher, + RadiusNeighbors, sqeuclidean_row_norms, ) diff --git a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py index 5f4325af3a09f..796f15ab6fca0 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py +++ b/sklearn/metrics/_pairwise_distances_reduction/_dispatcher.py @@ -1,31 +1,25 @@ from abc import abstractmethod - -import numpy as np - from typing import List -from scipy.sparse import isspmatrix_csr, issparse +import numpy as np +from scipy.sparse import issparse +from ... import get_config from .._dist_metrics import BOOL_METRICS, METRIC_MAPPING64 - -from ._base import _sqeuclidean_row_norms32, _sqeuclidean_row_norms64 from ._argkmin import ( - ArgKmin64, ArgKmin32, + ArgKmin64, ) - from ._argkmin_classmode import ( - ArgKminClassMode64, ArgKminClassMode32, + ArgKminClassMode64, ) - +from ._base import _sqeuclidean_row_norms32, _sqeuclidean_row_norms64 from ._radius_neighbors import ( - RadiusNeighbors64, RadiusNeighbors32, + RadiusNeighbors64, ) -from ... import get_config - def sqeuclidean_row_norms(X, num_threads): """Compute the squared euclidean norm of the rows of X in parallel. 
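The `MinkowskiDistance` hunk above relaxes the constraint from `p >= 1` to `p > 0`. A sketch of what this allows, assuming a scikit-learn build that already contains the change; for `0 < p < 1` the result is not a true metric, so it is only meant for direct pairwise evaluation rather than use inside a BallTree:

import numpy as np
from sklearn.metrics import DistanceMetric

dist = DistanceMetric.get_metric("minkowski", p=0.5)
X = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 0.5]])
print(dist.pairwise(X))  # symmetric matrix with zeros on the diagonal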
@@ -102,11 +96,12 @@ def is_usable_for(cls, X, Y, metric) -> bool: """ def is_numpy_c_ordered(X): - return hasattr(X, "flags") and X.flags.c_contiguous + return hasattr(X, "flags") and getattr(X.flags, "c_contiguous", False) def is_valid_sparse_matrix(X): return ( - isspmatrix_csr(X) + issparse(X) + and X.format == "csr" and # TODO: support CSR matrices without non-zeros elements X.nnz > 0 diff --git a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp index 1defa30b6325e..8695baad172e0 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp +++ b/sklearn/metrics/_pairwise_distances_reduction/_radius_neighbors.pyx.tp @@ -300,7 +300,8 @@ cdef class RadiusNeighbors{{name_suffix}}(BaseDistancesReduction{{name_suffix}}) cdef void compute_exact_distances(self) noexcept nogil: """Convert rank-preserving distances to pairwise distances in parallel.""" cdef: - intp_t i, j + intp_t i + vector[intp_t].size_type j for i in prange(self.n_samples_X, nogil=True, schedule='static', num_threads=self.effective_n_threads): diff --git a/sklearn/metrics/_plot/confusion_matrix.py b/sklearn/metrics/_plot/confusion_matrix.py index 1611bb9605d85..f0bda0dc73d39 100644 --- a/sklearn/metrics/_plot/confusion_matrix.py +++ b/sklearn/metrics/_plot/confusion_matrix.py @@ -2,10 +2,10 @@ import numpy as np -from .. import confusion_matrix +from ...base import is_classifier from ...utils import check_matplotlib_support from ...utils.multiclass import unique_labels -from ...base import is_classifier +from .. import confusion_matrix class ConfusionMatrixDisplay: diff --git a/sklearn/metrics/_plot/det_curve.py b/sklearn/metrics/_plot/det_curve.py index 69ca8de8b5918..98997e01750bc 100644 --- a/sklearn/metrics/_plot/det_curve.py +++ b/sklearn/metrics/_plot/det_curve.py @@ -1,7 +1,7 @@ import scipy as sp -from .. import det_curve from ...utils._plotting import _BinaryClassifierCurveDisplayMixin +from .._ranking import det_curve class DetCurveDisplay(_BinaryClassifierCurveDisplayMixin): @@ -292,7 +292,7 @@ def plot(self, ax=None, *, name=None, **kwargs): Returns ------- - display : :class:`~sklearn.metrics.plot.DetCurveDisplay` + display : :class:`~sklearn.metrics.DetCurveDisplay` Object that stores computed values. """ self.ax_, self.figure_, name = self._validate_plot_params(ax=ax, name=name) diff --git a/sklearn/metrics/_plot/precision_recall_curve.py b/sklearn/metrics/_plot/precision_recall_curve.py index 5df70aa75b5fb..1c13377cfe137 100644 --- a/sklearn/metrics/_plot/precision_recall_curve.py +++ b/sklearn/metrics/_plot/precision_recall_curve.py @@ -1,8 +1,7 @@ from collections import Counter -from .. import average_precision_score -from .. import precision_recall_curve from ...utils._plotting import _BinaryClassifierCurveDisplayMixin +from .._ranking import average_precision_score, precision_recall_curve class PrecisionRecallDisplay(_BinaryClassifierCurveDisplayMixin): @@ -11,7 +10,7 @@ class PrecisionRecallDisplay(_BinaryClassifierCurveDisplayMixin): It is recommend to use :func:`~sklearn.metrics.PrecisionRecallDisplay.from_estimator` or :func:`~sklearn.metrics.PrecisionRecallDisplay.from_predictions` to create - a :class:`~sklearn.metrics.PredictionRecallDisplay`. All parameters are + a :class:`~sklearn.metrics.PrecisionRecallDisplay`. All parameters are stored as attributes. Read more in the :ref:`User Guide `. 
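The `is_valid_sparse_matrix` change earlier in this file replaces the deprecated `isspmatrix_csr` with an `issparse`/`format` check, which also covers SciPy's newer sparse-array containers. A small sketch of the difference (the `csr_array` class assumes SciPy >= 1.8):

import scipy.sparse as sp

def is_csr(X):
    # Mirrors the check used in the dispatcher hunk above.
    return sp.issparse(X) and X.format == "csr"

print(is_csr(sp.csr_matrix([[1, 0], [0, 1]])))  # True
print(is_csr(sp.csr_array([[1, 0], [0, 1]])))   # True; isspmatrix_csr would say False
print(is_csr(sp.coo_matrix([[1, 0], [0, 1]])))  # False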
@@ -70,7 +69,7 @@ class PrecisionRecallDisplay(_BinaryClassifierCurveDisplayMixin): Notes ----- - The average precision (cf. :func:`~sklearn.metrics.average_precision`) in + The average precision (cf. :func:`~sklearn.metrics.average_precision_score`) in scikit-learn is computed without any interpolation. To be consistent with this metric, the precision-recall curve is plotted without any interpolation as well (step-wise style). @@ -165,7 +164,7 @@ def plot( Notes ----- - The average precision (cf. :func:`~sklearn.metrics.average_precision`) + The average precision (cf. :func:`~sklearn.metrics.average_precision_score`) in scikit-learn is computed without any interpolation. To be consistent with this metric, the precision-recall curve is plotted without any interpolation as well (step-wise style). @@ -313,7 +312,7 @@ def from_estimator( Notes ----- - The average precision (cf. :func:`~sklearn.metrics.average_precision`) + The average precision (cf. :func:`~sklearn.metrics.average_precision_score`) in scikit-learn is computed without any interpolation. To be consistent with this metric, the precision-recall curve is plotted without any interpolation as well (step-wise style). @@ -435,7 +434,7 @@ def from_predictions( Notes ----- - The average precision (cf. :func:`~sklearn.metrics.average_precision`) + The average precision (cf. :func:`~sklearn.metrics.average_precision_score`) in scikit-learn is computed without any interpolation. To be consistent with this metric, the precision-recall curve is plotted without any interpolation as well (step-wise style). diff --git a/sklearn/metrics/_plot/regression.py b/sklearn/metrics/_plot/regression.py index 46440c3e133b1..0b855d679866f 100644 --- a/sklearn/metrics/_plot/regression.py +++ b/sklearn/metrics/_plot/regression.py @@ -2,9 +2,7 @@ import numpy as np -from ...utils import check_matplotlib_support -from ...utils import check_random_state -from ...utils import _safe_indexing +from ...utils import _safe_indexing, check_matplotlib_support, check_random_state class PredictionErrorDisplay: @@ -117,7 +115,8 @@ def plot( Returns ------- - display : :class:`~sklearn.metrics.plot.PredictionErrorDisplay` + display : :class:`~sklearn.metrics.PredictionErrorDisplay` + Object that stores computed values. """ check_matplotlib_support(f"{self.__class__.__name__}.plot") diff --git a/sklearn/metrics/_plot/roc_curve.py b/sklearn/metrics/_plot/roc_curve.py index aa48936b938ef..9fc401383c45f 100644 --- a/sklearn/metrics/_plot/roc_curve.py +++ b/sklearn/metrics/_plot/roc_curve.py @@ -1,6 +1,5 @@ -from .. import auc -from .. import roc_curve from ...utils._plotting import _BinaryClassifierCurveDisplayMixin +from .._ranking import auc, roc_curve class RocCurveDisplay(_BinaryClassifierCurveDisplayMixin): @@ -122,7 +121,7 @@ def plot( Returns ------- - display : :class:`~sklearn.metrics.plot.RocCurveDisplay` + display : :class:`~sklearn.metrics.RocCurveDisplay` Object that stores computed values. """ self.ax_, self.figure_, name = self._validate_plot_params(ax=ax, name=name) @@ -241,7 +240,7 @@ def from_estimator( Returns ------- - display : :class:`~sklearn.metrics.plot.RocCurveDisplay` + display : :class:`~sklearn.metrics.RocCurveDisplay` The ROC Curve display. 
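The corrected return-type annotations above all point at the public display classes rather than the removed `sklearn.metrics.plot` path. A minimal usage sketch, assuming matplotlib is installed (a non-interactive backend is forced only to keep the sketch headless):

import matplotlib
matplotlib.use("Agg")

from sklearn.metrics import RocCurveDisplay

y_true = [0, 0, 1, 1]
y_score = [0.1, 0.4, 0.35, 0.8]
display = RocCurveDisplay.from_predictions(y_true, y_score)
print(type(display).__name__, round(display.roc_auc, 3))  # RocCurveDisplay 0.75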
See Also diff --git a/sklearn/metrics/_plot/tests/test_common_curve_display.py b/sklearn/metrics/_plot/tests/test_common_curve_display.py index b9fda563fc984..47ac750f9b278 100644 --- a/sklearn/metrics/_plot/tests/test_common_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_common_curve_display.py @@ -2,20 +2,19 @@ import pytest from sklearn.base import ClassifierMixin, clone +from sklearn.calibration import CalibrationDisplay from sklearn.compose import make_column_transformer from sklearn.datasets import load_iris from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression -from sklearn.pipeline import make_pipeline -from sklearn.preprocessing import StandardScaler -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor - -from sklearn.calibration import CalibrationDisplay from sklearn.metrics import ( DetCurveDisplay, PrecisionRecallDisplay, RocCurveDisplay, ) +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor @pytest.fixture(scope="module") diff --git a/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py b/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py index 48b7a44f39ea8..66c90d81dc016 100644 --- a/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py +++ b/sklearn/metrics/_plot/tests/test_confusion_matrix_display.py @@ -1,22 +1,19 @@ +import numpy as np +import pytest from numpy.testing import ( assert_allclose, assert_array_equal, ) -import numpy as np -import pytest -from sklearn.datasets import make_classification from sklearn.compose import make_column_transformer +from sklearn.datasets import make_classification from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression +from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC, SVR -from sklearn.metrics import ConfusionMatrixDisplay -from sklearn.metrics import confusion_matrix - - # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved pytestmark = pytest.mark.filterwarnings( "ignore:In future, it will be an error for 'np.bool_':DeprecationWarning:" diff --git a/sklearn/metrics/_plot/tests/test_det_curve_display.py b/sklearn/metrics/_plot/tests/test_det_curve_display.py index 5d7a26d5e49a0..403ea70109577 100644 --- a/sklearn/metrics/_plot/tests/test_det_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_det_curve_display.py @@ -1,12 +1,10 @@ -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose from sklearn.datasets import load_iris from sklearn.linear_model import LogisticRegression - -from sklearn.metrics import det_curve -from sklearn.metrics import DetCurveDisplay +from sklearn.metrics import DetCurveDisplay, det_curve @pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"]) diff --git a/sklearn/metrics/_plot/tests/test_precision_recall_display.py b/sklearn/metrics/_plot/tests/test_precision_recall_display.py index 0bb6501dec89a..5487fe3156bdb 100644 --- a/sklearn/metrics/_plot/tests/test_precision_recall_display.py +++ b/sklearn/metrics/_plot/tests/test_precision_recall_display.py @@ -7,13 +7,16 @@ from sklearn.datasets import load_breast_cancer, make_classification from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression 
-from sklearn.metrics import average_precision_score, precision_recall_curve +from sklearn.metrics import ( + PrecisionRecallDisplay, + average_precision_score, + precision_recall_curve, +) from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.utils import shuffle - -from sklearn.metrics import PrecisionRecallDisplay +from sklearn.utils.fixes import trapezoid # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved pytestmark = pytest.mark.filterwarnings( @@ -284,7 +287,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_meth # we should obtain the statistics of the "cancer" class avg_prec_limit = 0.65 assert display.average_precision < avg_prec_limit - assert -np.trapz(display.precision, display.recall) < avg_prec_limit + assert -trapezoid(display.precision, display.recall) < avg_prec_limit # otherwise we should obtain the statistics of the "not cancer" class if constructor_name == "from_estimator": @@ -303,7 +306,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_meth ) avg_prec_limit = 0.95 assert display.average_precision > avg_prec_limit - assert -np.trapz(display.precision, display.recall) > avg_prec_limit + assert -trapezoid(display.precision, display.recall) > avg_prec_limit @pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"]) diff --git a/sklearn/metrics/_plot/tests/test_predict_error_display.py b/sklearn/metrics/_plot/tests/test_predict_error_display.py index 3d3833d825360..535c9af9506ce 100644 --- a/sklearn/metrics/_plot/tests/test_predict_error_display.py +++ b/sklearn/metrics/_plot/tests/test_predict_error_display.py @@ -1,11 +1,9 @@ import pytest - from numpy.testing import assert_allclose from sklearn.datasets import load_diabetes from sklearn.exceptions import NotFittedError from sklearn.linear_model import Ridge - from sklearn.metrics import PredictionErrorDisplay X, y = load_diabetes(return_X_y=True) diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py index 9a390e09e6871..56f20204ddc2d 100644 --- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py @@ -1,24 +1,17 @@ -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose - from sklearn.compose import make_column_transformer -from sklearn.datasets import load_iris - -from sklearn.datasets import load_breast_cancer +from sklearn.datasets import load_breast_cancer, load_iris from sklearn.exceptions import NotFittedError from sklearn.linear_model import LogisticRegression -from sklearn.metrics import roc_curve -from sklearn.metrics import auc - +from sklearn.metrics import RocCurveDisplay, auc, roc_curve from sklearn.model_selection import train_test_split from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler from sklearn.utils import shuffle - - -from sklearn.metrics import RocCurveDisplay +from sklearn.utils.fixes import trapezoid @pytest.fixture(scope="module") @@ -298,7 +291,7 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name): roc_auc_limit = 0.95679 assert display.roc_auc == pytest.approx(roc_auc_limit) - assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit) + assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit) if constructor_name == 
"from_estimator": display = RocCurveDisplay.from_estimator( @@ -316,4 +309,4 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name): ) assert display.roc_auc == pytest.approx(roc_auc_limit) - assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit) + assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit) diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py index eb2d50c649516..a7d4b5ef18d66 100644 --- a/sklearn/metrics/_ranking.py +++ b/sklearn/metrics/_ranking.py @@ -21,28 +21,34 @@ import warnings from functools import partial -from numbers import Real, Integral +from numbers import Integral, Real import numpy as np from scipy.sparse import csr_matrix, issparse from scipy.stats import rankdata -from ..utils import assert_all_finite -from ..utils import check_consistent_length -from ..utils.validation import _check_pos_label_consistency, _check_sample_weight -from ..utils import column_or_1d, check_array -from ..utils.multiclass import type_of_target -from ..utils.extmath import stable_cumsum -from ..utils.sparsefuncs import count_nonzero -from ..utils._param_validation import validate_params, StrOptions, Interval from ..exceptions import UndefinedMetricWarning from ..preprocessing import label_binarize +from ..utils import ( + assert_all_finite, + check_array, + check_consistent_length, + column_or_1d, +) from ..utils._encode import _encode, _unique - +from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils.extmath import stable_cumsum +from ..utils.fixes import trapezoid +from ..utils.multiclass import type_of_target +from ..utils.sparsefuncs import count_nonzero +from ..utils.validation import _check_pos_label_consistency, _check_sample_weight from ._base import _average_binary_score, _average_multiclass_ovo_score -@validate_params({"x": ["array-like"], "y": ["array-like"]}) +@validate_params( + {"x": ["array-like"], "y": ["array-like"]}, + prefer_skip_nested_validation=True, +) def auc(x, y): """Compute Area Under the Curve (AUC) using the trapezoidal rule. @@ -99,9 +105,9 @@ def auc(x, y): else: raise ValueError("x is neither increasing nor decreasing : {}.".format(x)) - area = direction * np.trapz(y, x) + area = direction * trapezoid(y, x) if isinstance(area, np.memmap): - # Reductions such as .sum used internally in np.trapz do not return a + # Reductions such as .sum used internally in trapezoid do not return a # scalar by default for numpy.memmap instances contrary to # regular numpy.ndarray instances. area = area.dtype.type(area) @@ -115,7 +121,8 @@ def auc(x, y): "average": [StrOptions({"micro", "samples", "weighted", "macro"}), None], "pos_label": [Real, str, "boolean"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def average_precision_score( y_true, y_score, *, average="macro", pos_label=1, sample_weight=None @@ -269,7 +276,8 @@ def _binary_uninterpolated_average_precision( "y_score": ["array-like"], "pos_label": [Real, str, "boolean", None], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def det_curve(y_true, y_score, pos_label=None, sample_weight=None): """Compute error rates for different probability thresholds. @@ -306,7 +314,7 @@ def det_curve(y_true, y_score, pos_label=None, sample_weight=None): fpr : ndarray of shape (n_thresholds,) False positive rate (FPR) such that element i is the false positive rate of predictions with score >= thresholds[i]. 
This is occasionally - referred to as false acceptance propability or fall-out. + referred to as false acceptance probability or fall-out. fnr : ndarray of shape (n_thresholds,) False negative rate (FNR) such that element i is the false negative @@ -406,7 +414,8 @@ def _binary_roc_auc_score(y_true, y_score, sample_weight=None, max_fpr=None): "max_fpr": [Interval(Real, 0.0, 1, closed="right"), None], "multi_class": [StrOptions({"raise", "ovr", "ovo"})], "labels": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def roc_auc_score( y_true, @@ -847,7 +856,8 @@ def _binary_clf_curve(y_true, y_score, pos_label=None, sample_weight=None): "pos_label": [Real, str, "boolean", None], "sample_weight": ["array-like", None], "drop_intermediate": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def precision_recall_curve( y_true, probas_pred, *, pos_label=None, sample_weight=None, drop_intermediate=False @@ -987,7 +997,8 @@ def precision_recall_curve( "pos_label": [Real, str, "boolean", None], "sample_weight": ["array-like", None], "drop_intermediate": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def roc_curve( y_true, y_score, *, pos_label=None, sample_weight=None, drop_intermediate=True @@ -1135,7 +1146,8 @@ def roc_curve( "y_true": ["array-like", "sparse matrix"], "y_score": ["array-like"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def label_ranking_average_precision_score(y_true, y_score, *, sample_weight=None): """Compute ranking-based average precision. @@ -1233,7 +1245,8 @@ def label_ranking_average_precision_score(y_true, y_score, *, sample_weight=None "y_true": ["array-like"], "y_score": ["array-like"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def coverage_error(y_true, y_score, *, sample_weight=None): """Coverage error measure. @@ -1299,7 +1312,8 @@ def coverage_error(y_true, y_score, *, sample_weight=None): "y_true": ["array-like", "sparse matrix"], "y_score": ["array-like"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def label_ranking_loss(y_true, y_score, *, sample_weight=None): """Compute Ranking loss measure. @@ -1516,7 +1530,8 @@ def _check_dcg_target_type(y_true): "log_base": [Interval(Real, 0.0, None, closed="neither")], "sample_weight": ["array-like", None], "ignore_ties": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def dcg_score( y_true, y_score, *, k=None, log_base=2, sample_weight=None, ignore_ties=False @@ -1683,7 +1698,8 @@ def _ndcg_sample_scores(y_true, y_score, k=None, ignore_ties=False): "k": [Interval(Integral, 1, None, closed="left"), None], "sample_weight": ["array-like", None], "ignore_ties": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False): """Compute Normalized Discounted Cumulative Gain. 
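Several hunks above swap `np.trapz` for `trapezoid` (imported from `sklearn.utils.fixes` in the library and its tests). A sketch of the equivalence, using `scipy.integrate.trapezoid` directly since that is the function the fixes module is expected to resolve to on recent SciPy:

import numpy as np
from scipy.integrate import trapezoid

fpr = np.array([0.0, 0.5, 1.0])
tpr = np.array([0.0, 0.8, 1.0])

# Same trapezoidal-rule area either way; trapezoid avoids the np.trapz
# deprecation on recent NumPy.
print(trapezoid(tpr, fpr))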
@@ -1814,7 +1830,8 @@ def ndcg_score(y_true, y_score, *, k=None, sample_weight=None, ignore_ties=False "normalize": ["boolean"], "sample_weight": ["array-like", None], "labels": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def top_k_accuracy_score( y_true, y_score, *, k=2, normalize=True, sample_weight=None, labels=None diff --git a/sklearn/metrics/_regression.py b/sklearn/metrics/_regression.py index 377c3f8c467cf..a6dfacf30d3e1 100644 --- a/sklearn/metrics/_regression.py +++ b/sklearn/metrics/_regression.py @@ -26,23 +26,22 @@ # Ohad Michel # License: BSD 3 clause -from numbers import Real import warnings +from numbers import Real import numpy as np from scipy.special import xlogy from ..exceptions import UndefinedMetricWarning +from ..utils._param_validation import Interval, StrOptions, validate_params +from ..utils.stats import _weighted_percentile from ..utils.validation import ( + _check_sample_weight, + _num_samples, check_array, check_consistent_length, - _num_samples, column_or_1d, - _check_sample_weight, ) -from ..utils.stats import _weighted_percentile -from ..utils._param_validation import Interval, StrOptions, validate_params - __ALL__ = [ "max_error", @@ -144,7 +143,8 @@ def _check_reg_targets(y_true, y_pred, multioutput, dtype="numeric"): "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], - } + }, + prefer_skip_nested_validation=True, ) def mean_absolute_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" @@ -223,7 +223,8 @@ def mean_absolute_error( "sample_weight": ["array-like", None], "alpha": [Interval(Real, 0, 1, closed="both")], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], - } + }, + prefer_skip_nested_validation=True, ) def mean_pinball_loss( y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average" @@ -311,7 +312,8 @@ def mean_pinball_loss( "y_pred": ["array-like"], "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], - } + }, + prefer_skip_nested_validation=True, ) def mean_absolute_percentage_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" @@ -406,7 +408,8 @@ def mean_absolute_percentage_error( "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], "squared": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def mean_squared_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True @@ -494,7 +497,8 @@ def mean_squared_error( "sample_weight": ["array-like", None], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], "squared": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def mean_squared_log_error( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average", squared=True @@ -580,7 +584,8 @@ def mean_squared_log_error( "y_pred": ["array-like"], "multioutput": [StrOptions({"raw_values", "uniform_average"}), "array-like"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def median_absolute_error( y_true, y_pred, *, multioutput="uniform_average", sample_weight=None @@ -712,7 +717,8 @@ def _assemble_r2_explained_variance( "array-like", ], "force_finite": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def explained_variance_score( y_true, @@ -850,7 +856,8 @@ def 
explained_variance_score( None, ], "force_finite": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def r2_score( y_true, @@ -1013,7 +1020,8 @@ def r2_score( { "y_true": ["array-like"], "y_pred": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def max_error(y_true, y_pred): """ @@ -1086,7 +1094,8 @@ def _mean_tweedie_deviance(y_true, y_pred, sample_weight, power): Interval(Real, None, 0, closed="right"), Interval(Real, 1, None, closed="left"), ], - } + }, + prefer_skip_nested_validation=True, ) def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0): """Mean Tweedie deviance regression loss. @@ -1177,7 +1186,8 @@ def mean_tweedie_deviance(y_true, y_pred, *, sample_weight=None, power=0): "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None): """Mean Poisson deviance regression loss. @@ -1219,7 +1229,8 @@ def mean_poisson_deviance(y_true, y_pred, *, sample_weight=None): "y_true": ["array-like"], "y_pred": ["array-like"], "sample_weight": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None): """Mean Gamma deviance regression loss. @@ -1266,7 +1277,8 @@ def mean_gamma_deviance(y_true, y_pred, *, sample_weight=None): Interval(Real, None, 0, closed="right"), Interval(Real, 1, None, closed="left"), ], - } + }, + prefer_skip_nested_validation=True, ) def d2_tweedie_score(y_true, y_pred, *, sample_weight=None, power=0): """D^2 regression score function, fraction of Tweedie deviance explained. @@ -1378,7 +1390,8 @@ def d2_tweedie_score(y_true, y_pred, *, sample_weight=None, power=0): StrOptions({"raw_values", "uniform_average"}), "array-like", ], - } + }, + prefer_skip_nested_validation=True, ) def d2_pinball_score( y_true, y_pred, *, sample_weight=None, alpha=0.5, multioutput="uniform_average" @@ -1528,7 +1541,8 @@ def d2_pinball_score( StrOptions({"raw_values", "uniform_average"}), "array-like", ], - } + }, + prefer_skip_nested_validation=True, ) def d2_absolute_error_score( y_true, y_pred, *, sample_weight=None, multioutput="uniform_average" diff --git a/sklearn/metrics/_scorer.py b/sklearn/metrics/_scorer.py index d67e53b3fe0ed..c8969acba1744 100644 --- a/sklearn/metrics/_scorer.py +++ b/sklearn/metrics/_scorer.py @@ -18,62 +18,64 @@ # Arnaud Joly # License: Simplified BSD +import copy import warnings from collections import Counter -from inspect import signature from functools import partial +from inspect import signature from traceback import format_exc import numpy as np -import copy +from ..base import is_regressor +from ..utils import Bunch +from ..utils._param_validation import HasMethods, StrOptions, validate_params +from ..utils._response import _get_response_values +from ..utils.metadata_routing import ( + MetadataRequest, + MetadataRouter, + _MetadataRequester, + _routing_enabled, + get_routing_for_object, + process_routing, +) +from ..utils.multiclass import type_of_target from . 
import ( - r2_score, - median_absolute_error, - max_error, - mean_absolute_error, - mean_squared_error, - mean_squared_log_error, - mean_poisson_deviance, - mean_gamma_deviance, accuracy_score, - top_k_accuracy_score, - f1_score, - roc_auc_score, average_precision_score, - precision_score, - recall_score, - log_loss, balanced_accuracy_score, - explained_variance_score, brier_score_loss, + class_likelihood_ratios, + explained_variance_score, + f1_score, jaccard_score, - mean_absolute_percentage_error, + log_loss, matthews_corrcoef, - class_likelihood_ratios, + max_error, + mean_absolute_error, + mean_absolute_percentage_error, + mean_gamma_deviance, + mean_poisson_deviance, + mean_squared_error, + mean_squared_log_error, + median_absolute_error, + precision_score, + r2_score, + recall_score, + roc_auc_score, + top_k_accuracy_score, +) +from .cluster import ( + adjusted_mutual_info_score, + adjusted_rand_score, + completeness_score, + fowlkes_mallows_score, + homogeneity_score, + mutual_info_score, + normalized_mutual_info_score, + rand_score, + v_measure_score, ) - -from .cluster import adjusted_rand_score -from .cluster import rand_score -from .cluster import homogeneity_score -from .cluster import completeness_score -from .cluster import v_measure_score -from .cluster import mutual_info_score -from .cluster import adjusted_mutual_info_score -from .cluster import normalized_mutual_info_score -from .cluster import fowlkes_mallows_score - -from ..utils import Bunch -from ..utils.multiclass import type_of_target -from ..base import is_regressor -from ..utils.metadata_routing import _MetadataRequester -from ..utils.metadata_routing import MetadataRequest -from ..utils.metadata_routing import MetadataRouter -from ..utils.metadata_routing import process_routing -from ..utils.metadata_routing import get_routing_for_object -from ..utils.metadata_routing import _routing_enabled -from ..utils._response import _get_response_values -from ..utils._param_validation import HasMethods, StrOptions, validate_params def _cached_call(cache, estimator, response_method, *args, **kwargs): @@ -475,7 +477,8 @@ def _factory_args(self): @validate_params( { "scoring": [str, callable, None], - } + }, + prefer_skip_nested_validation=True, ) def get_scorer(scoring): """Get a scorer from string. @@ -635,7 +638,8 @@ def _check_multimetric_scoring(estimator, scoring): "greater_is_better": ["boolean"], "needs_proba": ["boolean"], "needs_threshold": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def make_scorer( score_func, @@ -892,7 +896,8 @@ def get_scorer_names(): "estimator": [HasMethods("fit")], "scoring": [StrOptions(set(get_scorer_names())), callable, None], "allow_none": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def check_scoring(estimator, scoring=None, *, allow_none=False): """Determine scorer from user options. diff --git a/sklearn/metrics/cluster/__init__.py b/sklearn/metrics/cluster/__init__.py index fefb47b11903a..a332997a84414 100644 --- a/sklearn/metrics/cluster/__init__.py +++ b/sklearn/metrics/cluster/__init__.py @@ -5,25 +5,29 @@ - supervised, which uses a ground truth class values for each sample. - unsupervised, which does not and measures the 'quality' of the model itself. 
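A short sketch of the two metric families just described, using one metric from each; the toy data and clusterer below are arbitrary and purely illustrative:

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import adjusted_rand_score, silhouette_score

X, y_true = make_blobs(n_samples=60, centers=3, random_state=0)
labels = KMeans(n_clusters=3, n_init=10, random_state=0).fit_predict(X)

print(adjusted_rand_score(y_true, labels))  # supervised: compares to ground truth
print(silhouette_score(X, labels))          # unsupervised: uses only X and labels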
""" -from ._supervised import adjusted_mutual_info_score -from ._supervised import normalized_mutual_info_score -from ._supervised import adjusted_rand_score -from ._supervised import rand_score -from ._supervised import completeness_score -from ._supervised import contingency_matrix -from ._supervised import pair_confusion_matrix -from ._supervised import expected_mutual_information -from ._supervised import homogeneity_completeness_v_measure -from ._supervised import homogeneity_score -from ._supervised import mutual_info_score -from ._supervised import v_measure_score -from ._supervised import fowlkes_mallows_score -from ._supervised import entropy -from ._unsupervised import silhouette_samples -from ._unsupervised import silhouette_score -from ._unsupervised import calinski_harabasz_score -from ._unsupervised import davies_bouldin_score from ._bicluster import consensus_score +from ._supervised import ( + adjusted_mutual_info_score, + adjusted_rand_score, + completeness_score, + contingency_matrix, + entropy, + expected_mutual_information, + fowlkes_mallows_score, + homogeneity_completeness_v_measure, + homogeneity_score, + mutual_info_score, + normalized_mutual_info_score, + pair_confusion_matrix, + rand_score, + v_measure_score, +) +from ._unsupervised import ( + calinski_harabasz_score, + davies_bouldin_score, + silhouette_samples, + silhouette_score, +) __all__ = [ "adjusted_mutual_info_score", diff --git a/sklearn/metrics/cluster/_bicluster.py b/sklearn/metrics/cluster/_bicluster.py index eef311afcf463..03ef841ba01cc 100644 --- a/sklearn/metrics/cluster/_bicluster.py +++ b/sklearn/metrics/cluster/_bicluster.py @@ -1,7 +1,7 @@ import numpy as np from scipy.optimize import linear_sum_assignment -from ...utils.validation import check_consistent_length, check_array +from ...utils.validation import check_array, check_consistent_length __all__ = ["consensus_score"] diff --git a/sklearn/metrics/cluster/_supervised.py b/sklearn/metrics/cluster/_supervised.py index 256115b8b1e31..2b7a00d90eb18 100644 --- a/sklearn/metrics/cluster/_supervised.py +++ b/sklearn/metrics/cluster/_supervised.py @@ -23,11 +23,10 @@ import numpy as np from scipy import sparse as sp -from ._expected_mutual_info_fast import expected_mutual_information +from ...utils._param_validation import Interval, StrOptions, validate_params from ...utils.multiclass import type_of_target from ...utils.validation import check_array, check_consistent_length -from ...utils._param_validation import validate_params -from ...utils._param_validation import Interval, StrOptions +from ._expected_mutual_info_fast import expected_mutual_information def check_clusterings(labels_true, labels_pred): @@ -99,7 +98,8 @@ def _generalized_average(U, V, average_method): "eps": [Interval(Real, 0, None, closed="left"), None], "sparse": ["boolean"], "dtype": "no_validation", # delegate the validation to SciPy - } + }, + prefer_skip_nested_validation=True, ) def contingency_matrix( labels_true, labels_pred, *, eps=None, sparse=False, dtype=np.int64 @@ -174,7 +174,8 @@ def contingency_matrix( { "labels_true": ["array-like"], "labels_pred": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def pair_confusion_matrix(labels_true, labels_pred): """Pair confusion matrix arising from two clusterings [1]_. @@ -206,9 +207,9 @@ def pair_confusion_matrix(labels_true, labels_pred): See Also -------- - rand_score: Rand Score. - adjusted_rand_score: Adjusted Rand Score. - adjusted_mutual_info_score: Adjusted Mutual Information. 
+ rand_score : Rand Score. + adjusted_rand_score : Adjusted Rand Score. + adjusted_mutual_info_score : Adjusted Mutual Information. References ---------- @@ -258,7 +259,8 @@ def pair_confusion_matrix(labels_true, labels_pred): { "labels_true": ["array-like"], "labels_pred": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def rand_score(labels_true, labels_pred): """Rand index. @@ -335,7 +337,8 @@ def rand_score(labels_true, labels_pred): { "labels_true": ["array-like"], "labels_pred": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def adjusted_rand_score(labels_true, labels_pred): """Rand index adjusted for chance. @@ -444,7 +447,8 @@ def adjusted_rand_score(labels_true, labels_pred): "labels_true": ["array-like"], "labels_pred": ["array-like"], "beta": [Interval(Real, 0, None, closed="left")], - } + }, + prefer_skip_nested_validation=True, ) def homogeneity_completeness_v_measure(labels_true, labels_pred, *, beta=1.0): """Compute the homogeneity and completeness and V-Measure scores at once. @@ -536,7 +540,8 @@ def homogeneity_completeness_v_measure(labels_true, labels_pred, *, beta=1.0): { "labels_true": ["array-like"], "labels_pred": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def homogeneity_score(labels_true, labels_pred): """Homogeneity metric of a cluster labeling given a ground truth. @@ -611,7 +616,8 @@ def homogeneity_score(labels_true, labels_pred): { "labels_true": ["array-like"], "labels_pred": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def completeness_score(labels_true, labels_pred): """Compute completeness metric of a cluster labeling given a ground truth. @@ -687,7 +693,8 @@ def completeness_score(labels_true, labels_pred): "labels_true": ["array-like"], "labels_pred": ["array-like"], "beta": [Interval(Real, 0, None, closed="left")], - } + }, + prefer_skip_nested_validation=True, ) def v_measure_score(labels_true, labels_pred, *, beta=1.0): """V-measure cluster labeling given a ground truth. @@ -790,7 +797,8 @@ def v_measure_score(labels_true, labels_pred, *, beta=1.0): "labels_true": ["array-like", None], "labels_pred": ["array-like", None], "contingency": ["array-like", "sparse matrix", None], - } + }, + prefer_skip_nested_validation=True, ) def mutual_info_score(labels_true, labels_pred, *, contingency=None): """Mutual Information between two clusterings. @@ -830,9 +838,10 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None): contingency : {array-like, sparse matrix} of shape \ (n_classes_true, n_classes_pred), default=None - A contingency matrix given by the :func:`contingency_matrix` function. - If value is ``None``, it will be computed, otherwise the given value is - used, with ``labels_true`` and ``labels_pred`` ignored. + A contingency matrix given by the + :func:`~sklearn.metrics.cluster.contingency_matrix` function. If value + is ``None``, it will be computed, otherwise the given value is used, + with ``labels_true`` and ``labels_pred`` ignored. 
Returns ------- @@ -896,7 +905,8 @@ def mutual_info_score(labels_true, labels_pred, *, contingency=None): "labels_true": ["array-like"], "labels_pred": ["array-like"], "average_method": [StrOptions({"arithmetic", "max", "min", "geometric"})], - } + }, + prefer_skip_nested_validation=True, ) def adjusted_mutual_info_score( labels_true, labels_pred, *, average_method="arithmetic" @@ -1028,7 +1038,8 @@ def adjusted_mutual_info_score( "labels_true": ["array-like"], "labels_pred": ["array-like"], "average_method": [StrOptions({"arithmetic", "max", "min", "geometric"})], - } + }, + prefer_skip_nested_validation=True, ) def normalized_mutual_info_score( labels_true, labels_pred, *, average_method="arithmetic" @@ -1142,7 +1153,8 @@ def normalized_mutual_info_score( "labels_true": ["array-like"], "labels_pred": ["array-like"], "sparse": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def fowlkes_mallows_score(labels_true, labels_pred, *, sparse=False): """Measure the similarity of two clusterings of a set of points. @@ -1225,7 +1237,8 @@ def fowlkes_mallows_score(labels_true, labels_pred, *, sparse=False): @validate_params( { "labels": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def entropy(labels): """Calculate the entropy for a labeling. diff --git a/sklearn/metrics/cluster/_unsupervised.py b/sklearn/metrics/cluster/_unsupervised.py index 43397ff9a0210..10749c23dacbe 100644 --- a/sklearn/metrics/cluster/_unsupervised.py +++ b/sklearn/metrics/cluster/_unsupervised.py @@ -6,24 +6,20 @@ # License: BSD 3 clause -from numbers import Integral import functools +from numbers import Integral import numpy as np from scipy.sparse import issparse -from ...utils import check_random_state -from ...utils import check_X_y -from ...utils import _safe_indexing +from ...preprocessing import LabelEncoder +from ...utils import _safe_indexing, check_random_state, check_X_y from ...utils._param_validation import ( Interval, StrOptions, validate_params, ) -from ..pairwise import pairwise_distances_chunked -from ..pairwise import pairwise_distances -from ..pairwise import _VALID_METRICS -from ...preprocessing import LabelEncoder +from ..pairwise import _VALID_METRICS, pairwise_distances, pairwise_distances_chunked def check_number_of_labels(n_labels, n_samples): @@ -51,7 +47,8 @@ def check_number_of_labels(n_labels, n_samples): "metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable], "sample_size": [Interval(Integral, 1, None, closed="left"), None], "random_state": ["random_state"], - } + }, + prefer_skip_nested_validation=True, ) def silhouette_score( X, labels, *, metric="euclidean", sample_size=None, random_state=None, **kwds @@ -87,8 +84,7 @@ def silhouette_score( metric : str or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options - allowed by :func:`metrics.pairwise.pairwise_distances - `. If ``X`` is + allowed by :func:`~sklearn.metrics.pairwise_distances`. If ``X`` is the distance array itself, use ``metric="precomputed"``. sample_size : int, default=None @@ -193,7 +189,8 @@ def _silhouette_reduce(D_chunk, start, labels, label_freqs): "X": ["array-like", "sparse matrix"], "labels": ["array-like"], "metric": [StrOptions(set(_VALID_METRICS) | {"precomputed"}), callable], - } + }, + prefer_skip_nested_validation=True, ) def silhouette_samples(X, labels, *, metric="euclidean", **kwds): """Compute the Silhouette Coefficient for each sample. 
@@ -232,7 +229,7 @@ def silhouette_samples(X, labels, *, metric="euclidean", **kwds): metric : str or callable, default='euclidean' The metric to use when calculating distance between instances in a feature array. If metric is a string, it must be one of the options - allowed by :func:`sklearn.metrics.pairwise.pairwise_distances`. + allowed by :func:`~sklearn.metrics.pairwise_distances`. If ``X`` is the distance array itself, use "precomputed" as the metric. Precomputed distance matrices must have 0 along the diagonal. @@ -302,7 +299,8 @@ def silhouette_samples(X, labels, *, metric="euclidean", **kwds): { "X": ["array-like"], "labels": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def calinski_harabasz_score(X, labels): """Compute the Calinski and Harabasz score. @@ -362,7 +360,8 @@ def calinski_harabasz_score(X, labels): { "X": ["array-like"], "labels": ["array-like"], - } + }, + prefer_skip_nested_validation=True, ) def davies_bouldin_score(X, labels): """Compute the Davies-Bouldin score. diff --git a/sklearn/metrics/cluster/tests/test_bicluster.py b/sklearn/metrics/cluster/tests/test_bicluster.py index 2cbcb6e6826c7..53f7805100a13 100644 --- a/sklearn/metrics/cluster/tests/test_bicluster.py +++ b/sklearn/metrics/cluster/tests/test_bicluster.py @@ -2,10 +2,9 @@ import numpy as np -from sklearn.utils._testing import assert_almost_equal - -from sklearn.metrics.cluster._bicluster import _jaccard from sklearn.metrics import consensus_score +from sklearn.metrics.cluster._bicluster import _jaccard +from sklearn.utils._testing import assert_almost_equal def test_jaccard(): diff --git a/sklearn/metrics/cluster/tests/test_common.py b/sklearn/metrics/cluster/tests/test_common.py index a4e8c4530dbe6..bc32b7df7f561 100644 --- a/sklearn/metrics/cluster/tests/test_common.py +++ b/sklearn/metrics/cluster/tests/test_common.py @@ -1,25 +1,25 @@ from functools import partial from itertools import chain -import pytest import numpy as np +import pytest -from sklearn.metrics.cluster import adjusted_mutual_info_score -from sklearn.metrics.cluster import adjusted_rand_score -from sklearn.metrics.cluster import rand_score -from sklearn.metrics.cluster import completeness_score -from sklearn.metrics.cluster import fowlkes_mallows_score -from sklearn.metrics.cluster import homogeneity_score -from sklearn.metrics.cluster import mutual_info_score -from sklearn.metrics.cluster import normalized_mutual_info_score -from sklearn.metrics.cluster import v_measure_score -from sklearn.metrics.cluster import silhouette_score -from sklearn.metrics.cluster import calinski_harabasz_score -from sklearn.metrics.cluster import davies_bouldin_score - +from sklearn.metrics.cluster import ( + adjusted_mutual_info_score, + adjusted_rand_score, + calinski_harabasz_score, + completeness_score, + davies_bouldin_score, + fowlkes_mallows_score, + homogeneity_score, + mutual_info_score, + normalized_mutual_info_score, + rand_score, + silhouette_score, + v_measure_score, +) from sklearn.utils._testing import assert_allclose - # Dictionaries of metrics # ------------------------ # The goal of having those dictionaries is to have an easy way to call a diff --git a/sklearn/metrics/cluster/tests/test_supervised.py b/sklearn/metrics/cluster/tests/test_supervised.py index 4356a0a05286c..dfaa58ff62c01 100644 --- a/sklearn/metrics/cluster/tests/test_supervised.py +++ b/sklearn/metrics/cluster/tests/test_supervised.py @@ -2,28 +2,27 @@ import numpy as np import pytest +from numpy.testing import assert_allclose, 
assert_array_almost_equal, assert_array_equal -from sklearn.metrics.cluster import adjusted_mutual_info_score -from sklearn.metrics.cluster import adjusted_rand_score -from sklearn.metrics.cluster import rand_score -from sklearn.metrics.cluster import completeness_score -from sklearn.metrics.cluster import contingency_matrix -from sklearn.metrics.cluster import pair_confusion_matrix -from sklearn.metrics.cluster import entropy -from sklearn.metrics.cluster import expected_mutual_information -from sklearn.metrics.cluster import fowlkes_mallows_score -from sklearn.metrics.cluster import homogeneity_completeness_v_measure -from sklearn.metrics.cluster import homogeneity_score -from sklearn.metrics.cluster import mutual_info_score -from sklearn.metrics.cluster import normalized_mutual_info_score -from sklearn.metrics.cluster import v_measure_score -from sklearn.metrics.cluster._supervised import _generalized_average -from sklearn.metrics.cluster._supervised import check_clusterings - +from sklearn.metrics.cluster import ( + adjusted_mutual_info_score, + adjusted_rand_score, + completeness_score, + contingency_matrix, + entropy, + expected_mutual_information, + fowlkes_mallows_score, + homogeneity_completeness_v_measure, + homogeneity_score, + mutual_info_score, + normalized_mutual_info_score, + pair_confusion_matrix, + rand_score, + v_measure_score, +) +from sklearn.metrics.cluster._supervised import _generalized_average, check_clusterings from sklearn.utils import assert_all_finite from sklearn.utils._testing import assert_almost_equal -from numpy.testing import assert_array_equal, assert_array_almost_equal, assert_allclose - score_funcs = [ adjusted_rand_score, diff --git a/sklearn/metrics/cluster/tests/test_unsupervised.py b/sklearn/metrics/cluster/tests/test_unsupervised.py index 8be2fe5cdae99..3549b0bf22797 100644 --- a/sklearn/metrics/cluster/tests/test_unsupervised.py +++ b/sklearn/metrics/cluster/tests/test_unsupervised.py @@ -2,19 +2,19 @@ import numpy as np import pytest - from numpy.testing import assert_allclose -from scipy.sparse import csr_matrix, csc_matrix, dok_matrix, lil_matrix -from scipy.sparse import issparse +from scipy.sparse import csc_matrix, csr_matrix, dok_matrix, issparse, lil_matrix from sklearn import datasets -from sklearn.utils._testing import assert_array_equal -from sklearn.metrics.cluster import silhouette_score -from sklearn.metrics.cluster import silhouette_samples -from sklearn.metrics.cluster._unsupervised import _silhouette_reduce from sklearn.metrics import pairwise_distances -from sklearn.metrics.cluster import calinski_harabasz_score -from sklearn.metrics.cluster import davies_bouldin_score +from sklearn.metrics.cluster import ( + calinski_harabasz_score, + davies_bouldin_score, + silhouette_samples, + silhouette_score, +) +from sklearn.metrics.cluster._unsupervised import _silhouette_reduce +from sklearn.utils._testing import assert_array_equal def test_silhouette(): diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 67b04e9382acb..ad0a7b01eb30d 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -8,41 +8,41 @@ # License: BSD 3 clause import itertools -from functools import partial import warnings +from functools import partial import numpy as np -from scipy.spatial import distance -from scipy.sparse import csr_matrix -from scipy.sparse import issparse from joblib import effective_n_jobs +from scipy.sparse import csr_matrix, issparse +from scipy.spatial import distance from .. 
import config_context -from ..utils.validation import _num_samples -from ..utils.validation import check_non_negative -from ..utils import check_array -from ..utils import gen_even_slices -from ..utils import gen_batches, get_chunk_n_rows -from ..utils import is_scalar_nan -from ..utils.extmath import row_norms, safe_sparse_dot +from ..exceptions import DataConversionWarning from ..preprocessing import normalize +from ..utils import ( + check_array, + gen_batches, + gen_even_slices, + get_chunk_n_rows, + is_scalar_nan, +) from ..utils._mask import _get_mask -from ..utils.parallel import delayed, Parallel -from ..utils.fixes import sp_base_version, parse_version from ..utils._param_validation import ( - validate_params, - Interval, - Real, - Integral, Hidden, + Integral, + Interval, MissingValues, - StrOptions, Options, + Real, + StrOptions, + validate_params, ) - +from ..utils.extmath import row_norms, safe_sparse_dot +from ..utils.fixes import parse_version, sp_base_version +from ..utils.parallel import Parallel, delayed +from ..utils.validation import _num_samples, check_non_negative from ._pairwise_distances_reduction import ArgKmin from ._pairwise_fast import _chi2_kernel_fast, _sparse_manhattan -from ..exceptions import DataConversionWarning # Utility Functions @@ -396,7 +396,8 @@ def _euclidean_distances(X, Y, X_norm_squared=None, Y_norm_squared=None, squared "squared": ["boolean"], "missing_values": [MissingValues(numeric_only=True)], "copy": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def nan_euclidean_distances( X, Y=None, *, squared=False, missing_values=np.nan, copy=True @@ -782,7 +783,8 @@ def pairwise_distances_argmin_min( callable, ], "metric_kwargs": [dict, None], - } + }, + prefer_skip_nested_validation=False, # metric is not validated yet ) def pairwise_distances_argmin(X, Y, *, axis=1, metric="euclidean", metric_kwargs=None): """Compute minimum distances between one point and a set of points. @@ -908,7 +910,8 @@ def pairwise_distances_argmin(X, Y, *, axis=1, metric="euclidean", metric_kwargs @validate_params( - {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix", None]} + {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix", None]}, + prefer_skip_nested_validation=True, ) def haversine_distances(X, Y=None): """Compute the Haversine distance between samples in X and Y. @@ -919,8 +922,9 @@ def haversine_distances(X, Y=None): in radians. The dimension of the data must be 2. .. math:: - D(x, y) = 2\\arcsin[\\sqrt{\\sin^2((x1 - y1) / 2) - + \\cos(x1)\\cos(y1)\\sin^2((x2 - y2) / 2)}] + D(x, y) = 2\\arcsin[\\sqrt{\\sin^2((x_{lat} - y_{lat}) / 2) + + \\cos(x_{lat})\\cos(y_{lat})\\ + sin^2((x_{lon} - y_{lon}) / 2)}] Parameters ---------- @@ -968,7 +972,8 @@ def haversine_distances(X, Y=None): "X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix", None], "sum_over_features": ["boolean", Hidden(StrOptions({"deprecated"}))], - } + }, + prefer_skip_nested_validation=True, ) def manhattan_distances(X, Y=None, *, sum_over_features="deprecated"): """Compute the L1 distances between the vectors in X and Y. @@ -1067,7 +1072,8 @@ def manhattan_distances(X, Y=None, *, sum_over_features="deprecated"): { "X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix", None], - } + }, + prefer_skip_nested_validation=True, ) def cosine_distances(X, Y=None): """Compute cosine distance between samples in X and Y. 
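The haversine docstring touched above expects `[latitude, longitude]` pairs in radians; a short usage sketch (the city coordinates and the ~6371 km Earth radius are illustrative assumptions, not part of the patch):

import numpy as np

from sklearn.metrics.pairwise import haversine_distances

bsas = np.radians([-34.83, -58.52])   # Buenos Aires [lat, lon] in radians
paris = np.radians([49.01, 2.54])     # Paris [lat, lon] in radians

result = haversine_distances([bsas, paris])
print(result * 6371)  # scale by the Earth radius in km; roughly 11,100 km off-diagonal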
@@ -1109,7 +1115,8 @@ def cosine_distances(X, Y=None): # Paired distances @validate_params( - {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix"]} + {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix"]}, + prefer_skip_nested_validation=True, ) def paired_euclidean_distances(X, Y): """Compute the paired euclidean distances between X and Y. @@ -1135,7 +1142,8 @@ def paired_euclidean_distances(X, Y): @validate_params( - {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix"]} + {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix"]}, + prefer_skip_nested_validation=True, ) def paired_manhattan_distances(X, Y): """Compute the paired L1 distances between X and Y. @@ -1178,7 +1186,8 @@ def paired_manhattan_distances(X, Y): @validate_params( - {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix"]} + {"X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix"]}, + prefer_skip_nested_validation=True, ) def paired_cosine_distances(X, Y): """ @@ -1257,7 +1266,8 @@ def paired_distances(X, Y, *, metric="euclidean", **kwds): See Also -------- - pairwise_distances : Computes the distance between every pair of samples. + sklearn.metrics.pairwise_distances : Computes the distance between every pair of + samples. Examples -------- @@ -1288,7 +1298,8 @@ def paired_distances(X, Y, *, metric="euclidean", **kwds): "X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix", None], "dense_output": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def linear_kernel(X, Y=None, dense_output=True): """ @@ -1330,7 +1341,8 @@ def linear_kernel(X, Y=None, dense_output=True): Hidden(np.ndarray), ], "coef0": [Interval(Real, None, None, closed="neither")], - } + }, + prefer_skip_nested_validation=True, ) def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1): """ @@ -1383,7 +1395,8 @@ def polynomial_kernel(X, Y=None, degree=3, gamma=None, coef0=1): Hidden(np.ndarray), ], "coef0": [Interval(Real, None, None, closed="neither")], - } + }, + prefer_skip_nested_validation=True, ) def sigmoid_kernel(X, Y=None, gamma=None, coef0=1): """Compute the sigmoid kernel between X and Y. @@ -1431,7 +1444,8 @@ def sigmoid_kernel(X, Y=None, gamma=None, coef0=1): None, Hidden(np.ndarray), ], - } + }, + prefer_skip_nested_validation=True, ) def rbf_kernel(X, Y=None, gamma=None): """Compute the rbf (gaussian) kernel between X and Y. @@ -1477,7 +1491,8 @@ def rbf_kernel(X, Y=None, gamma=None): Hidden(np.ndarray), None, ], - } + }, + prefer_skip_nested_validation=True, ) def laplacian_kernel(X, Y=None, gamma=None): """Compute the laplacian kernel between X and Y. @@ -1521,7 +1536,8 @@ def laplacian_kernel(X, Y=None, gamma=None): "X": ["array-like", "sparse matrix"], "Y": ["array-like", "sparse matrix", None], "dense_output": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def cosine_similarity(X, Y=None, dense_output=True): """Compute cosine similarity between samples in X and Y. @@ -1572,7 +1588,10 @@ def cosine_similarity(X, Y=None, dense_output=True): return K -@validate_params({"X": ["array-like"], "Y": ["array-like", None]}) +@validate_params( + {"X": ["array-like"], "Y": ["array-like", None]}, + prefer_skip_nested_validation=True, +) def additive_chi2_kernel(X, Y=None): """Compute the additive chi-squared kernel between observations in X and Y. 
@@ -2122,8 +2141,8 @@ def pairwise_distances( pairwise_distances_chunked : Performs the same calculation as this function, but returns a generator of chunks of the distance matrix, in order to limit memory usage. - paired_distances : Computes the distances between corresponding elements - of two arrays. + sklearn.metrics.pairwise.paired_distances : Computes the distances between + corresponding elements of two arrays. """ if ( metric not in _VALID_METRICS @@ -2236,7 +2255,7 @@ def kernel_metrics(): Returns ------- - kernal_metrics : dict + kernel_metrics : dict Returns valid metrics for pairwise_kernels. """ return PAIRWISE_KERNEL_FUNCTIONS diff --git a/sklearn/metrics/tests/test_classification.py b/sklearn/metrics/tests/test_classification.py index 382f09c5e8eb4..afa3b90d5e8a9 100644 --- a/sklearn/metrics/tests/test_classification.py +++ b/sklearn/metrics/tests/test_classification.py @@ -1,55 +1,55 @@ -from functools import partial -from itertools import product -from itertools import chain -from itertools import permutations -import warnings import re +import warnings +from functools import partial +from itertools import chain, permutations, product import numpy as np +import pytest from scipy import linalg +from scipy.spatial.distance import hamming as sp_hamming from scipy.stats import bernoulli -import pytest - -from sklearn import datasets -from sklearn import svm +from sklearn import datasets, svm from sklearn.datasets import make_multilabel_classification -from sklearn.preprocessing import label_binarize, LabelBinarizer -from sklearn.utils.validation import check_random_state -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_no_warnings -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._mocking import MockDataFrame - -from sklearn.metrics import accuracy_score -from sklearn.metrics import average_precision_score -from sklearn.metrics import balanced_accuracy_score -from sklearn.metrics import class_likelihood_ratios -from sklearn.metrics import classification_report -from sklearn.metrics import cohen_kappa_score -from sklearn.metrics import confusion_matrix -from sklearn.metrics import f1_score -from sklearn.metrics import fbeta_score -from sklearn.metrics import hamming_loss -from sklearn.metrics import hinge_loss -from sklearn.metrics import jaccard_score -from sklearn.metrics import log_loss -from sklearn.metrics import matthews_corrcoef -from sklearn.metrics import precision_recall_fscore_support -from sklearn.metrics import precision_score -from sklearn.metrics import recall_score -from sklearn.metrics import zero_one_loss -from sklearn.metrics import brier_score_loss -from sklearn.metrics import multilabel_confusion_matrix - -from sklearn.metrics._classification import _check_targets from sklearn.exceptions import UndefinedMetricWarning +from sklearn.metrics import ( + accuracy_score, + average_precision_score, + balanced_accuracy_score, + brier_score_loss, + class_likelihood_ratios, + classification_report, + cohen_kappa_score, + confusion_matrix, + f1_score, + fbeta_score, + hamming_loss, + hinge_loss, + jaccard_score, + log_loss, + make_scorer, + matthews_corrcoef, + multilabel_confusion_matrix, + precision_recall_fscore_support, + precision_score, + recall_score, + zero_one_loss, +) +from sklearn.metrics._classification import 
_check_targets +from sklearn.model_selection import cross_val_score +from sklearn.preprocessing import LabelBinarizer, label_binarize +from sklearn.tree import DecisionTreeClassifier +from sklearn.utils._mocking import MockDataFrame +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + assert_no_warnings, + ignore_warnings, +) from sklearn.utils.extmath import _nanaverage - -from scipy.spatial.distance import hamming as sp_hamming +from sklearn.utils.validation import check_random_state ############################################################################### # Utilities for testing @@ -162,10 +162,10 @@ def test_classification_report_dictionary_output(): for metric in expected_report[key]: assert_almost_equal(expected_report[key][metric], report[key][metric]) - assert type(expected_report["setosa"]["precision"]) == float - assert type(expected_report["macro avg"]["precision"]) == float - assert type(expected_report["setosa"]["support"]) == int - assert type(expected_report["macro avg"]["support"]) == int + assert isinstance(expected_report["setosa"]["precision"], float) + assert isinstance(expected_report["macro avg"]["precision"], float) + assert isinstance(expected_report["setosa"]["support"], int) + assert isinstance(expected_report["macro avg"]["support"], int) def test_classification_report_output_dict_empty_input(): @@ -2699,7 +2699,7 @@ def test_log_loss_pandas_input(): y_pr = np.array([[0.2, 0.7], [0.6, 0.5], [0.4, 0.1], [0.7, 0.2]]) types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((Series, DataFrame)) except ImportError: @@ -2805,3 +2805,27 @@ def test_classification_metric_pos_label_types(metric, classes): y_pred = y_true.copy() result = metric(y_true, y_pred, pos_label=pos_label) assert not np.any(np.isnan(result)) + + +@pytest.mark.parametrize( + "scoring", + [ + make_scorer(f1_score, zero_division=np.nan), + make_scorer(fbeta_score, beta=2, zero_division=np.nan), + make_scorer(precision_score, zero_division=np.nan), + make_scorer(recall_score, zero_division=np.nan), + ], +) +def test_classification_metric_division_by_zero_nan_validaton(scoring): + """Check that we validate `np.nan` properly for classification metrics. + + With `n_jobs=2` in cross-validation, the `np.nan` used for the singleton will be + different in the sub-process and we should not use the `is` operator but + `math.isnan`. 
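The pitfall described in the new test docstring above can be reproduced in isolation; a minimal sketch in which pickling stands in for the hand-off to a joblib worker process:

import math
import pickle

import numpy as np

nan_roundtrip = pickle.loads(pickle.dumps(np.nan))
print(nan_roundtrip is np.nan)    # typically False: object identity is not preserved
print(math.isnan(nan_roundtrip))  # True: a value-based check stays reliable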
+ + Non-regression test for: + https://github.com/scikit-learn/scikit-learn/issues/27563 + """ + X, y = datasets.make_classification(random_state=0) + classifier = DecisionTreeClassifier(max_depth=3, random_state=0).fit(X, y) + cross_val_score(classifier, X, y, scoring=scoring, n_jobs=2, error_score="raise") diff --git a/sklearn/metrics/tests/test_common.py b/sklearn/metrics/tests/test_common.py index 6a4ecb1e96988..6b132ccd2c37a 100644 --- a/sklearn/metrics/tests/test_common.py +++ b/sklearn/metrics/tests/test_common.py @@ -1,71 +1,67 @@ from functools import partial from inspect import signature -from itertools import product -from itertools import chain -from itertools import permutations +from itertools import chain, permutations, product import numpy as np -import scipy.sparse as sp - import pytest +import scipy.sparse as sp from sklearn.datasets import make_multilabel_classification +from sklearn.metrics import ( + accuracy_score, + average_precision_score, + balanced_accuracy_score, + brier_score_loss, + cohen_kappa_score, + confusion_matrix, + coverage_error, + d2_absolute_error_score, + d2_pinball_score, + d2_tweedie_score, + dcg_score, + det_curve, + explained_variance_score, + f1_score, + fbeta_score, + hamming_loss, + hinge_loss, + jaccard_score, + label_ranking_average_precision_score, + label_ranking_loss, + log_loss, + matthews_corrcoef, + max_error, + mean_absolute_error, + mean_absolute_percentage_error, + mean_gamma_deviance, + mean_pinball_loss, + mean_poisson_deviance, + mean_squared_error, + mean_tweedie_deviance, + median_absolute_error, + multilabel_confusion_matrix, + ndcg_score, + precision_recall_curve, + precision_score, + r2_score, + recall_score, + roc_auc_score, + roc_curve, + top_k_accuracy_score, + zero_one_loss, +) +from sklearn.metrics._base import _average_binary_score from sklearn.preprocessing import LabelBinarizer -from sklearn.utils.multiclass import type_of_target -from sklearn.utils.validation import _num_samples -from sklearn.utils.validation import check_random_state from sklearn.utils import shuffle - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_less -from sklearn.utils._testing import ignore_warnings - -from sklearn.metrics import accuracy_score -from sklearn.metrics import average_precision_score -from sklearn.metrics import balanced_accuracy_score -from sklearn.metrics import brier_score_loss -from sklearn.metrics import cohen_kappa_score -from sklearn.metrics import confusion_matrix -from sklearn.metrics import coverage_error -from sklearn.metrics import d2_tweedie_score -from sklearn.metrics import d2_pinball_score -from sklearn.metrics import d2_absolute_error_score -from sklearn.metrics import det_curve -from sklearn.metrics import explained_variance_score -from sklearn.metrics import f1_score -from sklearn.metrics import fbeta_score -from sklearn.metrics import hamming_loss -from sklearn.metrics import hinge_loss -from sklearn.metrics import jaccard_score -from sklearn.metrics import label_ranking_average_precision_score -from sklearn.metrics import label_ranking_loss -from sklearn.metrics import log_loss -from sklearn.metrics import max_error -from sklearn.metrics import matthews_corrcoef -from sklearn.metrics import mean_absolute_error -from sklearn.metrics import mean_absolute_percentage_error -from sklearn.metrics import mean_squared_error -from sklearn.metrics import 
mean_tweedie_deviance -from sklearn.metrics import mean_poisson_deviance -from sklearn.metrics import mean_gamma_deviance -from sklearn.metrics import median_absolute_error -from sklearn.metrics import multilabel_confusion_matrix -from sklearn.metrics import mean_pinball_loss -from sklearn.metrics import precision_recall_curve -from sklearn.metrics import precision_score -from sklearn.metrics import r2_score -from sklearn.metrics import recall_score -from sklearn.metrics import roc_auc_score -from sklearn.metrics import roc_curve -from sklearn.metrics import zero_one_loss -from sklearn.metrics import ndcg_score -from sklearn.metrics import dcg_score -from sklearn.metrics import top_k_accuracy_score - -from sklearn.metrics._base import _average_binary_score - +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_equal, + assert_array_less, + ignore_warnings, +) +from sklearn.utils.multiclass import type_of_target +from sklearn.utils.validation import _num_samples, check_random_state # Note toward developers about metric testing # ------------------------------------------- diff --git a/sklearn/metrics/tests/test_dist_metrics.py b/sklearn/metrics/tests/test_dist_metrics.py index fc9b006e2fefd..be5d12f0414b8 100644 --- a/sklearn/metrics/tests/test_dist_metrics.py +++ b/sklearn/metrics/tests/test_dist_metrics.py @@ -1,22 +1,21 @@ +import copy import itertools import pickle -import copy import numpy as np import pytest - import scipy.sparse as sp from scipy.spatial.distance import cdist -from sklearn.metrics import DistanceMetric +from sklearn.metrics import DistanceMetric from sklearn.metrics._dist_metrics import ( BOOL_METRICS, DistanceMetric32, DistanceMetric64, ) - from sklearn.utils import check_random_state from sklearn.utils._testing import assert_allclose, create_memmap_backed_data +from sklearn.utils.fixes import parse_version, sp_version def dist_func(x1, x2, p): @@ -44,18 +43,17 @@ def dist_func(x1, x2, p): V = rng.random_sample((d, d)) VI = np.dot(V, V.T) - METRICS_DEFAULT_PARAMS = [ ("euclidean", {}), ("cityblock", {}), - ("minkowski", dict(p=(1, 1.5, 2, 3))), + ("minkowski", dict(p=(0.5, 1, 1.5, 2, 3))), ("chebyshev", {}), ("seuclidean", dict(V=(rng.random_sample(d),))), ("mahalanobis", dict(VI=(VI,))), ("hamming", {}), ("canberra", {}), ("braycurtis", {}), - ("minkowski", dict(p=(1, 1.5, 3), w=(rng.random_sample(d),))), + ("minkowski", dict(p=(0.5, 1, 1.5, 3), w=(rng.random_sample(d),))), ] @@ -78,6 +76,13 @@ def test_cdist(metric_param_grid, X, Y): # with scipy rtol_dict = {"rtol": 1e-6} + # TODO: Remove when scipy minimum version >= 1.7.0 + # scipy supports 0= 1.7.0 + if metric == "minkowski": + p = kwargs["p"] + if sp_version < parse_version("1.7.0") and p < 1: + pytest.skip("scipy does not support 0= 1.7.0 + # scipy supports 0= 1.7.0 + if metric == "minkowski": + p = kwargs["p"] + if sp_version < parse_version("1.7.0") and p < 1: + pytest.skip("scipy does not support 0 0.8 diff --git a/sklearn/mixture/__init__.py b/sklearn/mixture/__init__.py index c5c20aa38eb18..f0018196ffc98 100644 --- a/sklearn/mixture/__init__.py +++ b/sklearn/mixture/__init__.py @@ -2,8 +2,7 @@ The :mod:`sklearn.mixture` module implements mixture modeling algorithms. 
""" -from ._gaussian_mixture import GaussianMixture from ._bayesian_mixture import BayesianGaussianMixture - +from ._gaussian_mixture import GaussianMixture __all__ = ["GaussianMixture", "BayesianGaussianMixture"] diff --git a/sklearn/mixture/_base.py b/sklearn/mixture/_base.py index fbca4f1d49dcd..9fb1c232c1012 100644 --- a/sklearn/mixture/_base.py +++ b/sklearn/mixture/_base.py @@ -6,21 +6,19 @@ import warnings from abc import ABCMeta, abstractmethod -from time import time from numbers import Integral, Real +from time import time import numpy as np from scipy.special import logsumexp from .. import cluster +from ..base import BaseEstimator, DensityMixin, _fit_context from ..cluster import kmeans_plusplus -from ..base import BaseEstimator -from ..base import DensityMixin -from ..base import _fit_context from ..exceptions import ConvergenceWarning from ..utils import check_random_state -from ..utils.validation import check_is_fitted from ..utils._param_validation import Interval, StrOptions +from ..utils.validation import check_is_fitted def _check_shape(param, param_shape, name): diff --git a/sklearn/mixture/_bayesian_mixture.py b/sklearn/mixture/_bayesian_mixture.py index da4eadedff44f..f4169b3e1f4ee 100644 --- a/sklearn/mixture/_bayesian_mixture.py +++ b/sklearn/mixture/_bayesian_mixture.py @@ -4,19 +4,22 @@ # License: BSD 3 clause import math +from numbers import Real + import numpy as np from scipy.special import betaln, digamma, gammaln -from numbers import Real -from ._base import BaseMixture, _check_shape -from ._gaussian_mixture import _check_precision_matrix -from ._gaussian_mixture import _check_precision_positivity -from ._gaussian_mixture import _compute_log_det_cholesky -from ._gaussian_mixture import _compute_precision_cholesky -from ._gaussian_mixture import _estimate_gaussian_parameters -from ._gaussian_mixture import _estimate_log_gaussian_prob from ..utils import check_array from ..utils._param_validation import Interval, StrOptions +from ._base import BaseMixture, _check_shape +from ._gaussian_mixture import ( + _check_precision_matrix, + _check_precision_positivity, + _compute_log_det_cholesky, + _compute_precision_cholesky, + _estimate_gaussian_parameters, + _estimate_log_gaussian_prob, +) def _log_dirichlet_norm(dirichlet_concentration): diff --git a/sklearn/mixture/_gaussian_mixture.py b/sklearn/mixture/_gaussian_mixture.py index e0b630f37c163..9fcc791032c48 100644 --- a/sklearn/mixture/_gaussian_mixture.py +++ b/sklearn/mixture/_gaussian_mixture.py @@ -5,14 +5,12 @@ # License: BSD 3 clause import numpy as np - from scipy import linalg -from ._base import BaseMixture, _check_shape from ..utils import check_array -from ..utils.extmath import row_norms from ..utils._param_validation import StrOptions - +from ..utils.extmath import row_norms +from ._base import BaseMixture, _check_shape ############################################################################### # Gaussian mixture shape checkers used by the GaussianMixture class @@ -350,6 +348,61 @@ def _compute_precision_cholesky(covariances, covariance_type): return precisions_chol +def _flipudlr(array): + """Reverse the rows and columns of an array.""" + return np.flipud(np.fliplr(array)) + + +def _compute_precision_cholesky_from_precisions(precisions, covariance_type): + r"""Compute the Cholesky decomposition of precisions using precisions themselves. 
+ + As implemented in :func:`_compute_precision_cholesky`, the `precisions_cholesky_` is + an upper-triangular matrix for each Gaussian component, which can be expressed as + the $UU^T$ factorization of the precision matrix for each Gaussian component, where + $U$ is an upper-triangular matrix. + + In order to use the Cholesky decomposition to get $UU^T$, the precision matrix + $\Lambda$ needs to be permutated such that its rows and columns are reversed, which + can be done by applying a similarity transformation with an exchange matrix $J$, + where the 1 elements reside on the anti-diagonal and all other elements are 0. In + particular, the Cholesky decomposition of the transformed precision matrix is + $J\Lambda J=LL^T$, where $L$ is a lower-triangular matrix. Because $\Lambda=UU^T$ + and $J=J^{-1}=J^T$, the `precisions_cholesky_` for each Gaussian component can be + expressed as $JLJ$. + + Refer to #26415 for details. + + Parameters + ---------- + precisions : array-like + The precision matrix of the current components. + The shape depends on the covariance_type. + + covariance_type : {'full', 'tied', 'diag', 'spherical'} + The type of precision matrices. + + Returns + ------- + precisions_cholesky : array-like + The cholesky decomposition of sample precisions of the current + components. The shape depends on the covariance_type. + """ + if covariance_type == "full": + precisions_cholesky = np.array( + [ + _flipudlr(linalg.cholesky(_flipudlr(precision), lower=True)) + for precision in precisions + ] + ) + elif covariance_type == "tied": + precisions_cholesky = _flipudlr( + linalg.cholesky(_flipudlr(precisions), lower=True) + ) + else: + precisions_cholesky = np.sqrt(precisions) + return precisions_cholesky + + ############################################################################### # Gaussian mixture probability estimators def _compute_log_det_cholesky(matrix_chol, covariance_type, n_features): @@ -725,19 +778,10 @@ def _initialize(self, X, resp): self.precisions_cholesky_ = _compute_precision_cholesky( covariances, self.covariance_type ) - elif self.covariance_type == "full": - self.precisions_cholesky_ = np.array( - [ - linalg.cholesky(prec_init, lower=True) - for prec_init in self.precisions_init - ] - ) - elif self.covariance_type == "tied": - self.precisions_cholesky_ = linalg.cholesky( - self.precisions_init, lower=True - ) else: - self.precisions_cholesky_ = np.sqrt(self.precisions_init) + self.precisions_cholesky_ = _compute_precision_cholesky_from_precisions( + self.precisions_init, self.covariance_type + ) def _m_step(self, X, log_resp): """M step. 
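The JLJ construction described above is easy to sanity-check outside the mixture code; a standalone sketch in which a random positive-definite matrix stands in for a component precision:

import numpy as np
from scipy import linalg


def flip(mat):
    # Similarity transform with the exchange matrix J: reverse rows and columns.
    return np.flipud(np.fliplr(mat))


rng = np.random.RandomState(0)
A = rng.random_sample((3, 3))
precision = A @ A.T + 3 * np.eye(3)               # symmetric positive definite

L = linalg.cholesky(flip(precision), lower=True)  # J @ precision @ J = L @ L.T
U = flip(L)                                       # J @ L @ J is upper-triangular

np.testing.assert_allclose(U @ U.T, precision)    # U @ U.T recovers the precision
assert np.allclose(np.tril(U, k=-1), 0)           # and U has no entries below the diagonal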
diff --git a/sklearn/mixture/tests/test_bayesian_mixture.py b/sklearn/mixture/tests/test_bayesian_mixture.py index 4e666a054bbd0..9c6eb4a86ea0d 100644 --- a/sklearn/mixture/tests/test_bayesian_mixture.py +++ b/sklearn/mixture/tests/test_bayesian_mixture.py @@ -4,23 +4,19 @@ import copy import numpy as np -from scipy.special import gammaln import pytest +from scipy.special import gammaln -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal - +from sklearn.exceptions import ConvergenceWarning, NotFittedError from sklearn.metrics.cluster import adjusted_rand_score - -from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm -from sklearn.mixture._bayesian_mixture import _log_wishart_norm - from sklearn.mixture import BayesianGaussianMixture - +from sklearn.mixture._bayesian_mixture import _log_dirichlet_norm, _log_wishart_norm from sklearn.mixture.tests.test_gaussian_mixture import RandomData -from sklearn.exceptions import ConvergenceWarning, NotFittedError -from sklearn.utils._testing import ignore_warnings - +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_equal, + ignore_warnings, +) COVARIANCE_TYPE = ["full", "tied", "diag", "spherical"] PRIOR_TYPE = ["dirichlet_process", "dirichlet_distribution"] diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index f2d634b3fffe5..2f39033faed6b 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -2,39 +2,40 @@ # Thierry Guillemot # License: BSD 3 clause +import copy import itertools import re import sys -import copy import warnings -import pytest +from io import StringIO import numpy as np -from scipy import stats, linalg +import pytest +from scipy import linalg, stats from sklearn.cluster import KMeans from sklearn.covariance import EmpiricalCovariance from sklearn.datasets import make_spd_matrix -from io import StringIO +from sklearn.exceptions import ConvergenceWarning, NotFittedError from sklearn.metrics.cluster import adjusted_rand_score from sklearn.mixture import GaussianMixture from sklearn.mixture._gaussian_mixture import ( - _estimate_gaussian_covariances_full, - _estimate_gaussian_covariances_tied, + _compute_log_det_cholesky, + _compute_precision_cholesky, _estimate_gaussian_covariances_diag, + _estimate_gaussian_covariances_full, _estimate_gaussian_covariances_spherical, + _estimate_gaussian_covariances_tied, _estimate_gaussian_parameters, - _compute_precision_cholesky, - _compute_log_det_cholesky, ) -from sklearn.exceptions import ConvergenceWarning, NotFittedError +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) from sklearn.utils.extmath import fast_logdet -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings - COVARIANCE_TYPE = ["full", "tied", "diag", "spherical"] @@ -1325,6 +1326,58 @@ def test_gaussian_mixture_precisions_init_diag(): ) +def _generate_data(seed, n_samples, n_features, n_components): + """Randomly generate samples and responsibilities.""" + rs = np.random.RandomState(seed) + X = rs.random_sample((n_samples, n_features)) + resp = rs.random_sample((n_samples, n_components)) + resp /= 
resp.sum(axis=1)[:, np.newaxis] + return X, resp + + +def _calculate_precisions(X, resp, covariance_type): + """Calculate precision matrix of X and its Cholesky decomposition + for the given covariance type. + """ + reg_covar = 1e-6 + weights, means, covariances = _estimate_gaussian_parameters( + X, resp, reg_covar, covariance_type + ) + precisions_cholesky = _compute_precision_cholesky(covariances, covariance_type) + + _, n_components = resp.shape + # Instantiate a `GaussianMixture` model in order to use its + # `_set_parameters` method to return the `precisions_` and + # `precisions_cholesky_` from matching the `covariance_type` + # provided. + gmm = GaussianMixture(n_components=n_components, covariance_type=covariance_type) + params = (weights, means, covariances, precisions_cholesky) + gmm._set_parameters(params) + return gmm.precisions_, gmm.precisions_cholesky_ + + +@pytest.mark.parametrize("covariance_type", COVARIANCE_TYPE) +def test_gaussian_mixture_precisions_init(covariance_type, global_random_seed): + """Non-regression test for #26415.""" + + X, resp = _generate_data( + seed=global_random_seed, + n_samples=100, + n_features=3, + n_components=4, + ) + + precisions_init, desired_precisions_cholesky = _calculate_precisions( + X, resp, covariance_type + ) + gmm = GaussianMixture( + covariance_type=covariance_type, precisions_init=precisions_init + ) + gmm._initialize(X, resp) + actual_precisions_cholesky = gmm.precisions_cholesky_ + assert_allclose(actual_precisions_cholesky, desired_precisions_cholesky) + + def test_gaussian_mixture_single_component_stable(): """ Non-regression test for #23032 ensuring 1-component GM works on only a diff --git a/sklearn/mixture/tests/test_mixture.py b/sklearn/mixture/tests/test_mixture.py index eeb71d0f89407..f0ea3494f0e7d 100644 --- a/sklearn/mixture/tests/test_mixture.py +++ b/sklearn/mixture/tests/test_mixture.py @@ -1,11 +1,10 @@ # Author: Guillaume Lemaitre # License: BSD 3 clause -import pytest import numpy as np +import pytest -from sklearn.mixture import GaussianMixture -from sklearn.mixture import BayesianGaussianMixture +from sklearn.mixture import BayesianGaussianMixture, GaussianMixture @pytest.mark.parametrize("estimator", [GaussianMixture(), BayesianGaussianMixture()]) diff --git a/sklearn/model_selection/__init__.py b/sklearn/model_selection/__init__.py index 4a3f5d1e239a8..d7d316d95ada4 100644 --- a/sklearn/model_selection/__init__.py +++ b/sklearn/model_selection/__init__.py @@ -1,39 +1,36 @@ import typing -from ._split import BaseCrossValidator -from ._split import BaseShuffleSplit -from ._split import KFold -from ._split import GroupKFold -from ._split import StratifiedKFold -from ._split import TimeSeriesSplit -from ._split import LeaveOneGroupOut -from ._split import LeaveOneOut -from ._split import LeavePGroupsOut -from ._split import LeavePOut -from ._split import RepeatedKFold -from ._split import RepeatedStratifiedKFold -from ._split import ShuffleSplit -from ._split import GroupShuffleSplit -from ._split import StratifiedShuffleSplit -from ._split import StratifiedGroupKFold -from ._split import PredefinedSplit -from ._split import train_test_split -from ._split import check_cv - -from ._validation import cross_val_score -from ._validation import cross_val_predict -from ._validation import cross_validate -from ._validation import learning_curve -from ._validation import permutation_test_score -from ._validation import validation_curve - -from ._search import GridSearchCV -from ._search import RandomizedSearchCV -from 
._search import ParameterGrid -from ._search import ParameterSampler - -from ._plot import LearningCurveDisplay -from ._plot import ValidationCurveDisplay +from ._plot import LearningCurveDisplay, ValidationCurveDisplay +from ._search import GridSearchCV, ParameterGrid, ParameterSampler, RandomizedSearchCV +from ._split import ( + BaseCrossValidator, + BaseShuffleSplit, + GroupKFold, + GroupShuffleSplit, + KFold, + LeaveOneGroupOut, + LeaveOneOut, + LeavePGroupsOut, + LeavePOut, + PredefinedSplit, + RepeatedKFold, + RepeatedStratifiedKFold, + ShuffleSplit, + StratifiedGroupKFold, + StratifiedKFold, + StratifiedShuffleSplit, + TimeSeriesSplit, + check_cv, + train_test_split, +) +from ._validation import ( + cross_val_predict, + cross_val_score, + cross_validate, + learning_curve, + permutation_test_score, + validation_curve, +) if typing.TYPE_CHECKING: # Avoid errors in type checkers (e.g. mypy) for experimental estimators. diff --git a/sklearn/model_selection/_plot.py b/sklearn/model_selection/_plot.py index bc5a600e57234..b36f16d415c7a 100644 --- a/sklearn/model_selection/_plot.py +++ b/sklearn/model_selection/_plot.py @@ -2,9 +2,9 @@ import numpy as np -from . import learning_curve, validation_curve from ..utils import check_matplotlib_support -from ..utils._plotting import _validate_score_name, _interval_max_min_ratio +from ..utils._plotting import _interval_max_min_ratio, _validate_score_name +from ._validation import learning_curve, validation_curve class _BaseCurveDisplay: @@ -380,7 +380,7 @@ def from_estimator( For int/None inputs, if the estimator is a classifier and `y` is either binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. In all - other cases, :class:`~sklearn.model_selectionKFold` is used. These + other cases, :class:`~sklearn.model_selection.KFold` is used. These splitters are instantiated with `shuffle=False` so the splits will be the same across calls. @@ -552,7 +552,7 @@ class ValidationCurveDisplay(_BaseCurveDisplay): param_name : str Name of the parameter that has been varied. - param_range : ndarray of shape (n_ticks,) + param_range : array-like of shape (n_ticks,) The values of the parameter that have been evaluated. train_scores : ndarray of shape (n_ticks, n_cv_folds) @@ -772,7 +772,7 @@ def from_estimator( For int/None inputs, if the estimator is a classifier and `y` is either binary or multiclass, :class:`~sklearn.model_selection.StratifiedKFold` is used. In all - other cases, :class:`~sklearn.model_selectionKFold` is used. These + other cases, :class:`~sklearn.model_selection.KFold` is used. These splitters are instantiated with `shuffle=False` so the splits will be the same across calls. 
@@ -891,7 +891,7 @@ def from_estimator( viz = cls( param_name=param_name, - param_range=param_range, + param_range=np.array(param_range, copy=False), train_scores=train_scores, test_scores=test_scores, score_name=score_name, diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 695614f4e1fa0..c2e94712ea97a 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -10,39 +10,39 @@ # Raghav RV # License: BSD 3 clause -from abc import ABCMeta, abstractmethod -from collections import defaultdict -from collections.abc import Mapping, Sequence, Iterable -from functools import partial, reduce -from itertools import product import numbers import operator import time import warnings +from abc import ABCMeta, abstractmethod +from collections import defaultdict +from collections.abc import Iterable, Mapping, Sequence +from functools import partial, reduce +from itertools import product import numpy as np from numpy.ma import MaskedArray from scipy.stats import rankdata -from ..base import BaseEstimator, is_classifier, clone -from ..base import MetaEstimatorMixin -from ..base import _fit_context -from ._split import check_cv -from ._validation import _fit_and_score -from ._validation import _aggregate_score_dicts -from ._validation import _insert_error_scores -from ._validation import _normalize_score_results -from ._validation import _warn_or_raise_about_fit_failures +from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone, is_classifier from ..exceptions import NotFittedError +from ..metrics import check_scoring +from ..metrics._scorer import _check_multimetric_scoring, get_scorer_names from ..utils import check_random_state -from ..utils.random import sample_without_replacement from ..utils._param_validation import HasMethods, Interval, StrOptions from ..utils._tags import _safe_tags -from ..utils.validation import indexable, check_is_fitted, _check_fit_params from ..utils.metaestimators import available_if -from ..utils.parallel import delayed, Parallel -from ..metrics._scorer import _check_multimetric_scoring, get_scorer_names -from ..metrics import check_scoring +from ..utils.parallel import Parallel, delayed +from ..utils.random import sample_without_replacement +from ..utils.validation import _check_fit_params, check_is_fitted, indexable +from ._split import check_cv +from ._validation import ( + _aggregate_score_dicts, + _fit_and_score, + _insert_error_scores, + _normalize_score_results, + _warn_or_raise_about_fit_failures, +) __all__ = ["GridSearchCV", "ParameterGrid", "ParameterSampler", "RandomizedSearchCV"] @@ -923,11 +923,14 @@ def evaluate_candidates(candidate_params, cv=None, more_results=None): self.best_params_ = results["params"][self.best_index_] if self.refit: - # we clone again after setting params in case some - # of the params are estimators as well. - self.best_estimator_ = clone( - clone(base_estimator).set_params(**self.best_params_) + # here we clone the estimator as well as the parameters, since + # sometimes the parameters themselves might be estimators, e.g. + # when we search over different estimators in a pipeline. 
+ # ref: https://github.com/scikit-learn/scikit-learn/pull/26786 + self.best_estimator_ = clone(base_estimator).set_params( + **clone(self.best_params_, safe=False) ) + refit_start_time = time.time() if y is not None: self.best_estimator_.fit(X, y, **fit_params) diff --git a/sklearn/model_selection/_search_successive_halving.py b/sklearn/model_selection/_search_successive_halving.py index a061d7283b46d..708092d09a2a5 100644 --- a/sklearn/model_selection/_search_successive_halving.py +++ b/sklearn/model_selection/_search_successive_halving.py @@ -1,20 +1,19 @@ +from abc import abstractmethod from copy import deepcopy from math import ceil, floor, log -from abc import abstractmethod from numbers import Integral, Real import numpy as np -from ._search import BaseSearchCV -from . import ParameterGrid, ParameterSampler -from ..base import is_classifier -from ..base import _fit_context -from ._split import check_cv, _yields_constant_splits + +from ..base import _fit_context, is_classifier from ..metrics._scorer import get_scorer_names from ..utils import resample from ..utils._param_validation import Interval, StrOptions from ..utils.multiclass import check_classification_targets from ..utils.validation import _num_samples - +from . import ParameterGrid, ParameterSampler +from ._search import BaseSearchCV +from ._split import _yields_constant_splits, check_cv __all__ = ["HalvingGridSearchCV", "HalvingRandomSearchCV"] @@ -751,11 +750,13 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): Either estimator needs to provide a ``score`` function, or ``scoring`` must be passed. - param_distributions : dict - Dictionary with parameters names (string) as keys and distributions + param_distributions : dict or list of dicts + Dictionary with parameters names (`str`) as keys and distributions or lists of parameters to try. Distributions must provide a ``rvs`` method for sampling (such as those from scipy.stats.distributions). If a list is given, it is sampled uniformly. + If a list of dicts is given, first a dict is sampled uniformly, and + then a parameter is sampled using that dict as above. 
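The comment above refers to search spaces whose candidate values are themselves estimators; a hedged sketch of that scenario (the estimator choices are arbitrary):

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(random_state=0)
pipe = Pipeline([("clf", LogisticRegression())])

# The grid swaps the whole "clf" step, so the candidate values are estimator objects.
param_grid = {
    "clf": [LogisticRegression(max_iter=200), DecisionTreeClassifier(max_depth=3)],
}

search = GridSearchCV(pipe, param_grid, cv=3).fit(X, y)
print(search.best_estimator_.named_steps["clf"])  # refit on a cloned copy of the winner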
n_candidates : "exhaust" or int, default="exhaust" The number of candidate parameters to sample, at the first @@ -1025,7 +1026,7 @@ class HalvingRandomSearchCV(BaseSuccessiveHalving): _parameter_constraints: dict = { **BaseSuccessiveHalving._parameter_constraints, - "param_distributions": [dict], + "param_distributions": [dict, list], "n_candidates": [ Interval(Integral, 0, None, closed="neither"), StrOptions({"exhaust"}), diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index bf9f5a0caf0bf..959a8c361d879 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -11,27 +11,29 @@ # Rodion Martynov # License: BSD 3 clause -from collections.abc import Iterable -from collections import defaultdict -import warnings -from itertools import chain, combinations -from math import ceil, floor import numbers +import warnings from abc import ABCMeta, abstractmethod +from collections import defaultdict +from collections.abc import Iterable from inspect import signature +from itertools import chain, combinations +from math import ceil, floor import numpy as np from scipy.special import comb -from ..utils import indexable, check_random_state, _safe_indexing -from ..utils import _approximate_mode -from ..utils.validation import _num_samples, column_or_1d -from ..utils.validation import check_array -from ..utils.multiclass import type_of_target -from ..utils import metadata_routing +from ..utils import ( + _approximate_mode, + _safe_indexing, + check_random_state, + indexable, + metadata_routing, +) +from ..utils._param_validation import Interval, RealNotInt, validate_params from ..utils.metadata_routing import _MetadataRequester -from ..utils._param_validation import validate_params, Interval -from ..utils._param_validation import RealNotInt +from ..utils.multiclass import type_of_target +from ..utils.validation import _num_samples, check_array, column_or_1d __all__ = [ "BaseCrossValidator", @@ -408,6 +410,10 @@ class KFold(_BaseKFold): Read more in the :ref:`User Guide `. + For visualisation of cross-validation behaviour and + comparison between common scikit-learn split methods + refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py` + Parameters ---------- n_splits : int, default=5 @@ -500,6 +506,10 @@ class GroupKFold(GroupsConsumerMixin, _BaseKFold): Read more in the :ref:`User Guide `. + For visualisation of cross-validation behaviour and + comparison between common scikit-learn split methods + refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py` + Parameters ---------- n_splits : int, default=5 @@ -624,6 +634,10 @@ class StratifiedKFold(_BaseKFold): Read more in the :ref:`User Guide `. + For visualisation of cross-validation behaviour and + comparison between common scikit-learn split methods + refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py` + Parameters ---------- n_splits : int, default=5 @@ -815,6 +829,10 @@ class StratifiedGroupKFold(GroupsConsumerMixin, _BaseKFold): Read more in the :ref:`User Guide `. + For visualisation of cross-validation behaviour and + comparison between common scikit-learn split methods + refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py` + Parameters ---------- n_splits : int, default=5 @@ -1014,6 +1032,10 @@ class TimeSeriesSplit(_BaseKFold): Read more in the :ref:`User Guide `. 
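With the `[dict, list]` constraint above, the list-of-dicts form documented for `param_distributions` can be used directly; a hedged sketch (the halving estimators are still experimental and require the explicit enable import):

from scipy.stats import randint

from sklearn.datasets import make_classification
from sklearn.experimental import enable_halving_search_cv  # noqa: F401
from sklearn.model_selection import HalvingRandomSearchCV
from sklearn.tree import DecisionTreeClassifier

X, y = make_classification(random_state=0)

# Two sub-spaces: a dict is drawn uniformly first, then a candidate is sampled from it.
param_distributions = [
    {"max_depth": randint(2, 6)},
    {"min_samples_split": randint(2, 20)},
]

search = HalvingRandomSearchCV(
    DecisionTreeClassifier(random_state=0), param_distributions, random_state=0
).fit(X, y)
print(search.best_params_)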
+ For visualisation of cross-validation behaviour and + comparison between common scikit-learn split methods + refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py` + .. versionadded:: 0.18 Parameters @@ -1764,6 +1786,10 @@ class ShuffleSplit(BaseShuffleSplit): Read more in the :ref:`User Guide `. + For visualisation of cross-validation behaviour and + comparison between common scikit-learn split methods + refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py` + Parameters ---------- n_splits : int, default=10 @@ -1894,6 +1920,10 @@ class GroupShuffleSplit(GroupsConsumerMixin, ShuffleSplit): Read more in the :ref:`User Guide `. + For visualisation of cross-validation behaviour and + comparison between common scikit-learn split methods + refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py` + Parameters ---------- n_splits : int, default=5 @@ -1971,8 +2001,8 @@ def _iter_indices(self, X, y, groups): # these are the indices of classes in the partition # invert them into data indices - train = np.flatnonzero(np.in1d(group_indices, group_train)) - test = np.flatnonzero(np.in1d(group_indices, group_test)) + train = np.flatnonzero(np.isin(group_indices, group_train)) + test = np.flatnonzero(np.isin(group_indices, group_test)) yield train, test @@ -2024,6 +2054,10 @@ class StratifiedShuffleSplit(BaseShuffleSplit): Read more in the :ref:`User Guide `. + For visualisation of cross-validation behaviour and + comparison between common scikit-learn split methods + refer to :ref:`sphx_glr_auto_examples_model_selection_plot_cv_indices.py` + Parameters ---------- n_splits : int, default=10 @@ -2510,7 +2544,8 @@ def check_cv(cv=5, y=None, *, classifier=False): "random_state": ["random_state"], "shuffle": ["boolean"], "stratify": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def train_test_split( *arrays, @@ -2670,7 +2705,7 @@ def _pprint(params, offset=0, printer=repr): this_line_length = offset line_sep = ",\n" + (1 + offset // 2) * " " for i, (k, v) in enumerate(sorted(params.items())): - if type(v) is float: + if isinstance(v, float): # use str for representing floating point numbers # this way we get consistent representation across # architectures and versions. 
diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index a103139c1640d..033eb6ec26bd9 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -11,39 +11,36 @@ # License: BSD 3 clause -import warnings import numbers import time +import warnings +from collections import Counter +from contextlib import suppress from functools import partial from numbers import Real from traceback import format_exc -from contextlib import suppress -from collections import Counter import numpy as np import scipy.sparse as sp from joblib import logger -from ..base import is_classifier, clone -from ..utils import indexable, check_random_state, _safe_indexing -from ..utils.validation import _check_fit_params -from ..utils.validation import _num_samples -from ..utils.parallel import delayed, Parallel -from ..utils.metaestimators import _safe_split +from ..base import clone, is_classifier +from ..exceptions import FitFailedWarning +from ..metrics import check_scoring, get_scorer_names +from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer +from ..preprocessing import LabelEncoder +from ..utils import _safe_indexing, check_random_state, indexable from ..utils._param_validation import ( HasMethods, - Interval, Integral, + Interval, StrOptions, validate_params, ) -from ..metrics import check_scoring -from ..metrics import get_scorer_names -from ..metrics._scorer import _check_multimetric_scoring, _MultimetricScorer -from ..exceptions import FitFailedWarning +from ..utils.metaestimators import _safe_split +from ..utils.parallel import Parallel, delayed +from ..utils.validation import _check_fit_params, _num_samples from ._split import check_cv -from ..preprocessing import LabelEncoder - __all__ = [ "cross_validate", @@ -78,7 +75,8 @@ "return_estimator": ["boolean"], "return_indices": ["boolean"], "error_score": [StrOptions({"raise"}), Real], - } + }, + prefer_skip_nested_validation=False, # estimator is not validated yet ) def cross_validate( estimator, @@ -712,14 +710,11 @@ def _fit_and_score( fit_params = _check_fit_params(X, fit_params, train) if parameters is not None: - # clone after setting parameters in case any parameters - # are estimators (like pipeline steps) - # because pipeline doesn't clone steps in fit - cloned_parameters = {} - for k, v in parameters.items(): - cloned_parameters[k] = clone(v, safe=False) - - estimator = estimator.set_params(**cloned_parameters) + # here we clone the parameters, since sometimes the parameters + # themselves might be estimators, e.g. when we search over different + # estimators in a pipeline. 
+ # ref: https://github.com/scikit-learn/scikit-learn/pull/26786 + estimator = estimator.set_params(**clone(parameters, safe=False)) start_time = time.time() @@ -1249,7 +1244,8 @@ def _check_is_permutation(indices, n_samples): "verbose": ["verbose"], "scoring": [StrOptions(set(get_scorer_names())), callable, None], "fit_params": [dict, None], - } + }, + prefer_skip_nested_validation=False, # estimator is not validated yet ) def permutation_test_score( estimator, @@ -1450,7 +1446,8 @@ def _shuffle(y, groups, random_state): "error_score": [StrOptions({"raise"}), Real], "return_times": ["boolean"], "fit_params": [dict, None], - } + }, + prefer_skip_nested_validation=False, # estimator is not validated yet ) def learning_curve( estimator, @@ -1855,7 +1852,8 @@ def _incremental_fit_estimator( "verbose": ["verbose"], "error_score": [StrOptions({"raise"}), Real], "fit_params": [dict, None], - } + }, + prefer_skip_nested_validation=False, # estimator is not validated yet ) def validation_curve( estimator, diff --git a/sklearn/model_selection/tests/test_plot.py b/sklearn/model_selection/tests/test_plot.py index e1e5003bc8a6b..a3dad60f7bf40 100644 --- a/sklearn/model_selection/tests/test_plot.py +++ b/sklearn/model_selection/tests/test_plot.py @@ -2,13 +2,16 @@ import pytest from sklearn.datasets import load_iris +from sklearn.model_selection import ( + LearningCurveDisplay, + ValidationCurveDisplay, + learning_curve, + validation_curve, +) from sklearn.tree import DecisionTreeClassifier from sklearn.utils import shuffle from sklearn.utils._testing import assert_allclose, assert_array_equal -from sklearn.model_selection import learning_curve, validation_curve -from sklearn.model_selection import LearningCurveDisplay, ValidationCurveDisplay - @pytest.fixture def data(): @@ -113,7 +116,6 @@ def test_validation_curve_display_default_usage(pyplot, data): estimator, X, y, param_name=param_name, param_range=param_range ) - assert display.param_range == param_range assert_array_equal(display.param_range, param_range) assert_allclose(display.train_scores, train_scores) assert_allclose(display.test_scores, test_scores) @@ -525,7 +527,7 @@ def test_curve_display_plot_kwargs(pyplot, data, CurveDisplay, specific_params): # TODO(1.5): to be removed -def test_learning_curve_display_deprecate_log_scale(data): +def test_learning_curve_display_deprecate_log_scale(data, pyplot): """Check that we warn for the deprecated parameter `log_scale`.""" X, y = data estimator = DecisionTreeClassifier(random_state=0) @@ -545,3 +547,26 @@ def test_learning_curve_display_deprecate_log_scale(data): assert display.ax_.get_xscale() == "linear" assert display.ax_.get_yscale() == "linear" + + +@pytest.mark.parametrize( + "param_range, xscale", + [([5, 10, 15], "linear"), ([-50, 5, 50, 500], "symlog"), ([5, 50, 500], "log")], +) +def test_validation_curve_xscale_from_param_range_provided_as_a_list( + pyplot, data, param_range, xscale +): + """Check the induced xscale from the provided param_range values.""" + X, y = data + estimator = DecisionTreeClassifier(random_state=0) + + param_name = "max_depth" + display = ValidationCurveDisplay.from_estimator( + estimator, + X, + y, + param_name=param_name, + param_range=param_range, + ) + + assert display.ax_.get_xscale() == xscale diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index a021e6c8c392a..50b519118a2b3 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -1,75 +1,73 @@ 
"""Test the search module""" +import pickle +import re +import sys from collections.abc import Iterable, Sized +from functools import partial from io import StringIO from itertools import chain, product -from functools import partial -import pickle -import sys from types import GeneratorType -import re import numpy as np -import scipy.sparse as sp import pytest +import scipy.sparse as sp +from scipy.stats import bernoulli, expon, uniform +from sklearn.base import BaseEstimator, ClassifierMixin, is_classifier +from sklearn.cluster import KMeans +from sklearn.datasets import ( + make_blobs, + make_classification, + make_multilabel_classification, +) +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.experimental import enable_halving_search_cv # noqa +from sklearn.impute import SimpleImputer +from sklearn.linear_model import LinearRegression, Ridge, SGDClassifier +from sklearn.metrics import ( + accuracy_score, + confusion_matrix, + f1_score, + make_scorer, + r2_score, + recall_score, + roc_auc_score, +) +from sklearn.metrics.pairwise import euclidean_distances +from sklearn.model_selection import ( + GridSearchCV, + GroupKFold, + GroupShuffleSplit, + HalvingGridSearchCV, + KFold, + LeaveOneGroupOut, + LeavePGroupsOut, + ParameterGrid, + ParameterSampler, + RandomizedSearchCV, + StratifiedKFold, + StratifiedShuffleSplit, + train_test_split, +) +from sklearn.model_selection._search import BaseSearchCV +from sklearn.model_selection._validation import FitFailedWarning +from sklearn.model_selection.tests.common import OneTimeSplitter +from sklearn.neighbors import KernelDensity, KNeighborsClassifier, LocalOutlierFactor +from sklearn.pipeline import Pipeline +from sklearn.svm import SVC, LinearSVC +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils._mocking import CheckingClassifier, MockDataFrame from sklearn.utils._testing import ( - assert_array_equal, - assert_array_almost_equal, - assert_allclose, - assert_almost_equal, - ignore_warnings, MinimalClassifier, MinimalRegressor, MinimalTransformer, + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, ) -from sklearn.utils._mocking import CheckingClassifier, MockDataFrame - -from scipy.stats import bernoulli, expon, uniform - -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.base import is_classifier -from sklearn.datasets import make_classification -from sklearn.datasets import make_blobs -from sklearn.datasets import make_multilabel_classification - -from sklearn.model_selection import train_test_split -from sklearn.model_selection import KFold -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import StratifiedShuffleSplit -from sklearn.model_selection import LeaveOneGroupOut -from sklearn.model_selection import LeavePGroupsOut -from sklearn.model_selection import GroupKFold -from sklearn.model_selection import GroupShuffleSplit -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import RandomizedSearchCV -from sklearn.model_selection import ParameterGrid -from sklearn.model_selection import ParameterSampler -from sklearn.model_selection._search import BaseSearchCV - -from sklearn.model_selection._validation import FitFailedWarning - -from sklearn.svm import LinearSVC, SVC -from sklearn.tree import DecisionTreeRegressor -from sklearn.tree import DecisionTreeClassifier -from sklearn.cluster import KMeans -from sklearn.neighbors import 
KernelDensity -from sklearn.neighbors import LocalOutlierFactor -from sklearn.neighbors import KNeighborsClassifier -from sklearn.metrics import f1_score -from sklearn.metrics import recall_score -from sklearn.metrics import accuracy_score -from sklearn.metrics import make_scorer -from sklearn.metrics import roc_auc_score -from sklearn.metrics import confusion_matrix -from sklearn.metrics import r2_score -from sklearn.metrics.pairwise import euclidean_distances -from sklearn.impute import SimpleImputer -from sklearn.pipeline import Pipeline -from sklearn.linear_model import Ridge, SGDClassifier, LinearRegression -from sklearn.ensemble import HistGradientBoostingClassifier - -from sklearn.model_selection.tests.common import OneTimeSplitter # Neither of the following two estimators inherit from BaseEstimator, @@ -786,7 +784,7 @@ def test_pandas_input(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((DataFrame, Series)) except ImportError: @@ -902,18 +900,16 @@ def check_cv_results_array_types(search, param_keys, score_keys): assert cv_results["rank_test_%s" % key].dtype == np.int32 -def check_cv_results_keys(cv_results, param_keys, score_keys, n_cand): +def check_cv_results_keys(cv_results, param_keys, score_keys, n_cand, extra_keys=()): # Test the search.cv_results_ contains all the required results - assert_array_equal( - sorted(cv_results.keys()), sorted(param_keys + score_keys + ("params",)) - ) + all_keys = param_keys + score_keys + extra_keys + assert_array_equal(sorted(cv_results.keys()), sorted(all_keys + ("params",))) assert all(cv_results[key].shape == (n_cand,) for key in param_keys + score_keys) def test_grid_search_cv_results(): X, y = make_classification(n_samples=50, n_features=4, random_state=42) - n_splits = 3 n_grid_points = 6 params = [ dict( @@ -951,9 +947,7 @@ def test_grid_search_cv_results(): ) n_candidates = n_grid_points - search = GridSearchCV( - SVC(), cv=n_splits, param_grid=params, return_train_score=True - ) + search = GridSearchCV(SVC(), cv=3, param_grid=params, return_train_score=True) search.fit(X, y) cv_results = search.cv_results_ # Check if score and timing are reasonable @@ -969,17 +963,20 @@ def test_grid_search_cv_results(): check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates) # Check masking cv_results = search.cv_results_ - n_candidates = len(search.cv_results_["params"]) - assert all( + + poly_results = [ ( cv_results["param_C"].mask[i] and cv_results["param_gamma"].mask[i] and not cv_results["param_degree"].mask[i] ) for i in range(n_candidates) - if cv_results["param_kernel"][i] == "linear" - ) - assert all( + if cv_results["param_kernel"][i] == "poly" + ] + assert all(poly_results) + assert len(poly_results) == 2 + + rbf_results = [ ( not cv_results["param_C"].mask[i] and not cv_results["param_gamma"].mask[i] @@ -987,13 +984,14 @@ def test_grid_search_cv_results(): ) for i in range(n_candidates) if cv_results["param_kernel"][i] == "rbf" - ) + ] + assert all(rbf_results) + assert len(rbf_results) == 4 def test_random_search_cv_results(): X, y = make_classification(n_samples=50, n_features=4, random_state=42) - n_splits = 3 n_search_iter = 30 params = [ @@ -1018,12 +1016,12 @@ def test_random_search_cv_results(): "mean_score_time", "std_score_time", ) - n_cand = n_search_iter + n_candidates = n_search_iter search = RandomizedSearchCV( SVC(), n_iter=n_search_iter, - cv=n_splits, + cv=3, 
param_distributions=params, return_train_score=True, ) @@ -1031,8 +1029,7 @@ def test_random_search_cv_results(): cv_results = search.cv_results_ # Check results structure check_cv_results_array_types(search, param_keys, score_keys) - check_cv_results_keys(cv_results, param_keys, score_keys, n_cand) - n_candidates = len(search.cv_results_["params"]) + check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates) assert all( ( cv_results["param_C"].mask[i] @@ -1040,7 +1037,7 @@ def test_random_search_cv_results(): and not cv_results["param_degree"].mask[i] ) for i in range(n_candidates) - if cv_results["param_kernel"][i] == "linear" + if cv_results["param_kernel"][i] == "poly" ) assert all( ( @@ -1421,7 +1418,7 @@ def test_grid_search_correct_score_results(): expected_keys = ("mean_test_score", "rank_test_score") + tuple( "split%d_test_score" % cv_i for cv_i in range(n_splits) ) - assert all(np.in1d(expected_keys, result_keys)) + assert all(np.isin(expected_keys, result_keys)) cv = StratifiedKFold(n_splits=n_splits) n_splits = grid_search.n_splits_ @@ -2424,3 +2421,31 @@ def test_search_cv_verbose_3(capsys, return_train_score): else: match = re.findall(r"score=[\d\.]+", captured) assert len(match) == 3 + + +@pytest.mark.parametrize( + "SearchCV, param_search", + [ + (GridSearchCV, "param_grid"), + (RandomizedSearchCV, "param_distributions"), + (HalvingGridSearchCV, "param_grid"), + ], +) +def test_search_estimator_param(SearchCV, param_search): + # test that SearchCV object doesn't change the object given in the parameter grid + X, y = make_classification(random_state=42) + + params = {"clf": [LinearSVC(dual="auto")], "clf__C": [0.01]} + orig_C = params["clf"][0].C + + pipe = Pipeline([("trs", MinimalTransformer()), ("clf", None)]) + + param_grid_search = {param_search: params} + gs = SearchCV(pipe, refit=True, cv=2, scoring="accuracy", **param_grid_search).fit( + X, y + ) + + # testing that the original object in params is not changed + assert params["clf"][0].C == orig_C + # testing that the GS is setting the parameter of the step correctly + assert gs.best_estimator_.named_steps["clf"].C == 0.01 diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py index 610f4c8e4bcdf..600ae920073b3 100644 --- a/sklearn/model_selection/tests/test_split.py +++ b/sklearn/model_selection/tests/test_split.py @@ -1,59 +1,57 @@ """Test the split module""" -import warnings -import pytest import re +import warnings +from itertools import combinations, combinations_with_replacement, permutations + import numpy as np +import pytest +from scipy import stats from scipy.sparse import ( coo_matrix, csc_matrix, csr_matrix, - isspmatrix_csr, + issparse, ) -from scipy import stats from scipy.special import comb -from itertools import combinations -from itertools import combinations_with_replacement -from itertools import permutations - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils.validation import _num_samples -from sklearn.utils._mocking import MockDataFrame - -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import KFold -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import GroupKFold -from sklearn.model_selection import TimeSeriesSplit -from sklearn.model_selection import LeaveOneOut -from 
sklearn.model_selection import LeaveOneGroupOut -from sklearn.model_selection import LeavePOut -from sklearn.model_selection import LeavePGroupsOut -from sklearn.model_selection import ShuffleSplit -from sklearn.model_selection import GroupShuffleSplit -from sklearn.model_selection import StratifiedShuffleSplit -from sklearn.model_selection import PredefinedSplit -from sklearn.model_selection import check_cv -from sklearn.model_selection import train_test_split -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import RepeatedKFold -from sklearn.model_selection import RepeatedStratifiedKFold -from sklearn.model_selection import StratifiedGroupKFold +from sklearn.datasets import load_digits, make_classification from sklearn.dummy import DummyClassifier - -from sklearn.model_selection._split import _validate_shuffle_split -from sklearn.model_selection._split import _build_repr -from sklearn.model_selection._split import _yields_constant_splits - -from sklearn.datasets import load_digits -from sklearn.datasets import make_classification - +from sklearn.model_selection import ( + GridSearchCV, + GroupKFold, + GroupShuffleSplit, + KFold, + LeaveOneGroupOut, + LeaveOneOut, + LeavePGroupsOut, + LeavePOut, + PredefinedSplit, + RepeatedKFold, + RepeatedStratifiedKFold, + ShuffleSplit, + StratifiedGroupKFold, + StratifiedKFold, + StratifiedShuffleSplit, + TimeSeriesSplit, + check_cv, + cross_val_score, + train_test_split, +) +from sklearn.model_selection._split import ( + _build_repr, + _validate_shuffle_split, + _yields_constant_splits, +) from sklearn.svm import SVC - from sklearn.tests.test_metadata_routing import assert_request_is_empty +from sklearn.utils._mocking import MockDataFrame +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.validation import _num_samples NO_GROUP_SPLITTERS = [ KFold(), @@ -820,7 +818,7 @@ def test_stratified_shuffle_split_iter(): assert len(train) + len(test) == y.size assert len(train) == train_size assert len(test) == test_size - assert_array_equal(np.lib.arraysetops.intersect1d(train, test), []) + assert_array_equal(np.intersect1d(train, test), []) def test_stratified_shuffle_split_even(): @@ -977,8 +975,8 @@ def test_group_shuffle_split(): # First test: no train group is in the test set and vice versa l_train_unique = np.unique(l[train]) l_test_unique = np.unique(l[test]) - assert not np.any(np.in1d(l[train], l_test_unique)) - assert not np.any(np.in1d(l[test], l_train_unique)) + assert not np.any(np.isin(l[train], l_test_unique)) + assert not np.any(np.isin(l[test], l_train_unique)) # Second test: train and test add up to all the data assert l[train].size + l[test].size == l.size @@ -1357,8 +1355,8 @@ def test_train_test_split_sparse(): for InputFeatureType in sparse_types: X_s = InputFeatureType(X) X_train, X_test = train_test_split(X_s) - assert isspmatrix_csr(X_train) - assert isspmatrix_csr(X_test) + assert issparse(X_train) and X_train.format == "csr" + assert issparse(X_test) and X_test.format == "csr" def test_train_test_split_mock_pandas(): diff --git a/sklearn/model_selection/tests/test_successive_halving.py b/sklearn/model_selection/tests/test_successive_halving.py index 035e20dc701d8..6c89f89afa684 100644 --- a/sklearn/model_selection/tests/test_successive_halving.py +++ b/sklearn/model_selection/tests/test_successive_halving.py @@ -1,26 +1,33 @@ from math import ceil -import pytest -from scipy.stats import norm, randint 
import numpy as np +import pytest +from scipy.stats import expon, norm, randint from sklearn.datasets import make_classification from sklearn.dummy import DummyClassifier from sklearn.experimental import enable_halving_search_cv # noqa -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import StratifiedShuffleSplit -from sklearn.model_selection import LeaveOneGroupOut -from sklearn.model_selection import LeavePGroupsOut -from sklearn.model_selection import GroupKFold -from sklearn.model_selection import GroupShuffleSplit -from sklearn.model_selection import HalvingGridSearchCV -from sklearn.model_selection import HalvingRandomSearchCV -from sklearn.model_selection import KFold, ShuffleSplit -from sklearn.svm import LinearSVC +from sklearn.model_selection import ( + GroupKFold, + GroupShuffleSplit, + HalvingGridSearchCV, + HalvingRandomSearchCV, + KFold, + LeaveOneGroupOut, + LeavePGroupsOut, + ShuffleSplit, + StratifiedKFold, + StratifiedShuffleSplit, +) from sklearn.model_selection._search_successive_halving import ( _SubsampleMetaSplitter, _top_k, ) +from sklearn.model_selection.tests.test_search import ( + check_cv_results_array_types, + check_cv_results_keys, +) +from sklearn.svm import SVC, LinearSVC class FastClassifier(DummyClassifier): @@ -774,3 +781,68 @@ def test_select_best_index(SearchCV): # we expect the index of 'i' best_index = SearchCV._select_best_index(None, None, results) assert best_index == 8 + + +def test_halving_random_search_list_of_dicts(): + """Check the behaviour of the `HalvingRandomSearchCV` with `param_distribution` + being a list of dictionary. + """ + X, y = make_classification(n_samples=150, n_features=4, random_state=42) + + params = [ + {"kernel": ["rbf"], "C": expon(scale=10), "gamma": expon(scale=0.1)}, + {"kernel": ["poly"], "degree": [2, 3]}, + ] + param_keys = ( + "param_C", + "param_degree", + "param_gamma", + "param_kernel", + ) + score_keys = ( + "mean_test_score", + "mean_train_score", + "rank_test_score", + "split0_test_score", + "split1_test_score", + "split2_test_score", + "split0_train_score", + "split1_train_score", + "split2_train_score", + "std_test_score", + "std_train_score", + "mean_fit_time", + "std_fit_time", + "mean_score_time", + "std_score_time", + ) + extra_keys = ("n_resources", "iter") + + search = HalvingRandomSearchCV( + SVC(), cv=3, param_distributions=params, return_train_score=True, random_state=0 + ) + search.fit(X, y) + n_candidates = sum(search.n_candidates_) + cv_results = search.cv_results_ + # Check results structure + check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates, extra_keys) + check_cv_results_array_types(search, param_keys, score_keys) + + assert all( + ( + cv_results["param_C"].mask[i] + and cv_results["param_gamma"].mask[i] + and not cv_results["param_degree"].mask[i] + ) + for i in range(n_candidates) + if cv_results["param_kernel"][i] == "poly" + ) + assert all( + ( + not cv_results["param_C"].mask[i] + and not cv_results["param_gamma"].mask[i] + and cv_results["param_degree"].mask[i] + ) + for i in range(n_candidates) + if cv_results["param_kernel"][i] == "rbf" + ) diff --git a/sklearn/model_selection/tests/test_validation.py b/sklearn/model_selection/tests/test_validation.py index 6905ffa295b86..7eb6002c34b1e 100644 --- a/sklearn/model_selection/tests/test_validation.py +++ b/sklearn/model_selection/tests/test_validation.py @@ -5,78 +5,83 @@ import tempfile import warnings from functools import partial +from io import StringIO from time import sleep 
-import pytest import numpy as np -from scipy.sparse import coo_matrix, csr_matrix -from scipy.sparse import issparse -from sklearn.exceptions import FitFailedWarning - -from sklearn.model_selection.tests.test_search import FailingClassifier - -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._mocking import CheckingClassifier, MockDataFrame - -from sklearn.utils.validation import _num_samples +import pytest +from scipy.sparse import coo_matrix, csr_matrix, issparse -from sklearn.model_selection import cross_val_score, ShuffleSplit -from sklearn.model_selection import cross_val_predict -from sklearn.model_selection import cross_validate -from sklearn.model_selection import permutation_test_score -from sklearn.model_selection import KFold -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import LeaveOneOut -from sklearn.model_selection import LeaveOneGroupOut -from sklearn.model_selection import LeavePGroupsOut -from sklearn.model_selection import GroupKFold -from sklearn.model_selection import GroupShuffleSplit -from sklearn.model_selection import learning_curve -from sklearn.model_selection import validation_curve -from sklearn.model_selection._validation import _check_is_permutation -from sklearn.model_selection._validation import _fit_and_score -from sklearn.model_selection._validation import _score - -from sklearn.datasets import make_regression -from sklearn.datasets import load_diabetes -from sklearn.datasets import load_iris -from sklearn.datasets import load_digits -from sklearn.metrics import explained_variance_score -from sklearn.metrics import make_scorer -from sklearn.metrics import accuracy_score -from sklearn.metrics import confusion_matrix -from sklearn.metrics import precision_recall_fscore_support -from sklearn.metrics import precision_score -from sklearn.metrics import r2_score -from sklearn.metrics import mean_squared_error -from sklearn.metrics import check_scoring - -from sklearn.linear_model import Ridge, LogisticRegression, SGDClassifier -from sklearn.linear_model import PassiveAggressiveClassifier, RidgeClassifier +from sklearn.base import BaseEstimator, clone +from sklearn.cluster import KMeans +from sklearn.datasets import ( + load_diabetes, + load_digits, + load_iris, + make_classification, + make_multilabel_classification, + make_regression, +) from sklearn.ensemble import RandomForestClassifier +from sklearn.exceptions import FitFailedWarning +from sklearn.impute import SimpleImputer +from sklearn.linear_model import ( + LogisticRegression, + PassiveAggressiveClassifier, + Ridge, + RidgeClassifier, + SGDClassifier, +) +from sklearn.metrics import ( + accuracy_score, + check_scoring, + confusion_matrix, + explained_variance_score, + make_scorer, + mean_squared_error, + precision_recall_fscore_support, + precision_score, + r2_score, +) +from sklearn.model_selection import ( + GridSearchCV, + GroupKFold, + GroupShuffleSplit, + KFold, + LeaveOneGroupOut, + LeaveOneOut, + LeavePGroupsOut, + ShuffleSplit, + StratifiedKFold, + cross_val_predict, + cross_val_score, + cross_validate, + learning_curve, + permutation_test_score, + validation_curve, +) +from sklearn.model_selection._validation import ( + _check_is_permutation, + _fit_and_score, + _score, +) +from sklearn.model_selection.tests.common import OneTimeSplitter +from 
sklearn.model_selection.tests.test_search import FailingClassifier +from sklearn.multiclass import OneVsRestClassifier from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import SVC, LinearSVC -from sklearn.cluster import KMeans from sklearn.neural_network import MLPRegressor - -from sklearn.impute import SimpleImputer - -from sklearn.preprocessing import LabelEncoder, scale from sklearn.pipeline import Pipeline - -from io import StringIO -from sklearn.base import BaseEstimator -from sklearn.base import clone -from sklearn.multiclass import OneVsRestClassifier +from sklearn.preprocessing import LabelEncoder, scale +from sklearn.svm import SVC, LinearSVC from sklearn.utils import shuffle -from sklearn.datasets import make_classification -from sklearn.datasets import make_multilabel_classification - -from sklearn.model_selection.tests.common import OneTimeSplitter -from sklearn.model_selection import GridSearchCV +from sklearn.utils._mocking import CheckingClassifier, MockDataFrame +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) +from sklearn.utils.validation import _num_samples class MockImprovingEstimator(BaseEstimator): @@ -611,7 +616,7 @@ def test_cross_val_score_pandas(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((Series, DataFrame)) except ImportError: @@ -1112,7 +1117,7 @@ def test_cross_val_predict_pandas(): # check cross_val_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((Series, DataFrame)) except ImportError: @@ -2067,7 +2072,7 @@ def test_permutation_test_score_pandas(): # check permutation_test_score doesn't destroy pandas dataframe types = [(MockDataFrame, MockDataFrame)] try: - from pandas import Series, DataFrame + from pandas import DataFrame, Series types.append((Series, DataFrame)) except ImportError: diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 4c30bcdb6cac3..d87b4bfb02d6c 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -34,30 +34,36 @@ # License: BSD 3 clause import array +import itertools +import warnings from numbers import Integral, Real + import numpy as np -import warnings import scipy.sparse as sp -import itertools -from .base import BaseEstimator, ClassifierMixin, clone, is_classifier -from .base import MultiOutputMixin -from .base import MetaEstimatorMixin, is_regressor -from .base import _fit_context -from .preprocessing import LabelBinarizer +from .base import ( + BaseEstimator, + ClassifierMixin, + MetaEstimatorMixin, + MultiOutputMixin, + _fit_context, + clone, + is_classifier, + is_regressor, +) from .metrics.pairwise import pairwise_distances_argmin +from .preprocessing import LabelBinarizer from .utils import check_random_state from .utils._param_validation import HasMethods, Interval from .utils._tags import _safe_tags -from .utils.validation import _num_samples -from .utils.validation import check_is_fitted +from .utils.metaestimators import _safe_split, available_if from .utils.multiclass import ( _check_partial_fit_first_call, - check_classification_targets, _ovr_decision_function, + check_classification_targets, ) -from .utils.metaestimators import _safe_split, available_if -from .utils.parallel import delayed, Parallel +from 
.utils.parallel import Parallel, delayed +from .utils.validation import _num_samples, check_is_fitted __all__ = [ "OneVsRestClassifier", diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 8bb954e976f4c..02aa5da827605 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -26,23 +26,23 @@ ClassifierMixin, MetaEstimatorMixin, RegressorMixin, + _fit_context, clone, is_classifier, - _fit_context, ) from .model_selection import cross_val_predict -from .utils import _print_elapsed_time, check_random_state, Bunch +from .utils import Bunch, _print_elapsed_time, check_random_state +from .utils._param_validation import HasMethods, StrOptions from .utils.metadata_routing import ( MetadataRouter, MethodMapping, - process_routing, _routing_enabled, + process_routing, ) from .utils.metaestimators import available_if from .utils.multiclass import check_classification_targets +from .utils.parallel import Parallel, delayed from .utils.validation import _check_fit_params, check_is_fitted, has_fit_parameter -from .utils.parallel import delayed, Parallel -from .utils._param_validation import HasMethods, StrOptions __all__ = [ "MultiOutputRegressor", @@ -322,7 +322,7 @@ def get_metadata_routing(self): Returns ------- routing : MetadataRouter - A :class:`~utils.metadata_routing.MetadataRouter` encapsulating + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating routing information. """ router = MetadataRouter(owner=self.__class__.__name__).add( @@ -782,6 +782,11 @@ class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain): all of the available features provided to the model plus the predictions of models that are earlier in the chain. + For an example of how to use ``ClassifierChain`` and benefit from its + ensemble, see + :ref:`ClassifierChain on a yeast dataset + ` example. + Read more in the :ref:`User Guide `. .. versionadded:: 0.19 @@ -859,7 +864,7 @@ class labels for each estimator in the chain. See Also -------- RegressorChain : Equivalent for regression. - MultioutputClassifier : Classifies each output independently rather than + MultiOutputClassifier : Classifies each output independently rather than chaining. References @@ -1006,7 +1011,7 @@ def get_metadata_routing(self): Returns ------- routing : MetadataRouter - A :class:`~utils.metadata_routing.MetadataRouter` encapsulating + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating routing information. """ router = MetadataRouter(owner=self.__class__.__name__).add( @@ -1155,7 +1160,7 @@ def get_metadata_routing(self): Returns ------- routing : MetadataRouter - A :class:`~utils.metadata_routing.MetadataRouter` encapsulating + A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating routing information. 
""" router = MetadataRouter(owner=self.__class__.__name__).add( diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 76d7189385828..9ee664bf8b3a4 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -14,23 +14,18 @@ # # License: BSD 3 clause import warnings - from abc import ABCMeta, abstractmethod -from numbers import Real, Integral +from numbers import Integral, Real import numpy as np from scipy.special import logsumexp -from .base import BaseEstimator, ClassifierMixin -from .base import _fit_context -from .preprocessing import binarize -from .preprocessing import LabelBinarizer -from .preprocessing import label_binarize +from .base import BaseEstimator, ClassifierMixin, _fit_context +from .preprocessing import LabelBinarizer, binarize, label_binarize +from .utils._param_validation import Hidden, Interval, StrOptions from .utils.extmath import safe_sparse_dot from .utils.multiclass import _check_partial_fit_first_call -from .utils.validation import check_is_fitted, check_non_negative -from .utils.validation import _check_sample_weight -from .utils._param_validation import Interval, Hidden, StrOptions +from .utils.validation import _check_sample_weight, check_is_fitted, check_non_negative __all__ = [ "BernoulliNB", @@ -472,7 +467,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None): classes = self.classes_ unique_y = np.unique(y) - unique_y_in_classes = np.in1d(unique_y, classes) + unique_y_in_classes = np.isin(unique_y, classes) if not np.all(unique_y_in_classes): raise ValueError( diff --git a/sklearn/neighbors/__init__.py b/sklearn/neighbors/__init__.py index 8223c20991904..ce697656b4c2e 100644 --- a/sklearn/neighbors/__init__.py +++ b/sklearn/neighbors/__init__.py @@ -4,18 +4,21 @@ """ from ._ball_tree import BallTree -from ._kd_tree import KDTree -from ._graph import kneighbors_graph, radius_neighbors_graph -from ._graph import KNeighborsTransformer, RadiusNeighborsTransformer -from ._unsupervised import NearestNeighbors +from ._base import VALID_METRICS, VALID_METRICS_SPARSE, sort_graph_by_row_values from ._classification import KNeighborsClassifier, RadiusNeighborsClassifier -from ._regression import KNeighborsRegressor, RadiusNeighborsRegressor -from ._nearest_centroid import NearestCentroid +from ._graph import ( + KNeighborsTransformer, + RadiusNeighborsTransformer, + kneighbors_graph, + radius_neighbors_graph, +) +from ._kd_tree import KDTree from ._kde import KernelDensity from ._lof import LocalOutlierFactor from ._nca import NeighborhoodComponentsAnalysis -from ._base import sort_graph_by_row_values -from ._base import VALID_METRICS, VALID_METRICS_SPARSE +from ._nearest_centroid import NearestCentroid +from ._regression import KNeighborsRegressor, RadiusNeighborsRegressor +from ._unsupervised import NearestNeighbors __all__ = [ "BallTree", diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index c812149970e81..dcff18e10fa48 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -7,39 +7,36 @@ # # License: BSD 3 clause (C) INRIA, University of Amsterdam import itertools -from functools import partial - +import numbers import warnings from abc import ABCMeta, abstractmethod -import numbers +from functools import partial from numbers import Integral, Real import numpy as np -from scipy.sparse import csr_matrix, issparse from joblib import effective_n_jobs +from scipy.sparse import csr_matrix, issparse -from ._ball_tree import BallTree -from ._kd_tree import KDTree -from ..base 
import BaseEstimator, MultiOutputMixin -from ..base import is_classifier +from ..base import BaseEstimator, MultiOutputMixin, is_classifier +from ..exceptions import DataConversionWarning, EfficiencyWarning from ..metrics import pairwise_distances_chunked -from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS from ..metrics._pairwise_distances_reduction import ( ArgKmin, RadiusNeighbors, ) +from ..metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS from ..utils import ( + _to_object_array, check_array, gen_even_slices, - _to_object_array, ) -from ..utils.multiclass import check_classification_targets -from ..utils.validation import check_is_fitted -from ..utils.validation import check_non_negative from ..utils._param_validation import Interval, StrOptions, validate_params -from ..utils.parallel import delayed, Parallel from ..utils.fixes import parse_version, sp_base_version -from ..exceptions import DataConversionWarning, EfficiencyWarning +from ..utils.multiclass import check_classification_targets +from ..utils.parallel import Parallel, delayed +from ..utils.validation import check_is_fitted, check_non_negative +from ._ball_tree import BallTree +from ._kd_tree import KDTree SCIPY_METRICS = [ "braycurtis", @@ -68,8 +65,8 @@ SCIPY_METRICS += ["matching"] VALID_METRICS = dict( - ball_tree=BallTree._valid_metrics, - kd_tree=KDTree._valid_metrics, + ball_tree=BallTree.valid_metrics, + kd_tree=KDTree.valid_metrics, # The following list comes from the # sklearn.metrics.pairwise doc string brute=sorted(set(PAIRWISE_DISTANCE_FUNCTIONS).union(SCIPY_METRICS)), @@ -198,7 +195,8 @@ def _check_precomputed(X): "graph": ["sparse matrix"], "copy": ["boolean"], "warn_when_not_sorted": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def sort_graph_by_row_values(graph, copy=False, warn_when_not_sorted=True): """Sort a sparse graph such that each row is stored with increasing values. diff --git a/sklearn/neighbors/_binary_tree.pxi b/sklearn/neighbors/_binary_tree.pxi index 7d4d08b2703a4..b60ea3a0a6d70 100644 --- a/sklearn/neighbors/_binary_tree.pxi +++ b/sklearn/neighbors/_binary_tree.pxi @@ -236,9 +236,10 @@ metric : str or DistanceMetric64 object, default='minkowski' Metric to use for distance computation. Default is "minkowski", which results in the standard Euclidean distance when p = 2. A list of valid metrics for {BinaryTree} is given by - :meth:`{BinaryTree}.valid_metrics`. + :attr:`{BinaryTree}.valid_metrics`. See the documentation of `scipy.spatial.distance - `_ and the metrics listed in :class:`~sklearn.metrics.pairwise.distance_metrics` for + `_ and + the metrics listed in :class:`~sklearn.metrics.pairwise.distance_metrics` for more information on any distance metric. Additional keywords are passed to the distance metric class. @@ -249,6 +250,8 @@ Attributes ---------- data : memory view The training data +valid_metrics: list of str + List of valid distance metrics. Examples -------- @@ -792,7 +795,7 @@ cdef class BinaryTree: cdef int n_splits cdef int n_calls - _valid_metrics = VALID_METRIC_IDS + valid_metrics = VALID_METRIC_IDS # Use cinit to initialize all arrays to empty: this will prevent memory # errors and seg-faults in rare cases where __init__ is not called @@ -979,19 +982,6 @@ cdef class BinaryTree: self.node_bounds.base, ) - @classmethod - def valid_metrics(cls): - """Get list of valid distance metrics. - - .. versionadded:: 1.3 - - Returns - ------- - valid_metrics: list of str - List of valid distance metrics. 
- """ - return cls._valid_metrics - cdef inline float64_t dist(self, float64_t* x1, float64_t* x2, intp_t size) except -1 nogil: """Compute the distance between arrays x1 and x2""" diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index e3e2049a8f8e5..5d8fdab348ab8 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -7,21 +7,20 @@ # Multi-output support by Arnaud Joly # # License: BSD 3 clause (C) INRIA, University of Amsterdam +import warnings from numbers import Integral import numpy as np -from ..utils.fixes import _mode -from ..utils.extmath import weighted_mode -from ..utils.validation import _is_arraylike, _num_samples, check_is_fitted -import warnings -from ._base import _get_weights -from ._base import NeighborsBase, KNeighborsMixin, RadiusNeighborsMixin -from ..base import ClassifierMixin -from ..base import _fit_context +from sklearn.neighbors._base import _check_precomputed + +from ..base import ClassifierMixin, _fit_context from ..metrics._pairwise_distances_reduction import ArgKminClassMode from ..utils._param_validation import StrOptions -from sklearn.neighbors._base import _check_precomputed +from ..utils.extmath import weighted_mode +from ..utils.fixes import _mode +from ..utils.validation import _is_arraylike, _num_samples, check_is_fitted +from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin, _get_weights def _adjusted_metric(metric, metric_kwargs, p=None): @@ -55,6 +54,11 @@ class KNeighborsClassifier(KNeighborsMixin, ClassifierMixin, NeighborsBase): array of distances, and returns an array of the same shape containing the weights. + Refer to the example entitled + :ref:`sphx_glr_auto_examples_neighbors_plot_classification.py` + showing the impact of the `weights` parameter on the decision + boundary. + algorithm : {'auto', 'ball_tree', 'kd_tree', 'brute'}, default='auto' Algorithm used to compute the nearest neighbors: @@ -73,7 +77,7 @@ class KNeighborsClassifier(KNeighborsMixin, ClassifierMixin, NeighborsBase): required to store the tree. The optimal value depends on the nature of the problem. - p : int, default=2 + p : float, default=2 Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. @@ -434,7 +438,7 @@ class RadiusNeighborsClassifier(RadiusNeighborsMixin, ClassifierMixin, Neighbors required to store the tree. The optimal value depends on the nature of the problem. - p : int, default=2 + p : float, default=2 Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. 
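With the `_binary_tree.pxi` hunk above, `valid_metrics` on `KDTree` and `BallTree` becomes a plain class attribute rather than a classmethod, so callers read it instead of calling it; the classifier docstrings likewise now describe `p` as a float. A short usage sketch, assuming a build with this patch applied:

    from sklearn.neighbors import BallTree, KDTree, KNeighborsClassifier

    # valid_metrics is now an attribute (a list of metric names), not a method.
    print("euclidean" in KDTree.valid_metrics)    # True
    print("haversine" in BallTree.valid_metrics)  # True

    # `p` is documented as a float, so fractional Minkowski powers such as
    # p=1.5 are a legitimate setting.
    clf = KNeighborsClassifier(n_neighbors=3, metric="minkowski", p=1.5)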
diff --git a/sklearn/neighbors/_graph.py b/sklearn/neighbors/_graph.py index e815d12e293c9..98cc91141b124 100644 --- a/sklearn/neighbors/_graph.py +++ b/sklearn/neighbors/_graph.py @@ -4,13 +4,11 @@ # Tom Dupre la Tour # # License: BSD 3 clause (C) INRIA, University of Amsterdam -from ._base import KNeighborsMixin, RadiusNeighborsMixin -from ._base import NeighborsBase -from ._unsupervised import NearestNeighbors -from ..base import TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context +from ..base import ClassNamePrefixFeaturesOutMixin, TransformerMixin, _fit_context from ..utils._param_validation import StrOptions from ..utils.validation import check_is_fitted +from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin +from ._unsupervised import NearestNeighbors def _check_params(X, metric, p, metric_params): @@ -75,7 +73,7 @@ def kneighbors_graph( :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric values. - p : int, default=2 + p : float, default=2 Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. @@ -170,7 +168,7 @@ def radius_neighbors_graph( :class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric values. - p : int, default=2 + p : float, default=2 Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. @@ -283,7 +281,7 @@ class KNeighborsTransformer( Distance matrices are not supported. - p : int, default=2 + p : float, default=2 Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance @@ -510,7 +508,7 @@ class RadiusNeighborsTransformer( Distance matrices are not supported. - p : int, default=2 + p : float, default=2 Parameter for the Minkowski metric from sklearn.metrics.pairwise.pairwise_distances. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance diff --git a/sklearn/neighbors/_kde.py b/sklearn/neighbors/_kde.py index 7f7b38497d209..8885fb4c8c5d0 100644 --- a/sklearn/neighbors/_kde.py +++ b/sklearn/neighbors/_kde.py @@ -9,17 +9,15 @@ import numpy as np from scipy.special import gammainc -from ..base import BaseEstimator -from ..base import _fit_context +from ..base import BaseEstimator, _fit_context from ..neighbors._base import VALID_METRICS from ..utils import check_random_state -from ..utils.validation import _check_sample_weight, check_is_fitted from ..utils._param_validation import Interval, StrOptions from ..utils.extmath import row_norms +from ..utils.validation import _check_sample_weight, check_is_fitted from ._ball_tree import BallTree from ._kd_tree import KDTree - VALID_KERNELS = [ "gaussian", "tophat", @@ -175,12 +173,12 @@ def _choose_algorithm(self, algorithm, metric): # algorithm to compute the result. 
if algorithm == "auto": # use KD Tree if possible - if metric in KDTree.valid_metrics(): + if metric in KDTree.valid_metrics: return "kd_tree" - elif metric in BallTree.valid_metrics(): + elif metric in BallTree.valid_metrics: return "ball_tree" else: # kd_tree or ball_tree - if metric not in TREE_DICT[algorithm].valid_metrics(): + if metric not in TREE_DICT[algorithm].valid_metrics: raise ValueError( "invalid metric for {0}: '{1}'".format(TREE_DICT[algorithm], metric) ) diff --git a/sklearn/neighbors/_lof.py b/sklearn/neighbors/_lof.py index 40cdc9ab5fb9d..05dfdb13a1cbe 100644 --- a/sklearn/neighbors/_lof.py +++ b/sklearn/neighbors/_lof.py @@ -2,19 +2,17 @@ # Alexandre Gramfort # License: BSD 3 clause -import numpy as np import warnings - -from ._base import NeighborsBase -from ._base import KNeighborsMixin -from ..base import OutlierMixin -from ..base import _fit_context from numbers import Real +import numpy as np + +from ..base import OutlierMixin, _fit_context +from ..utils import check_array from ..utils._param_validation import Interval, StrOptions from ..utils.metaestimators import available_if from ..utils.validation import check_is_fitted -from ..utils import check_array +from ._base import KNeighborsMixin, NeighborsBase __all__ = ["LocalOutlierFactor"] @@ -78,9 +76,9 @@ class LocalOutlierFactor(KNeighborsMixin, OutlierMixin, NeighborsBase): between those vectors. This works for Scipy's metrics, but is less efficient than passing the metric name as a string. - p : int, default=2 + p : float, default=2 Parameter for the Minkowski metric from - :func:`sklearn.metrics.pairwise.pairwise_distances`. When p = 1, this + :func:`sklearn.metrics.pairwise_distances`. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. 
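The `_choose_algorithm` hunk above encodes the dispatch rule for `KernelDensity(algorithm="auto")`: use a KD-tree when the metric supports it, fall back to a ball tree otherwise, and reject metrics neither tree implements. A rough standalone sketch of that rule using the new attribute-style `valid_metrics` (`choose_tree` is a hypothetical helper, not a library function):

    from sklearn.neighbors import BallTree, KDTree

    def choose_tree(metric):
        # Roughly mirrors the "auto" branch of KernelDensity._choose_algorithm:
        # KD-trees support fewer metrics than ball trees, so try them first.
        if metric in KDTree.valid_metrics:
            return "kd_tree"
        if metric in BallTree.valid_metrics:
            return "ball_tree"
        raise ValueError(f"invalid metric: {metric!r}")

    print(choose_tree("euclidean"))  # kd_tree
    print(choose_tree("haversine"))  # ball_tree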
diff --git a/sklearn/neighbors/_nca.py b/sklearn/neighbors/_nca.py index 246f0adcb36ad..d302aef0dc0a2 100644 --- a/sklearn/neighbors/_nca.py +++ b/sklearn/neighbors/_nca.py @@ -6,23 +6,29 @@ # John Chiotellis # License: BSD 3 clause -from warnings import warn -from numbers import Integral, Real -import numpy as np import sys import time +from numbers import Integral, Real +from warnings import warn + +import numpy as np from scipy.optimize import minimize -from ..utils.extmath import softmax + +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from ..decomposition import PCA +from ..exceptions import ConvergenceWarning from ..metrics import pairwise_distances -from ..base import BaseEstimator, TransformerMixin, ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context from ..preprocessing import LabelEncoder -from ..decomposition import PCA +from ..utils._param_validation import Interval, StrOptions +from ..utils.extmath import softmax from ..utils.multiclass import check_classification_targets from ..utils.random import check_random_state -from ..utils.validation import check_is_fitted, check_array -from ..utils._param_validation import Interval, StrOptions -from ..exceptions import ConvergenceWarning +from ..utils.validation import check_array, check_is_fitted class NeighborhoodComponentsAnalysis( diff --git a/sklearn/neighbors/_nearest_centroid.py b/sklearn/neighbors/_nearest_centroid.py index 315393bf597e4..75086ee25448e 100644 --- a/sklearn/neighbors/_nearest_centroid.py +++ b/sklearn/neighbors/_nearest_centroid.py @@ -8,19 +8,20 @@ # License: BSD 3 clause import warnings -import numpy as np from numbers import Real + +import numpy as np from scipy import sparse as sp -from ..base import BaseEstimator, ClassifierMixin -from ..base import _fit_context +from sklearn.metrics.pairwise import _VALID_METRICS + +from ..base import BaseEstimator, ClassifierMixin, _fit_context from ..metrics.pairwise import pairwise_distances_argmin from ..preprocessing import LabelEncoder -from ..utils.validation import check_is_fitted -from ..utils.sparsefuncs import csc_median_axis_0 -from ..utils.multiclass import check_classification_targets from ..utils._param_validation import Interval, StrOptions -from sklearn.metrics.pairwise import _VALID_METRICS +from ..utils.multiclass import check_classification_targets +from ..utils.sparsefuncs import csc_median_axis_0 +from ..utils.validation import check_is_fitted class NearestCentroid(ClassifierMixin, BaseEstimator): diff --git a/sklearn/neighbors/_regression.py b/sklearn/neighbors/_regression.py index b2050345c9833..b9b7f4030d02c 100644 --- a/sklearn/neighbors/_regression.py +++ b/sklearn/neighbors/_regression.py @@ -14,11 +14,9 @@ import numpy as np -from ._base import _get_weights -from ._base import NeighborsBase, KNeighborsMixin, RadiusNeighborsMixin -from ..base import RegressorMixin -from ..base import _fit_context +from ..base import RegressorMixin, _fit_context from ..utils._param_validation import StrOptions +from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin, _get_weights class KNeighborsRegressor(KNeighborsMixin, RegressorMixin, NeighborsBase): @@ -68,7 +66,7 @@ class KNeighborsRegressor(KNeighborsMixin, RegressorMixin, NeighborsBase): required to store the tree. The optimal value depends on the nature of the problem. - p : int, default=2 + p : float, default=2 Power parameter for the Minkowski metric. 
When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. @@ -311,7 +309,7 @@ class RadiusNeighborsRegressor(RadiusNeighborsMixin, RegressorMixin, NeighborsBa required to store the tree. The optimal value depends on the nature of the problem. - p : int, default=2 + p : float, default=2 Power parameter for the Minkowski metric. When p = 1, this is equivalent to using manhattan_distance (l1), and euclidean_distance (l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used. diff --git a/sklearn/neighbors/_unsupervised.py b/sklearn/neighbors/_unsupervised.py index 05607f0bd0c71..fbac427987be3 100644 --- a/sklearn/neighbors/_unsupervised.py +++ b/sklearn/neighbors/_unsupervised.py @@ -1,8 +1,6 @@ """Unsupervised nearest neighbors learner""" from ..base import _fit_context -from ._base import NeighborsBase -from ._base import KNeighborsMixin -from ._base import RadiusNeighborsMixin +from ._base import KNeighborsMixin, NeighborsBase, RadiusNeighborsMixin class NearestNeighbors(KNeighborsMixin, RadiusNeighborsMixin, NeighborsBase): @@ -118,14 +116,11 @@ class NearestNeighbors(KNeighborsMixin, RadiusNeighborsMixin, NeighborsBase): >>> import numpy as np >>> from sklearn.neighbors import NearestNeighbors >>> samples = [[0, 0, 2], [1, 0, 0], [0, 0, 1]] - >>> neigh = NearestNeighbors(n_neighbors=2, radius=0.4) >>> neigh.fit(samples) NearestNeighbors(...) - >>> neigh.kneighbors([[0, 0, 1.3]], 2, return_distance=False) array([[2, 0]]...) - >>> nbrs = neigh.radius_neighbors( ... [[0, 0, 1.3]], 0.4, return_distance=False ... ) diff --git a/sklearn/neighbors/tests/test_ball_tree.py b/sklearn/neighbors/tests/test_ball_tree.py index a5aee91efa80b..efca4e491ce01 100644 --- a/sklearn/neighbors/tests/test_ball_tree.py +++ b/sklearn/neighbors/tests/test_ball_tree.py @@ -3,10 +3,11 @@ import numpy as np import pytest from numpy.testing import assert_array_almost_equal + from sklearn.neighbors._ball_tree import BallTree from sklearn.utils import check_random_state -from sklearn.utils.validation import check_array from sklearn.utils._testing import _convert_container +from sklearn.utils.validation import check_array rng = np.random.RandomState(10) V_mahalanobis = rng.rand(3, 3) diff --git a/sklearn/neighbors/tests/test_kd_tree.py b/sklearn/neighbors/tests/test_kd_tree.py index 525c15436e24c..1aee28cc36bd0 100644 --- a/sklearn/neighbors/tests/test_kd_tree.py +++ b/sklearn/neighbors/tests/test_kd_tree.py @@ -1,8 +1,8 @@ import numpy as np import pytest -from sklearn.utils.parallel import delayed, Parallel from sklearn.neighbors._kd_tree import KDTree +from sklearn.utils.parallel import Parallel, delayed DIMENSION = 3 diff --git a/sklearn/neighbors/tests/test_kde.py b/sklearn/neighbors/tests/test_kde.py index 69cd3c8f5693f..b6bf09d01b672 100644 --- a/sklearn/neighbors/tests/test_kde.py +++ b/sklearn/neighbors/tests/test_kde.py @@ -1,16 +1,15 @@ +import joblib import numpy as np - import pytest -from sklearn.utils._testing import assert_allclose -from sklearn.neighbors import KernelDensity, KDTree, NearestNeighbors -from sklearn.neighbors._ball_tree import kernel_norm -from sklearn.pipeline import make_pipeline from sklearn.datasets import make_blobs +from sklearn.exceptions import NotFittedError from sklearn.model_selection import GridSearchCV +from sklearn.neighbors import KDTree, KernelDensity, NearestNeighbors +from sklearn.neighbors._ball_tree import kernel_norm +from sklearn.pipeline import make_pipeline from 
sklearn.preprocessing import StandardScaler -from sklearn.exceptions import NotFittedError -import joblib +from sklearn.utils._testing import assert_allclose # XXX Duplicated in test_neighbors_tree, test_kde @@ -114,7 +113,7 @@ def test_kde_algorithm_metric_choice(algorithm, metric): kde = KernelDensity(algorithm=algorithm, metric=metric) - if algorithm == "kd_tree" and metric not in KDTree.valid_metrics(): + if algorithm == "kd_tree" and metric not in KDTree.valid_metrics: with pytest.raises(ValueError, match="invalid metric"): kde.fit(X) else: @@ -165,7 +164,7 @@ def test_kde_sample_weights(): test_points = rng.rand(n_samples_test, d) for algorithm in ["auto", "ball_tree", "kd_tree"]: for metric in ["euclidean", "minkowski", "manhattan", "chebyshev"]: - if algorithm != "kd_tree" or metric in KDTree.valid_metrics(): + if algorithm != "kd_tree" or metric in KDTree.valid_metrics: kde = KernelDensity(algorithm=algorithm, metric=metric) # Test that adding a constant sample weight has no effect diff --git a/sklearn/neighbors/tests/test_lof.py b/sklearn/neighbors/tests/test_lof.py index 38cc55717c404..4a877b1224104 100644 --- a/sklearn/neighbors/tests/test_lof.py +++ b/sklearn/neighbors/tests/test_lof.py @@ -2,26 +2,22 @@ # Alexandre Gramfort # License: BSD 3 clause +import re from math import sqrt import numpy as np -from scipy.sparse import csr_matrix - -from sklearn import neighbors -import re import pytest +from scipy.sparse import csr_matrix -from sklearn import metrics +from sklearn import metrics, neighbors +from sklearn.datasets import load_iris from sklearn.metrics import roc_auc_score - from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_array_equal -from sklearn.utils.estimator_checks import check_outlier_corruption -from sklearn.utils.estimator_checks import parametrize_with_checks - -from sklearn.datasets import load_iris - +from sklearn.utils._testing import assert_allclose, assert_array_equal +from sklearn.utils.estimator_checks import ( + check_outlier_corruption, + parametrize_with_checks, +) # load the iris dataset # and randomly permute it diff --git a/sklearn/neighbors/tests/test_nca.py b/sklearn/neighbors/tests/test_nca.py index df2dccc5829c3..7dedd97ff423b 100644 --- a/sklearn/neighbors/tests/test_nca.py +++ b/sklearn/neighbors/tests/test_nca.py @@ -6,19 +6,20 @@ # John Chiotellis # License: BSD 3 clause -import pytest import re + import numpy as np -from numpy.testing import assert_array_equal, assert_array_almost_equal +import pytest +from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy.optimize import check_grad + from sklearn import clone +from sklearn.datasets import load_iris, make_blobs, make_classification from sklearn.exceptions import ConvergenceWarning -from sklearn.utils import check_random_state -from sklearn.datasets import load_iris, make_classification, make_blobs -from sklearn.neighbors import NeighborhoodComponentsAnalysis from sklearn.metrics import pairwise_distances +from sklearn.neighbors import NeighborhoodComponentsAnalysis from sklearn.preprocessing import LabelEncoder - +from sklearn.utils import check_random_state rng = check_random_state(0) # load and shuffle iris dataset diff --git a/sklearn/neighbors/tests/test_nearest_centroid.py b/sklearn/neighbors/tests/test_nearest_centroid.py index 861d09e92683c..7cf54b56f58db 100644 --- a/sklearn/neighbors/tests/test_nearest_centroid.py +++ 
b/sklearn/neighbors/tests/test_nearest_centroid.py @@ -3,11 +3,11 @@ """ import numpy as np import pytest -from scipy import sparse as sp from numpy.testing import assert_array_equal +from scipy import sparse as sp -from sklearn.neighbors import NearestCentroid from sklearn import datasets +from sklearn.neighbors import NearestCentroid # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index ac4ccfd9343be..339861dac6ab3 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -1,18 +1,19 @@ -from itertools import product +import re import warnings +from itertools import product -import pytest -import re +import joblib import numpy as np +import pytest from scipy.sparse import ( bsr_matrix, coo_matrix, csc_matrix, csr_matrix, - dok_matrix, dia_matrix, - lil_matrix, + dok_matrix, issparse, + lil_matrix, ) from sklearn import ( @@ -22,36 +23,31 @@ neighbors, ) from sklearn.base import clone -from sklearn.exceptions import DataConversionWarning -from sklearn.exceptions import EfficiencyWarning -from sklearn.exceptions import NotFittedError +from sklearn.exceptions import DataConversionWarning, EfficiencyWarning, NotFittedError from sklearn.metrics.pairwise import pairwise_distances from sklearn.metrics.tests.test_dist_metrics import BOOL_METRICS from sklearn.metrics.tests.test_pairwise_distances_reduction import ( assert_radius_neighbors_results_equality, ) -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import train_test_split +from sklearn.model_selection import cross_val_score, train_test_split from sklearn.neighbors import ( VALID_METRICS_SPARSE, KNeighborsRegressor, ) from sklearn.neighbors._base import ( - _is_sorted_by_data, + KNeighborsMixin, _check_precomputed, + _is_sorted_by_data, sort_graph_by_row_values, - KNeighborsMixin, ) from sklearn.pipeline import make_pipeline from sklearn.utils._testing import ( assert_allclose, assert_array_equal, + ignore_warnings, ) -from sklearn.utils._testing import ignore_warnings +from sklearn.utils.fixes import parse_version, sp_version from sklearn.utils.validation import check_random_state -from sklearn.utils.fixes import sp_version, parse_version - -import joblib rng = np.random.RandomState(0) # load and shuffle iris dataset @@ -74,7 +70,6 @@ set.intersection(*map(set, neighbors.VALID_METRICS.values())) ) # type: ignore P = (1, 2, 3, 4, np.inf) -JOBLIB_BACKENDS = list(joblib.parallel.BACKENDS.keys()) # Filter deprecation warnings. neighbors.kneighbors_graph = ignore_warnings(neighbors.kneighbors_graph) @@ -1774,7 +1769,7 @@ def test_non_euclidean_kneighbors(): X, radius, metric=metric, mode="connectivity", include_self=True ).toarray() nbrs1 = neighbors.NearestNeighbors(metric=metric, radius=radius).fit(X) - assert_array_equal(nbrs_graph, nbrs1.radius_neighbors_graph(X).A) + assert_array_equal(nbrs_graph, nbrs1.radius_neighbors_graph(X).toarray()) # Raise error when wrong parameters are supplied, X_nbrs = neighbors.NearestNeighbors(n_neighbors=3, metric="manhattan") @@ -1811,13 +1806,15 @@ def test_k_and_radius_neighbors_train_is_not_query(): check_object_arrays(ind, [[1], [0, 1]]) # Test the graph variants. 
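Aside: the `.A` → `.toarray()` replacements in these tests keep them working with SciPy's newer sparse array containers as well as the classic sparse matrices. A minimal sketch of the equivalence, assuming SciPy is available:

import numpy as np
from scipy.sparse import csr_matrix

graph = csr_matrix(np.eye(2))
# `.toarray()` densifies both sparse matrices and the newer sparse arrays,
# whereas the `.A` shortcut is tied to the older matrix interface.
assert np.array_equal(graph.toarray(), np.eye(2))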
- assert_array_equal(nn.kneighbors_graph(test_data).A, [[0.0, 1.0], [0.0, 1.0]]) assert_array_equal( - nn.kneighbors_graph([[2], [1]], mode="distance").A, + nn.kneighbors_graph(test_data).toarray(), [[0.0, 1.0], [0.0, 1.0]] + ) + assert_array_equal( + nn.kneighbors_graph([[2], [1]], mode="distance").toarray(), np.array([[0.0, 1.0], [0.0, 0.0]]), ) rng = nn.radius_neighbors_graph([[2], [1]], radius=1.5) - assert_array_equal(rng.A, [[0, 1], [1, 1]]) + assert_array_equal(rng.toarray(), [[0, 1], [1, 1]]) @pytest.mark.parametrize("algorithm", ALGORITHMS) @@ -1839,7 +1836,7 @@ def test_k_and_radius_neighbors_X_None(algorithm): rng = nn.radius_neighbors_graph(None, radius=1.5) kng = nn.kneighbors_graph(None) for graph in [rng, kng]: - assert_array_equal(graph.A, [[0, 1], [1, 0]]) + assert_array_equal(graph.toarray(), [[0, 1], [1, 0]]) assert_array_equal(graph.data, [1, 1]) assert_array_equal(graph.indices, [1, 0]) @@ -1847,7 +1844,7 @@ def test_k_and_radius_neighbors_X_None(algorithm): nn = neighbors.NearestNeighbors(n_neighbors=2, algorithm=algorithm) nn.fit(X) assert_array_equal( - nn.kneighbors_graph().A, + nn.kneighbors_graph().toarray(), np.array([[0.0, 1.0, 1.0], [1.0, 0.0, 1.0], [1.0, 1.0, 0]]), ) @@ -1905,13 +1902,15 @@ def test_k_and_radius_neighbors_duplicates(algorithm): def test_include_self_neighbors_graph(): # Test include_self parameter in neighbors_graph X = [[2, 3], [4, 5]] - kng = neighbors.kneighbors_graph(X, 1, include_self=True).A - kng_not_self = neighbors.kneighbors_graph(X, 1, include_self=False).A + kng = neighbors.kneighbors_graph(X, 1, include_self=True).toarray() + kng_not_self = neighbors.kneighbors_graph(X, 1, include_self=False).toarray() assert_array_equal(kng, [[1.0, 0.0], [0.0, 1.0]]) assert_array_equal(kng_not_self, [[0.0, 1.0], [1.0, 0.0]]) - rng = neighbors.radius_neighbors_graph(X, 5.0, include_self=True).A - rng_not_self = neighbors.radius_neighbors_graph(X, 5.0, include_self=False).A + rng = neighbors.radius_neighbors_graph(X, 5.0, include_self=True).toarray() + rng_not_self = neighbors.radius_neighbors_graph( + X, 5.0, include_self=False + ).toarray() assert_array_equal(rng, [[1.0, 1.0], [1.0, 1.0]]) assert_array_equal(rng_not_self, [[0.0, 1.0], [1.0, 0.0]]) @@ -1967,10 +1966,10 @@ def test_same_radius_neighbors_parallel(algorithm): assert_allclose(graph, graph_parallel) -@pytest.mark.parametrize("backend", JOBLIB_BACKENDS) +@pytest.mark.parametrize("backend", ["threading", "loky"]) @pytest.mark.parametrize("algorithm", ALGORITHMS) def test_knn_forcing_backend(backend, algorithm): - # Non-regression test which ensure the knn methods are properly working + # Non-regression test which ensures the knn methods are properly working # even when forcing the global joblib backend. 
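A hedged sketch of the fractional Minkowski behaviour implied by the `p : float` docstring change earlier and exercised by the regression test added below; passing `metric_params={"p": 0.5}` together with the brute-force backend mirrors that test and is assumed here to be the supported spelling:

import numpy as np
from sklearn.neighbors import NearestNeighbors

X = np.array([[1.0, 0.0], [0.0, 0.0], [0.0, 1.0]])
# p < 1 is not a true metric, so only the brute-force backend accepts it.
nn = NearestNeighbors(n_neighbors=3, algorithm="brute", metric_params={"p": 0.5})
nn.fit(X)
print(nn.kneighbors(X[:1], return_distance=False))  # e.g. [[0 1 2]]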
with joblib.parallel_backend(backend): X, y = datasets.make_classification( @@ -1979,12 +1978,12 @@ def test_knn_forcing_backend(backend, algorithm): X_train, X_test, y_train, y_test = train_test_split(X, y) clf = neighbors.KNeighborsClassifier( - n_neighbors=3, algorithm=algorithm, n_jobs=3 + n_neighbors=3, algorithm=algorithm, n_jobs=2 ) clf.fit(X_train, y_train) clf.predict(X_test) clf.kneighbors(X_test) - clf.kneighbors_graph(X_test, mode="distance").toarray() + clf.kneighbors_graph(X_test, mode="distance") def test_dtype_convert(): @@ -2000,7 +1999,7 @@ def test_dtype_convert(): def test_sparse_metric_callable(): def sparse_metric(x, y): # Metric accepting sparse matrix input (only) assert issparse(x) and issparse(y) - return x.dot(y.T).A.item() + return x.dot(y.T).toarray().item() X = csr_matrix( [[1, 1, 1, 1, 1], [1, 0, 1, 0, 1], [0, 0, 1, 0, 0]] # Population matrix @@ -2197,3 +2196,36 @@ def _weights(dist): est = KNeighborsRegressor(n_neighbors=1, algorithm="brute", weights=_weights) est.fit(X, y) assert_allclose(est.predict([[0, 2.5]]), [6]) + + +def test_predict_dataframe(): + """Check that KNN predict works with dataframes + + non-regression test for issue #26768 + """ + pd = pytest.importorskip("pandas") + + X = pd.DataFrame(np.array([[1, 2], [3, 4], [5, 6], [7, 8]]), columns=["a", "b"]) + y = np.array([1, 2, 3, 4]) + + knn = neighbors.KNeighborsClassifier(n_neighbors=2).fit(X, y) + knn.predict(X) + + +def test_nearest_neighbours_works_with_p_less_than_1(): + """Check that NearestNeighbors works with :math:`p \\in (0,1)` when `algorithm` + is `"auto"` or `"brute"` regardless of the dtype of X. + + Non-regression test for issue #26548 + """ + X = np.array([[1.0, 0.0], [0.0, 0.0], [0.0, 1.0]]) + neigh = neighbors.NearestNeighbors( + n_neighbors=3, algorithm="brute", metric_params={"p": 0.5} + ) + neigh.fit(X) + + y = neigh.radius_neighbors(X[0].reshape(1, -1), radius=4, return_distance=False) + assert_allclose(y[0], [0, 1, 2]) + + y = neigh.kneighbors(X[0].reshape(1, -1), return_distance=False) + assert_allclose(y[0], [0, 1, 2]) diff --git a/sklearn/neighbors/tests/test_neighbors_pipeline.py b/sklearn/neighbors/tests/test_neighbors_pipeline.py index 905f206770769..1d01a0d0a60a8 100644 --- a/sklearn/neighbors/tests/test_neighbors_pipeline.py +++ b/sklearn/neighbors/tests/test_neighbors_pipeline.py @@ -7,23 +7,20 @@ import numpy as np -from sklearn.utils._testing import assert_array_almost_equal +from sklearn.base import clone +from sklearn.cluster import DBSCAN, SpectralClustering from sklearn.cluster.tests.common import generate_clustered_data from sklearn.datasets import make_blobs +from sklearn.manifold import TSNE, Isomap, SpectralEmbedding +from sklearn.neighbors import ( + KNeighborsRegressor, + KNeighborsTransformer, + LocalOutlierFactor, + RadiusNeighborsRegressor, + RadiusNeighborsTransformer, +) from sklearn.pipeline import make_pipeline -from sklearn.base import clone - -from sklearn.neighbors import KNeighborsTransformer -from sklearn.neighbors import RadiusNeighborsTransformer - -from sklearn.cluster import DBSCAN -from sklearn.cluster import SpectralClustering -from sklearn.neighbors import KNeighborsRegressor -from sklearn.neighbors import RadiusNeighborsRegressor -from sklearn.neighbors import LocalOutlierFactor -from sklearn.manifold import SpectralEmbedding -from sklearn.manifold import Isomap -from sklearn.manifold import TSNE +from sklearn.utils._testing import assert_array_almost_equal def test_spectral_clustering(): diff --git 
a/sklearn/neighbors/tests/test_neighbors_tree.py b/sklearn/neighbors/tests/test_neighbors_tree.py index fca0049669c6a..590e72ab785d2 100644 --- a/sklearn/neighbors/tests/test_neighbors_tree.py +++ b/sklearn/neighbors/tests/test_neighbors_tree.py @@ -1,28 +1,39 @@ # License: BSD 3 clause -import pickle import itertools +import pickle import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_almost_equal from sklearn.metrics import DistanceMetric from sklearn.neighbors._ball_tree import ( BallTree, kernel_norm, +) +from sklearn.neighbors._ball_tree import ( NeighborsHeap as NeighborsHeapBT, - simultaneous_sort as simultaneous_sort_bt, +) +from sklearn.neighbors._ball_tree import ( nodeheap_sort as nodeheap_sort_bt, ) +from sklearn.neighbors._ball_tree import ( + simultaneous_sort as simultaneous_sort_bt, +) from sklearn.neighbors._kd_tree import ( KDTree, +) +from sklearn.neighbors._kd_tree import ( NeighborsHeap as NeighborsHeapKDT, - simultaneous_sort as simultaneous_sort_kdt, +) +from sklearn.neighbors._kd_tree import ( nodeheap_sort as nodeheap_sort_kdt, ) - +from sklearn.neighbors._kd_tree import ( + simultaneous_sort as simultaneous_sort_kdt, +) from sklearn.utils import check_random_state -from numpy.testing import assert_array_almost_equal, assert_allclose rng = np.random.RandomState(42) V_mahalanobis = rng.rand(3, 3) diff --git a/sklearn/neighbors/tests/test_quad_tree.py b/sklearn/neighbors/tests/test_quad_tree.py index bba79e2c8ee1a..be9a4c5fe549d 100644 --- a/sklearn/neighbors/tests/test_quad_tree.py +++ b/sklearn/neighbors/tests/test_quad_tree.py @@ -1,6 +1,6 @@ import pickle -import numpy as np +import numpy as np import pytest from sklearn.neighbors._quad_tree import _QuadTree diff --git a/sklearn/neural_network/__init__.py b/sklearn/neural_network/__init__.py index 7f6bad7bbd7e7..0b321b605de0b 100644 --- a/sklearn/neural_network/__init__.py +++ b/sklearn/neural_network/__init__.py @@ -5,9 +5,7 @@ # License: BSD 3 clause +from ._multilayer_perceptron import MLPClassifier, MLPRegressor from ._rbm import BernoulliRBM -from ._multilayer_perceptron import MLPClassifier -from ._multilayer_perceptron import MLPRegressor - __all__ = ["BernoulliRBM", "MLPClassifier", "MLPRegressor"] diff --git a/sklearn/neural_network/_base.py b/sklearn/neural_network/_base.py index 0e40739556e18..73d62f9543e98 100644 --- a/sklearn/neural_network/_base.py +++ b/sklearn/neural_network/_base.py @@ -5,7 +5,6 @@ # License: BSD 3 clause import numpy as np - from scipy.special import expit as logistic_sigmoid from scipy.special import xlogy diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py index fb8eab2f1776d..d64593c27d6f5 100644 --- a/sklearn/neural_network/_multilayer_perceptron.py +++ b/sklearn/neural_network/_multilayer_perceptron.py @@ -6,40 +6,44 @@ # Jiyuan Qian # License: BSD 3 clause -from numbers import Integral, Real -import numpy as np - -from abc import ABCMeta, abstractmethod import warnings +from abc import ABCMeta, abstractmethod from itertools import chain +from numbers import Integral, Real +import numpy as np import scipy.optimize from ..base import ( BaseEstimator, ClassifierMixin, RegressorMixin, + _fit_context, + is_classifier, ) -from ..base import is_classifier -from ..base import _fit_context -from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS -from ._stochastic_optimizers import SGDOptimizer, AdamOptimizer +from ..exceptions import ConvergenceWarning from ..metrics import 
accuracy_score, r2_score from ..model_selection import train_test_split from ..preprocessing import LabelBinarizer -from ..utils import gen_batches, check_random_state -from ..utils import shuffle -from ..utils import _safe_indexing -from ..utils import column_or_1d -from ..exceptions import ConvergenceWarning +from ..utils import ( + _safe_indexing, + check_random_state, + column_or_1d, + gen_batches, + shuffle, +) +from ..utils._param_validation import Interval, Options, StrOptions from ..utils.extmath import safe_sparse_dot -from ..utils.validation import check_is_fitted -from ..utils.multiclass import _check_partial_fit_first_call, unique_labels -from ..utils.multiclass import type_of_target -from ..utils.optimize import _check_optimize_result from ..utils.metaestimators import available_if -from ..utils._param_validation import StrOptions, Options, Interval - +from ..utils.multiclass import ( + _check_partial_fit_first_call, + type_of_target, + unique_labels, +) +from ..utils.optimize import _check_optimize_result +from ..utils.validation import check_is_fitted +from ._base import ACTIVATIONS, DERIVATIVES, LOSS_FUNCTIONS +from ._stochastic_optimizers import AdamOptimizer, SGDOptimizer _STOCHASTIC_SOLVERS = ["sgd", "adam"] diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py index 2ded6533d8d96..39d30ab41493b 100644 --- a/sklearn/neural_network/_rbm.py +++ b/sklearn/neural_network/_rbm.py @@ -14,16 +14,16 @@ import scipy.sparse as sp from scipy.special import expit # logistic function -from ..base import BaseEstimator -from ..base import TransformerMixin -from ..base import ClassNamePrefixFeaturesOutMixin -from ..base import _fit_context -from ..utils import check_random_state -from ..utils import gen_even_slices -from ..utils.extmath import safe_sparse_dot -from ..utils.extmath import log_logistic -from ..utils.validation import check_is_fitted +from ..base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from ..utils import check_random_state, gen_even_slices from ..utils._param_validation import Interval +from ..utils.extmath import log_logistic, safe_sparse_dot +from ..utils.validation import check_is_fitted class BernoulliRBM(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator): diff --git a/sklearn/neural_network/tests/test_base.py b/sklearn/neural_network/tests/test_base.py index 32aa7f1fee917..af7b38e899907 100644 --- a/sklearn/neural_network/tests/test_base.py +++ b/sklearn/neural_network/tests/test_base.py @@ -1,8 +1,7 @@ -import pytest import numpy as np +import pytest -from sklearn.neural_network._base import binary_log_loss -from sklearn.neural_network._base import log_loss +from sklearn.neural_network._base import binary_log_loss, log_loss def test_binary_log_loss_1_prob_finite(): diff --git a/sklearn/neural_network/tests/test_mlp.py b/sklearn/neural_network/tests/test_mlp.py index 01fd936eb8517..348bcf93d6cd5 100644 --- a/sklearn/neural_network/tests/test_mlp.py +++ b/sklearn/neural_network/tests/test_mlp.py @@ -5,33 +5,33 @@ # Author: Issam H. 
Laradji # License: BSD 3 clause -import pytest +import re import sys import warnings -import re +from io import StringIO -import numpy as np import joblib - +import numpy as np +import pytest from numpy.testing import ( + assert_allclose, assert_almost_equal, assert_array_equal, - assert_allclose, ) +from scipy.sparse import csr_matrix -from sklearn.datasets import load_digits, load_iris -from sklearn.datasets import make_regression, make_multilabel_classification +from sklearn.datasets import ( + load_digits, + load_iris, + make_multilabel_classification, + make_regression, +) from sklearn.exceptions import ConvergenceWarning -from io import StringIO from sklearn.metrics import roc_auc_score -from sklearn.neural_network import MLPClassifier -from sklearn.neural_network import MLPRegressor -from sklearn.preprocessing import LabelBinarizer -from sklearn.preprocessing import MinMaxScaler, scale -from scipy.sparse import csr_matrix +from sklearn.neural_network import MLPClassifier, MLPRegressor +from sklearn.preprocessing import LabelBinarizer, MinMaxScaler, scale from sklearn.utils._testing import ignore_warnings - ACTIVATION_TYPES = ["identity", "logistic", "tanh", "relu"] X_digits, y_digits = load_digits(n_class=3, return_X_y=True) diff --git a/sklearn/neural_network/tests/test_rbm.py b/sklearn/neural_network/tests/test_rbm.py index 0412d1efff8e3..88429e3ae1901 100644 --- a/sklearn/neural_network/tests/test_rbm.py +++ b/sklearn/neural_network/tests/test_rbm.py @@ -1,18 +1,18 @@ -import sys import re -import pytest +import sys +from io import StringIO import numpy as np +import pytest from scipy.sparse import csc_matrix, csr_matrix, lil_matrix + +from sklearn.datasets import load_digits +from sklearn.neural_network import BernoulliRBM from sklearn.utils._testing import ( + assert_allclose, assert_almost_equal, assert_array_equal, - assert_allclose, ) - -from sklearn.datasets import load_digits -from io import StringIO -from sklearn.neural_network import BernoulliRBM from sklearn.utils.validation import assert_all_finite Xdigits, _ = load_digits(return_X_y=True) diff --git a/sklearn/neural_network/tests/test_stochastic_optimizers.py b/sklearn/neural_network/tests/test_stochastic_optimizers.py index e876892f28daf..58a9f0c7dda13 100644 --- a/sklearn/neural_network/tests/test_stochastic_optimizers.py +++ b/sklearn/neural_network/tests/test_stochastic_optimizers.py @@ -1,13 +1,12 @@ import numpy as np from sklearn.neural_network._stochastic_optimizers import ( + AdamOptimizer, BaseOptimizer, SGDOptimizer, - AdamOptimizer, ) from sklearn.utils._testing import assert_array_equal - shapes = [(4, 6), (6, 8), (7, 8, 9)] diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index 43b6b7eb0c939..ed23a3c33bd70 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -15,25 +15,21 @@ import numpy as np from scipy import sparse -from .base import clone, TransformerMixin -from .base import _fit_context +from .base import TransformerMixin, _fit_context, clone +from .exceptions import NotFittedError from .preprocessing import FunctionTransformer -from .utils._estimator_html_repr import _VisualBlock -from .utils.metaestimators import available_if from .utils import ( Bunch, _print_elapsed_time, + check_pandas_support, ) -from .utils._tags import _safe_tags -from .utils.validation import check_memory -from .utils.validation import check_is_fitted -from .utils import check_pandas_support +from .utils._estimator_html_repr import _VisualBlock from .utils._param_validation import HasMethods, Hidden -from 
.utils._set_output import _safe_set_output, _get_output_config -from .utils.parallel import delayed, Parallel -from .exceptions import NotFittedError - -from .utils.metaestimators import _BaseComposition +from .utils._set_output import _get_output_config, _safe_set_output +from .utils._tags import _safe_tags +from .utils.metaestimators import _BaseComposition, available_if +from .utils.parallel import Parallel, delayed +from .utils.validation import check_is_fitted, check_memory __all__ = ["Pipeline", "FeatureUnion", "make_pipeline", "make_union"] @@ -69,6 +65,12 @@ class Pipeline(_BaseComposition): to another estimator, or a transformer removed by setting it to `'passthrough'` or `None`. + For an example use case of `Pipeline` combined with + :class:`~sklearn.model_selection.GridSearchCV`, refer to + :ref:`sphx_glr_auto_examples_compose_plot_compare_reduction.py`. The + example :ref:`sphx_glr_auto_examples_compose_plot_digits_pipe.py` shows how + to grid search on a pipeline using `'__'` as a separator in the parameter names. + Read more in the :ref:`User Guide `. .. versionadded:: 0.5 @@ -135,10 +137,11 @@ class Pipeline(_BaseComposition): >>> pipe = Pipeline([('scaler', StandardScaler()), ('svc', SVC())]) >>> # The pipeline can be used as any other estimator >>> # and avoids leaking the test set into the train set - >>> pipe.fit(X_train, y_train) - Pipeline(steps=[('scaler', StandardScaler()), ('svc', SVC())]) - >>> pipe.score(X_test, y_test) + >>> pipe.fit(X_train, y_train).score(X_test, y_test) 0.88 + >>> # An estimator's parameter can be set using '__' syntax + >>> pipe.set_params(svc__C=10).fit(X_train, y_train).score(X_test, y_test) + 0.76 """ # BaseEstimator interface @@ -1055,6 +1058,13 @@ class FeatureUnion(TransformerMixin, _BaseComposition): >>> union.fit_transform(X) array([[ 1.5 , 3.0..., 0.8...], [-1.5 , 5.7..., -0.4...]]) + >>> # An estimator's parameter can be set using '__' syntax + >>> union.set_params(pca__n_components=1).fit_transform(X) + array([[ 1.5 , 3.0...], + [-1.5 , 5.7...]]) + + For a more detailed example of usage, see + :ref:`sphx_glr_auto_examples_compose_plot_feature_union.py`. """ _required_parameters = ["transformer_list"] @@ -1366,11 +1376,12 @@ def __getitem__(self, name): def make_union(*transformers, n_jobs=None, verbose=False): - """Construct a FeatureUnion from the given transformers. + """Construct a :class:`FeatureUnion` from the given transformers. - This is a shorthand for the FeatureUnion constructor; it does not require, - and does not permit, naming the transformers. Instead, they will be given - names automatically based on their types. It also does not allow weighting. + This is a shorthand for the :class:`FeatureUnion` constructor; it does not + require, and does not permit, naming the transformers. Instead, they will + be given names automatically based on their types. It also does not allow + weighting. Parameters ---------- diff --git a/sklearn/preprocessing/__init__.py b/sklearn/preprocessing/__init__.py index 221c0701cb1d3..c730a71260808 100644 --- a/sklearn/preprocessing/__init__.py +++ b/sklearn/preprocessing/__init__.py @@ -3,42 +3,33 @@ normalization, binarization methods. 
""" +from ._data import ( + Binarizer, + KernelCenterer, + MaxAbsScaler, + MinMaxScaler, + Normalizer, + PowerTransformer, + QuantileTransformer, + RobustScaler, + StandardScaler, + add_dummy_feature, + binarize, + maxabs_scale, + minmax_scale, + normalize, + power_transform, + quantile_transform, + robust_scale, + scale, +) +from ._discretization import KBinsDiscretizer +from ._encoders import OneHotEncoder, OrdinalEncoder from ._function_transformer import FunctionTransformer - -from ._data import Binarizer -from ._data import KernelCenterer -from ._data import MinMaxScaler -from ._data import MaxAbsScaler -from ._data import Normalizer -from ._data import RobustScaler -from ._data import StandardScaler -from ._data import QuantileTransformer -from ._data import add_dummy_feature -from ._data import binarize -from ._data import normalize -from ._data import scale -from ._data import robust_scale -from ._data import maxabs_scale -from ._data import minmax_scale -from ._data import quantile_transform -from ._data import power_transform -from ._data import PowerTransformer - -from ._encoders import OneHotEncoder -from ._encoders import OrdinalEncoder +from ._label import LabelBinarizer, LabelEncoder, MultiLabelBinarizer, label_binarize +from ._polynomial import PolynomialFeatures, SplineTransformer from ._target_encoder import TargetEncoder -from ._label import label_binarize -from ._label import LabelBinarizer -from ._label import LabelEncoder -from ._label import MultiLabelBinarizer - -from ._discretization import KBinsDiscretizer - -from ._polynomial import PolynomialFeatures -from ._polynomial import SplineTransformer - - __all__ = [ "Binarizer", "FunctionTransformer", diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index 139022a9897e6..87ac8cd2c9819 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -12,41 +12,37 @@ from numbers import Integral, Real import numpy as np -from scipy import sparse -from scipy import stats -from scipy import optimize +from scipy import optimize, sparse, stats from scipy.special import boxcox from ..base import ( BaseEstimator, - TransformerMixin, - OneToOneFeatureMixin, ClassNamePrefixFeaturesOutMixin, + OneToOneFeatureMixin, + TransformerMixin, _fit_context, ) from ..utils import check_array from ..utils._param_validation import Interval, Options, StrOptions, validate_params from ..utils.extmath import _incremental_mean_and_var, row_norms -from ..utils.sparsefuncs_fast import ( - inplace_csr_row_normalize_l1, - inplace_csr_row_normalize_l2, -) from ..utils.sparsefuncs import ( + incr_mean_variance_axis, inplace_column_scale, mean_variance_axis, - incr_mean_variance_axis, min_max_axis, ) +from ..utils.sparsefuncs_fast import ( + inplace_csr_row_normalize_l1, + inplace_csr_row_normalize_l2, +) from ..utils.validation import ( + FLOAT_DTYPES, + _check_sample_weight, check_is_fitted, check_random_state, - _check_sample_weight, - FLOAT_DTYPES, ) - from ._encoders import OneHotEncoder - BOUNDS_THRESHOLD = 1e-7 __all__ = [ @@ -128,7 +124,8 @@ def _handle_zeros_in_scale(scale, copy=True, constant_mask=None): "with_mean": ["boolean"], "with_std": ["boolean"], "copy": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True): """Standardize a dataset along any axis. @@ -191,8 +188,7 @@ def scale(X, *, axis=0, with_mean=True, with_std=True, copy=True): affect model performance. 
For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. + see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`. .. warning:: Risk of data leak @@ -294,6 +290,12 @@ class MinMaxScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): This transformation is often used as an alternative to zero mean, unit variance scaling. + `MinMaxScaler` doesn't reduce the effect of outliers, but it linearly + scales them down into a fixed range, where the largest occurring data point + corresponds to the maximum value and the smallest one corresponds to the + minimum value. For an example visualization, refer to :ref:`Compare + MinMaxScaler with other scalers `. + Read more in the :ref:`User Guide `. Parameters @@ -367,10 +369,6 @@ class MinMaxScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): NaNs are treated as missing values: disregarded in fit, and maintained in transform. - For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. - Examples -------- >>> from sklearn.preprocessing import MinMaxScaler @@ -559,7 +557,8 @@ def _more_tags(self): { "X": ["array-like"], "axis": [Options(Integral, {0, 1})], - } + }, + prefer_skip_nested_validation=False, ) def minmax_scale(X, feature_range=(0, 1), *, axis=0, copy=True): """Transform features by scaling each feature to a given range. @@ -631,8 +630,7 @@ def minmax_scale(X, feature_range=(0, 1), *, axis=0, copy=True): Notes ----- For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. + see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`. """ # Unlike the scaler object, this function allows 1d input. # If copy is required, it will be done inside the scaler object. @@ -685,6 +683,11 @@ class StandardScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): than others, it might dominate the objective function and make the estimator unable to learn from other features correctly as expected. + `StandardScaler` is sensitive to outliers, and the features may scale + differently from each other in the presence of outliers. For an example + visualization, refer to :ref:`Compare StandardScaler with other scalers + `. + This scaler can also be applied to sparse CSR or CSC matrices by passing `with_mean=False` to avoid breaking the sparsity structure of the data. @@ -723,11 +726,12 @@ class StandardScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): mean_ : ndarray of shape (n_features,) or None The mean value for each feature in the training set. - Equal to ``None`` when ``with_mean=False``. + Equal to ``None`` when ``with_mean=False`` and ``with_std=False``. var_ : ndarray of shape (n_features,) or None The variance for each feature in the training set. Used to compute - `scale_`. Equal to ``None`` when ``with_std=False``. + `scale_`. Equal to ``None`` when ``with_mean=False`` and + ``with_std=False``. n_features_in_ : int Number of features seen during :term:`fit`. @@ -765,10 +769,6 @@ class StandardScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): `numpy.std(x, ddof=0)`. Note that the choice of `ddof` is unlikely to affect model performance. - For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `.
- Examples -------- >>> from sklearn.preprocessing import StandardScaler @@ -1082,6 +1082,10 @@ class MaxAbsScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): This scaler can also be applied to sparse CSR or CSC matrices. + `MaxAbsScaler` doesn't reduce the effect of outliers; it only linearly + scales them down. For an example visualization, refer to :ref:`Compare + MaxAbsScaler with other scalers `. + .. versionadded:: 0.17 Parameters @@ -1125,10 +1129,6 @@ class MaxAbsScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): NaNs are treated as missing values: disregarded in fit, and maintained in transform. - For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. - Examples -------- >>> from sklearn.preprocessing import MaxAbsScaler @@ -1294,8 +1294,8 @@ def _more_tags(self): { "X": ["array-like", "sparse matrix"], "axis": [Options(Integral, {0, 1})], - "copy": ["boolean"], - } + }, + prefer_skip_nested_validation=False, ) def maxabs_scale(X, *, axis=0, copy=True): """Scale each feature to the [-1, 1] range without breaking the sparsity. @@ -1348,8 +1348,7 @@ def maxabs_scale(X, *, axis=0, copy=True): and maintained during the data transformation. For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. + see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`. """ # Unlike the scaler object, this function allows 1d input. @@ -1392,11 +1391,13 @@ class RobustScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): set. Median and interquartile range are then stored to be used on later data using the :meth:`transform` method. - Standardization of a dataset is a common requirement for many - machine learning estimators. Typically this is done by removing the mean - and scaling to unit variance. However, outliers can often influence the - sample mean / variance in a negative way. In such cases, the median and - the interquartile range often give better results. + Standardization of a dataset is a common preprocessing for many machine + learning estimators. Typically this is done by removing the mean and + scaling to unit variance. However, outliers can often influence the sample + mean / variance in a negative way. In such cases, using the median and the + interquartile range often gives better results. For an example visualization + and comparison to other scalers, refer to :ref:`Compare RobustScaler with + other scalers `. .. versionadded:: 0.17 @@ -1467,9 +1468,6 @@ class RobustScaler(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): Notes ----- - For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. https://en.wikipedia.org/wiki/Median https://en.wikipedia.org/wiki/Interquartile_range @@ -1649,7 +1647,8 @@ def _more_tags(self): @validate_params( - {"X": ["array-like", "sparse matrix"], "axis": [Options(Integral, {0, 1})]} + {"X": ["array-like", "sparse matrix"], "axis": [Options(Integral, {0, 1})]}, + prefer_skip_nested_validation=False, ) def robust_scale( X, @@ -1731,8 +1730,7 @@ def robust_scale( To avoid memory copy the caller should pass a CSR matrix. For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. ..
warning:: Risk of data leak @@ -1784,7 +1782,8 @@ def robust_scale( "axis": [Options(Integral, {0, 1})], "copy": ["boolean"], "return_norm": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False): """Scale input vectors individually to unit norm (vector length). @@ -1832,8 +1831,7 @@ def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False): Notes ----- For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. + see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`. """ if axis == 0: sparse_format = "csc" @@ -1903,6 +1901,9 @@ class Normalizer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): of the vectors and is the base similarity metric for the Vector Space Model commonly used by the Information Retrieval community. + For an example visualization, refer to :ref:`Compare Normalizer with other + scalers `. + Read more in the :ref:`User Guide `. Parameters @@ -1941,10 +1942,6 @@ class Normalizer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): :meth:`transform`, as parameter validation is only performed in :meth:`fit`. - For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. - Examples -------- >>> from sklearn.preprocessing import Normalizer @@ -2022,7 +2019,8 @@ def _more_tags(self): "X": ["array-like", "sparse matrix"], "threshold": [Interval(Real, None, None, closed="neither")], "copy": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def binarize(X, *, threshold=0.0, copy=True): """Boolean thresholding of array-like or scipy.sparse matrix. @@ -2359,7 +2357,8 @@ def _more_tags(self): { "X": ["array-like", "sparse matrix"], "value": [Interval(Real, None, None, closed="neither")], - } + }, + prefer_skip_nested_validation=True, ) def add_dummy_feature(X, value=1.0): """Augment dataset with an additional dummy feature. @@ -2391,7 +2390,7 @@ def add_dummy_feature(X, value=1.0): n_samples, n_features = X.shape shape = (n_samples, n_features + 1) if sparse.issparse(X): - if sparse.isspmatrix_coo(X): + if X.format == "coo": # Shift columns to the right. col = X.col + 1 # Column indices of dummy feature are 0 everywhere. @@ -2401,7 +2400,7 @@ def add_dummy_feature(X, value=1.0): # Prepend the dummy feature n_samples times. data = np.concatenate((np.full(n_samples, value), X.data)) return sparse.coo_matrix((data, (row, col)), shape) - elif sparse.isspmatrix_csc(X): + elif X.format == "csc": # Shift index pointers since we need to add n_samples elements. indptr = X.indptr + n_samples # indptr[0] must be 0. @@ -2436,6 +2435,9 @@ class QuantileTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator) correlations between variables measured at the same scale but renders variables measured at different scales more directly comparable. + For example visualizations, refer to :ref:`Compare QuantileTransformer with + other scalers `. + Read more in the :ref:`User Guide `. .. versionadded:: 0.19 @@ -2513,10 +2515,6 @@ class QuantileTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator) NaNs are treated as missing values: disregarded in fit, and maintained in transform. - For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. 
- Examples -------- >>> import numpy as np @@ -2849,7 +2847,8 @@ def _more_tags(self): @validate_params( - {"X": ["array-like", "sparse matrix"], "axis": [Options(Integral, {0, 1})]} + {"X": ["array-like", "sparse matrix"], "axis": [Options(Integral, {0, 1})]}, + prefer_skip_nested_validation=False, ) def quantile_transform( X, @@ -2964,8 +2963,7 @@ def quantile_transform( LogisticRegression())`. For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. + see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`. Examples -------- @@ -3009,6 +3007,12 @@ class PowerTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): By default, zero-mean, unit-variance normalization is applied to the transformed data. + For an example visualization, refer to :ref:`Compare PowerTransformer with + other scalers `. To see the + effect of Box-Cox and Yeo-Johnson transformations on different + distributions, see: + :ref:`sphx_glr_auto_examples_preprocessing_plot_map_data_to_normal.py`. + Read more in the :ref:`User Guide `. .. versionadded:: 0.20 @@ -3056,19 +3060,16 @@ class PowerTransformer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator): NaNs are treated as missing values: disregarded in ``fit``, and maintained in ``transform``. - For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. - References ---------- - .. [1] I.K. Yeo and R.A. Johnson, "A new family of power transformations to - improve normality or symmetry." Biometrika, 87(4), pp.954-959, - (2000). + .. [1] :doi:`I.K. Yeo and R.A. Johnson, "A new family of power + transformations to improve normality or symmetry." Biometrika, + 87(4), pp.954-959, (2000). <10.1093/biomet/87.4.954>` - .. [2] G.E.P. Box and D.R. Cox, "An Analysis of Transformations", Journal - of the Royal Statistical Society B, 26, 211-252 (1964). + .. [2] :doi:`G.E.P. Box and D.R. Cox, "An Analysis of Transformations", + Journal of the Royal Statistical Society B, 26, 211-252 (1964). + <10.1111/j.2517-6161.1964.tb00553.x>` Examples -------- @@ -3410,7 +3411,10 @@ def _more_tags(self): return {"allow_nan": True} -@validate_params({"X": ["array-like"]}) +@validate_params( + {"X": ["array-like"]}, + prefer_skip_nested_validation=False, +) def power_transform(X, method="yeo-johnson", *, standardize=True, copy=True): """Parametric, monotonic transformation to make data more Gaussian-like. @@ -3473,8 +3477,7 @@ def power_transform(X, method="yeo-johnson", *, standardize=True, copy=True): in ``transform``. For a comparison of the different scalers, transformers, and normalizers, - see :ref:`examples/preprocessing/plot_all_scaling.py - `. + see: :ref:`sphx_glr_auto_examples_preprocessing_plot_all_scaling.py`. References ---------- diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index ac7432027f462..691ebefcaa29b 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -4,22 +4,23 @@ # License: BSD -from numbers import Integral -import numpy as np import warnings +from numbers import Integral -from . 
import OneHotEncoder +import numpy as np -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context -from ..utils._param_validation import Hidden, Interval, StrOptions, Options -from ..utils.validation import check_array -from ..utils.validation import check_is_fitted -from ..utils.validation import check_random_state -from ..utils.validation import _check_feature_names_in -from ..utils.validation import _check_sample_weight -from ..utils.stats import _weighted_percentile +from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils import _safe_indexing +from ..utils._param_validation import Hidden, Interval, Options, StrOptions +from ..utils.stats import _weighted_percentile +from ..utils.validation import ( + _check_feature_names_in, + _check_sample_weight, + check_array, + check_is_fitted, + check_random_state, +) +from ._encoders import OneHotEncoder class KBinsDiscretizer(TransformerMixin, BaseEstimator): @@ -54,6 +55,9 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator): - 'kmeans': Values in each bin have the same nearest center of a 1D k-means cluster. + For an example of the different strategies see: + :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_strategies.py`. + dtype : {np.float32, np.float64}, default=None The desired data-type for the output. If None, output dtype is consistent with input dtype. Only np.float32 and np.float64 are @@ -116,6 +120,12 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator): Notes ----- + + For a visualization of discretization on different datasets refer to + :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization_classification.py`. + On the effect of discretization on linear models see: + :ref:`sphx_glr_auto_examples_preprocessing_plot_discretization.py`. + In bin edges for feature ``i``, the first and last values are used only for ``inverse_transform``. 
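A small sketch of the three binning strategies documented for `KBinsDiscretizer` (toy data; the printed bin edges are purely illustrative and depend on the input):

import numpy as np
from sklearn.preprocessing import KBinsDiscretizer

X = np.array([[0.0], [0.1], [0.2], [1.0], [10.0]])
for strategy in ("uniform", "quantile", "kmeans"):
    est = KBinsDiscretizer(n_bins=3, encode="ordinal", strategy=strategy)
    codes = est.fit_transform(X).ravel()  # ordinal bin index per sample
    print(strategy, codes, est.bin_edges_[0])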
During transform, bin edges are extended to:: diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index de3f983d7ae6f..3bd040fad0d54 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -3,23 +3,19 @@ # License: BSD 3 clause import numbers -from numbers import Integral import warnings +from numbers import Integral import numpy as np from scipy import sparse -from ..base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin -from ..base import _fit_context -from ..utils import check_array, is_scalar_nan, _safe_indexing -from ..utils.validation import check_is_fitted -from ..utils.validation import _check_feature_names_in -from ..utils._param_validation import Interval, StrOptions, Hidden -from ..utils._param_validation import RealNotInt +from ..base import BaseEstimator, OneToOneFeatureMixin, TransformerMixin, _fit_context +from ..utils import _safe_indexing, check_array, is_scalar_nan +from ..utils._encode import _check_unknown, _encode, _get_counts, _unique from ..utils._mask import _get_mask - -from ..utils._encode import _encode, _check_unknown, _unique, _get_counts - +from ..utils._param_validation import Hidden, Interval, RealNotInt, StrOptions +from ..utils._set_output import _get_output_config +from ..utils.validation import _check_feature_names_in, check_is_fitted __all__ = ["OneHotEncoder", "OrdinalEncoder"] @@ -181,11 +177,11 @@ def _transform( warn_on_unknown=False, ignore_category_indices=None, ): - self._check_feature_names(X, reset=False) - self._check_n_features(X, reset=False) X_list, n_samples, n_features = self._check_X( X, force_all_finite=force_all_finite ) + self._check_feature_names(X, reset=False) + self._check_n_features(X, reset=False) X_int = np.zeros((n_samples, n_features), dtype=int) X_mask = np.ones((n_samples, n_features), dtype=bool) @@ -442,7 +438,7 @@ def _map_infrequent_categories(self, X_int, X_mask, ignore_category_indices): X_int[rows_to_update, i] = np.take(mapping, X_int[rows_to_update, i]) def _more_tags(self): - return {"X_types": ["categorical"]} + return {"X_types": ["2darray", "categorical"], "allow_nan": True} class OneHotEncoder(_BaseEncoder): @@ -467,6 +463,8 @@ class OneHotEncoder(_BaseEncoder): instead. Read more in the :ref:`User Guide `. + For a comparison of different encoders, refer to: + :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder.py`. Parameters ---------- @@ -529,7 +527,7 @@ class OneHotEncoder(_BaseEncoder): .. versionadded:: 1.2 `sparse` was renamed to `sparse_output` - dtype : number type, default=float + dtype : number type, default=np.float64 Desired dtype of output. handle_unknown : {'error', 'ignore', 'infrequent_if_exist'}, \ @@ -778,8 +776,8 @@ def _map_drop_idx_to_infrequent(self, feature_idx, drop_idx): if infrequent_indices is not None and drop_idx in infrequent_indices: categories = self.categories_[feature_idx] raise ValueError( - f"Unable to drop category {categories[drop_idx]!r} from feature" - f" {feature_idx} because it is infrequent" + f"Unable to drop category {categories[drop_idx].item()!r} from" + f" feature {feature_idx} because it is infrequent" ) return default_to_infrequent[drop_idx] @@ -1013,6 +1011,14 @@ def transform(self, X): returned. """ check_is_fitted(self) + transform_output = _get_output_config("transform", estimator=self)["dense"] + if transform_output == "pandas" and self.sparse_output: + raise ValueError( + "Pandas output does not support sparse data. 
Set sparse_output=False to" + " output pandas DataFrames or disable pandas output via" + ' `ohe.set_output(transform="default").' + ) + # validation of X happens in _check_X called by _transform warn_on_unknown = self.drop is not None and self.handle_unknown in { "ignore", @@ -1239,6 +1245,8 @@ class OrdinalEncoder(OneToOneFeatureMixin, _BaseEncoder): a single column of integers (0 to n_categories - 1) per feature. Read more in the :ref:`User Guide `. + For a comparison of different encoders, refer to: + :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder.py`. .. versionadded:: 0.20 diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py index d7bf1810e61c0..f1df0f43dc96e 100644 --- a/sklearn/preprocessing/_function_transformer.py +++ b/sklearn/preprocessing/_function_transformer.py @@ -2,15 +2,14 @@ import numpy as np -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context +from ..base import BaseEstimator, TransformerMixin, _fit_context +from ..utils._param_validation import StrOptions from ..utils.metaestimators import available_if from ..utils.validation import ( _allclose_dense_sparse, _check_feature_names_in, check_array, ) -from ..utils._param_validation import StrOptions def _identity(X): diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index f656329607ee3..41494f2649a01 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -6,25 +6,22 @@ # Hamzeh Alsalhi # License: BSD 3 clause -from collections import defaultdict -from numbers import Integral -import itertools import array +import itertools import warnings +from collections import defaultdict +from numbers import Integral import numpy as np import scipy.sparse as sp -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context -from ..utils.sparsefuncs import min_max_axis -from ..utils._param_validation import Interval, validate_params +from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils import column_or_1d -from ..utils.validation import _num_samples, check_array, check_is_fitted -from ..utils.multiclass import unique_labels -from ..utils.multiclass import type_of_target from ..utils._encode import _encode, _unique - +from ..utils._param_validation import Interval, validate_params +from ..utils.multiclass import type_of_target, unique_labels +from ..utils.sparsefuncs import min_max_axis +from ..utils.validation import _num_samples, check_array, check_is_fitted __all__ = [ "label_binarize", @@ -34,7 +31,7 @@ ] -class LabelEncoder(TransformerMixin, BaseEstimator): +class LabelEncoder(TransformerMixin, BaseEstimator, auto_wrap_output_keys=None): """Encode target labels with value between 0 and n_classes-1. This transformer should be used to encode target values, *i.e.* `y`, and @@ -59,8 +56,8 @@ class LabelEncoder(TransformerMixin, BaseEstimator): -------- `LabelEncoder` can be used to normalize labels. - >>> from sklearn import preprocessing - >>> le = preprocessing.LabelEncoder() + >>> from sklearn.preprocessing import LabelEncoder + >>> le = LabelEncoder() >>> le.fit([1, 2, 2, 6]) LabelEncoder() >>> le.classes_ @@ -73,7 +70,7 @@ class LabelEncoder(TransformerMixin, BaseEstimator): It can also be used to transform non-numerical labels (as long as they are hashable and comparable) to numerical labels. 
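Relating to the pandas-output guard added to `OneHotEncoder.transform` above: a hedged sketch of the supported dense configuration (requires pandas; combining `sparse_output=True` with pandas output is what now raises the new `ValueError`):

import pandas as pd
from sklearn.preprocessing import OneHotEncoder

X = pd.DataFrame({"color": ["red", "blue", "red"]})
# Dense output is required when transform is configured to return DataFrames.
ohe = OneHotEncoder(sparse_output=False).set_output(transform="pandas")
print(ohe.fit_transform(X))  # columns like color_blue, color_red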
- >>> le = preprocessing.LabelEncoder() + >>> le = LabelEncoder() >>> le.fit(["paris", "paris", "tokyo", "amsterdam"]) LabelEncoder() >>> list(le.classes_) @@ -168,7 +165,7 @@ def _more_tags(self): return {"X_types": ["1dlabels"]} -class LabelBinarizer(TransformerMixin, BaseEstimator): +class LabelBinarizer(TransformerMixin, BaseEstimator, auto_wrap_output_keys=None): """Binarize labels in a one-vs-all fashion. Several regression and binary classification algorithms are @@ -179,12 +176,12 @@ class LabelBinarizer(TransformerMixin, BaseEstimator): At learning time, this simply consists in learning one regressor or binary classifier per class. In doing so, one needs to convert multi-class labels to binary labels (belong or does not belong - to the class). LabelBinarizer makes this process easy with the + to the class). `LabelBinarizer` makes this process easy with the transform method. At prediction time, one assigns the class for which the corresponding - model gave the greatest confidence. LabelBinarizer makes this easy - with the inverse_transform method. + model gave the greatest confidence. `LabelBinarizer` makes this easy + with the :meth:`inverse_transform` method. Read more in the :ref:`User Guide `. @@ -207,13 +204,13 @@ class LabelBinarizer(TransformerMixin, BaseEstimator): y_type_ : str Represents the type of the target data as evaluated by - utils.multiclass.type_of_target. Possible type are 'continuous', - 'continuous-multioutput', 'binary', 'multiclass', + :func:`~sklearn.utils.multiclass.type_of_target`. Possible type are + 'continuous', 'continuous-multioutput', 'binary', 'multiclass', 'multiclass-multioutput', 'multilabel-indicator', and 'unknown'. sparse_input_ : bool - True if the input data to transform is given as a sparse matrix, False - otherwise. + `True` if the input data to transform is given as a sparse matrix, + `False` otherwise. See Also -------- @@ -224,8 +221,8 @@ class LabelBinarizer(TransformerMixin, BaseEstimator): Examples -------- - >>> from sklearn import preprocessing - >>> lb = preprocessing.LabelBinarizer() + >>> from sklearn.preprocessing import LabelBinarizer + >>> lb = LabelBinarizer() >>> lb.fit([1, 2, 6, 4, 2]) LabelBinarizer() >>> lb.classes_ @@ -236,7 +233,7 @@ class LabelBinarizer(TransformerMixin, BaseEstimator): Binary targets transform to a column vector - >>> lb = preprocessing.LabelBinarizer() + >>> lb = LabelBinarizer() >>> lb.fit_transform(['yes', 'no', 'no', 'yes']) array([[1], [0], @@ -377,9 +374,9 @@ def inverse_transform(self, Y, threshold=None): threshold : float, default=None Threshold used in the binary and multi-label cases. - Use 0 when ``Y`` contains the output of decision_function + Use 0 when ``Y`` contains the output of :term:`decision_function` (classifier). - Use 0.5 when ``Y`` contains the output of predict_proba. + Use 0.5 when ``Y`` contains the output of :term:`predict_proba`. If None, the threshold is assumed to be half way between neg_label and pos_label. @@ -392,10 +389,10 @@ def inverse_transform(self, Y, threshold=None): Notes ----- In the case when the binary labels are fractional - (probabilistic), inverse_transform chooses the class with the + (probabilistic), :meth:`inverse_transform` chooses the class with the greatest value. Typically, this allows to use the output of a - linear model's decision_function method directly as the input - of inverse_transform. + linear model's :term:`decision_function` method directly as the input + of :meth:`inverse_transform`. 
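A compact sketch of the thresholding behaviour described in these notes (the scores are illustrative stand-ins for a classifier's decision_function output):

import numpy as np
from sklearn.preprocessing import LabelBinarizer

lb = LabelBinarizer().fit(["no", "yes", "yes"])
scores = np.array([[-0.3], [0.7]])
# threshold=0 is the natural cut-off for decision_function-style scores.
print(lb.inverse_transform(scores, threshold=0))  # -> ['no' 'yes']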
""" check_is_fitted(self) @@ -427,7 +424,8 @@ def _more_tags(self): "neg_label": [Interval(Integral, None, None, closed="neither")], "pos_label": [Interval(Integral, None, None, closed="neither")], "sparse_output": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False): """Binarize labels in a one-vs-all fashion. @@ -555,7 +553,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False) y = column_or_1d(y) # pick out the known labels from y - y_in_classes = np.in1d(y, classes) + y_in_classes = np.isin(y, classes) y_seen = y[y_in_classes] indices = np.searchsorted(sorted_class, y_seen) indptr = np.hstack((0, np.cumsum(y_in_classes))) @@ -687,7 +685,7 @@ def _inverse_binarize_thresholding(y, output_type, classes, threshold): raise ValueError("{0} format is not supported".format(output_type)) -class MultiLabelBinarizer(TransformerMixin, BaseEstimator): +class MultiLabelBinarizer(TransformerMixin, BaseEstimator, auto_wrap_output_keys=None): """Transform between iterable of iterables and a multilabel format. Although a list of sets or tuples is a very intuitive format for multilabel diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py index 1dfee8a088114..634c1497f36bf 100644 --- a/sklearn/preprocessing/_polynomial.py +++ b/sklearn/preprocessing/_polynomial.py @@ -2,31 +2,32 @@ This file contains preprocessing tools based on polynomials. """ import collections -from numbers import Integral from itertools import chain, combinations from itertools import combinations_with_replacement as combinations_w_r +from numbers import Integral import numpy as np from scipy import sparse from scipy.interpolate import BSpline from scipy.special import comb -from ..base import BaseEstimator, TransformerMixin -from ..base import _fit_context +from ..base import BaseEstimator, TransformerMixin, _fit_context from ..utils import check_array -from ..utils.fixes import sp_version, parse_version -from ..utils.validation import check_is_fitted, FLOAT_DTYPES, _check_sample_weight -from ..utils.validation import _check_feature_names_in from ..utils._param_validation import Interval, StrOptions +from ..utils.fixes import parse_version, sp_version from ..utils.stats import _weighted_percentile - +from ..utils.validation import ( + FLOAT_DTYPES, + _check_feature_names_in, + _check_sample_weight, + check_is_fitted, +) from ._csr_polynomial_expansion import ( - _csr_polynomial_expansion, _calc_expanded_nnz, _calc_total_nnz, + _csr_polynomial_expansion, ) - __all__ = [ "PolynomialFeatures", "SplineTransformer", @@ -434,7 +435,7 @@ def transform(self, X): n_samples, n_features = X.shape max_int32 = np.iinfo(np.int32).max - if sparse.isspmatrix_csr(X): + if sparse.issparse(X) and X.format == "csr": if self._max_degree > 3: return self.transform(X.tocsc()).tocsr() to_stack = [] @@ -479,9 +480,9 @@ def transform(self, X): " transformer to produce fewer than 2^31 output features" ) XP = sparse.hstack(to_stack, dtype=X.dtype, format="csr") - elif sparse.isspmatrix_csc(X) and self._max_degree < 4: + elif sparse.issparse(X) and X.format == "csc" and self._max_degree < 4: return self.transform(X.tocsr()).tocsc() - elif sparse.isspmatrix(X): + elif sparse.issparse(X): combinations = self._combinations( n_features=n_features, min_degree=self._min_degree, @@ -1118,8 +1119,7 @@ def transform(self, X): XBS[mask, i * n_splines + k] = linear_extr if use_sparse: - if not 
sparse.isspmatrix_csr(XBS_sparse): - XBS_sparse = XBS_sparse.tocsr() + XBS_sparse = XBS_sparse.tocsr() output_list.append(XBS_sparse) if use_sparse: diff --git a/sklearn/preprocessing/_target_encoder.py b/sklearn/preprocessing/_target_encoder.py index 9dd33ddfa3cce..fe9bddecba31c 100644 --- a/sklearn/preprocessing/_target_encoder.py +++ b/sklearn/preprocessing/_target_encoder.py @@ -1,15 +1,13 @@ -import numpy as np +from numbers import Integral, Real -from numbers import Real, Integral +import numpy as np -from ._encoders import _BaseEncoder -from ..base import OneToOneFeatureMixin -from ..base import _fit_context -from ._target_encoder_fast import _fit_encoding_fast -from ._target_encoder_fast import _fit_encoding_fast_auto_smooth -from ..utils.validation import _check_y, check_consistent_length -from ..utils.multiclass import type_of_target +from ..base import OneToOneFeatureMixin, _fit_context from ..utils._param_validation import Interval, StrOptions +from ..utils.multiclass import type_of_target +from ..utils.validation import _check_y, check_consistent_length +from ._encoders import _BaseEncoder +from ._target_encoder_fast import _fit_encoding_fast, _fit_encoding_fast_auto_smooth class TargetEncoder(OneToOneFeatureMixin, _BaseEncoder): @@ -25,18 +23,23 @@ class TargetEncoder(OneToOneFeatureMixin, _BaseEncoder): that are not seen during :meth:`fit` are encoded with the target mean, i.e. `target_mean_`. - Read more in the :ref:`User Guide `. + For a demo on the importance of the `TargetEncoder` internal cross-fitting, + see + :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder_cross_val.py`. + For a comparison of different encoders, refer to + :ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder.py`. Read + more in the :ref:`User Guide `. .. note:: `fit(X, y).transform(X)` does not equal `fit_transform(X, y)` because a - cross-validation scheme is used in `fit_transform` for encoding. See the - :ref:`User Guide `. for details. + :term:`cross fitting` scheme is used in `fit_transform` for encoding. + See the :ref:`User Guide ` for details. .. versionadded:: 1.3 Parameters ---------- - categories : "auto" or a list of array-like, default="auto" + categories : "auto" or list of shape (n_features,) of array-like, default="auto" Categories (unique values) per feature: - `"auto"` : Determine categories automatically from the training data. @@ -44,7 +47,7 @@ class TargetEncoder(OneToOneFeatureMixin, _BaseEncoder): passed categories should not mix strings and numeric values within a single feature, and should be sorted in case of numeric values. - The used categories is stored in the `categories_` fitted attribute. + The used categories are stored in the `categories_` fitted attribute. target_type : {"auto", "continuous", "binary"}, default="auto" Type of target. @@ -56,26 +59,27 @@ class TargetEncoder(OneToOneFeatureMixin, _BaseEncoder): .. note:: The type of target inferred with `"auto"` may not be the desired target - type used for modeling. For example, if the target consistent of integers + type used for modeling. For example, if the target consisted of integers between 0 and 100, then :func:`~sklearn.utils.multiclass.type_of_target` will infer the target as `"multiclass"`. In this case, setting - `target_type="continuous"` will understand the target as a regression + `target_type="continuous"` will specify the target as a regression problem. The `target_type_` attribute gives the target type used by the encoder.
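A hedged sketch of the cross-fitting caveat documented for `TargetEncoder`: on the training data, `fit_transform` and `fit(...).transform(...)` are not expected to agree (toy data; `cv=2` is used only because the example set is tiny):

import numpy as np
from sklearn.preprocessing import TargetEncoder

X = np.array([["a"], ["a"], ["b"], ["b"], ["a"], ["b"]], dtype=object)
y = np.array([1, 0, 1, 1, 0, 0])
enc = TargetEncoder(cv=2, random_state=0)
print(enc.fit_transform(X, y).ravel())     # per-fold (cross-fitted) encodings
print(enc.fit(X, y).transform(X).ravel())  # encodings_ learnt on all of X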
smooth : "auto" or float, default="auto" - The amount of mixing of the categorical encoding with the global target mean. A - larger `smooth` value will put more weight on the global target mean. + The amount of mixing of the target mean conditioned on the value of the + category with the global target mean. A larger `smooth` value will put + more weight on the global target mean. If `"auto"`, then `smooth` is set to an empirical Bayes estimate. cv : int, default=5 - Determines the number of folds in the cross-validation strategy used in + Determines the number of folds in the :term:`cross fitting` strategy used in :meth:`fit_transform`. For classification targets, `StratifiedKFold` is used and for continuous targets, `KFold` is used. shuffle : bool, default=True Whether to shuffle the data in :meth:`fit_transform` before splitting into - batches. Note that the samples within each split will not be shuffled. + folds. Note that the samples within each split will not be shuffled. random_state : int, RandomState instance or None, default=None When `shuffle` is True, `random_state` affects the ordering of the @@ -87,11 +91,13 @@ class TargetEncoder(OneToOneFeatureMixin, _BaseEncoder): Attributes ---------- encodings_ : list of shape (n_features,) of ndarray - For feature `i`, `encodings_[i]` is the encoding matching the + Encodings learnt on all of `X`. + For feature `i`, `encodings_[i]` are the encodings matching the categories listed in `categories_[i]`. categories_ : list of shape (n_features,) of ndarray - The categories of each feature determined during fitting + The categories of each feature determined during fitting or specified + in `categories` (in order of the features in `X` and corresponding with the output of :meth:`transform`). @@ -203,8 +209,8 @@ def fit_transform(self, X, y): .. note:: `fit(X, y).transform(X)` does not equal `fit_transform(X, y)` because a - cross-validation scheme is used in `fit_transform` for encoding. See the - :ref:`User Guide `. for details. + :term:`cross fitting` scheme is used in `fit_transform` for encoding. + See the :ref:`User Guide `. for details. Parameters ---------- @@ -259,8 +265,8 @@ def transform(self, X): .. note:: `fit(X, y).transform(X)` does not equal `fit_transform(X, y)` because a - cross-validation scheme is used in `fit_transform` for encoding. See the - :ref:`User Guide `. for details. + :term:`cross fitting` scheme is used in `fit_transform` for encoding. + See the :ref:`User Guide `. for details. Parameters ---------- @@ -272,14 +278,14 @@ def transform(self, X): X_trans : ndarray of shape (n_samples, n_features) Transformed input. """ - X_ordinal, X_valid = self._transform( + X_ordinal, X_known_mask = self._transform( X, handle_unknown="ignore", force_all_finite="allow-nan" ) X_out = np.empty_like(X_ordinal, dtype=np.float64) self._transform_X_ordinal( X_out, X_ordinal, - ~X_valid, + ~X_known_mask, slice(None), self.encodings_, self.target_mean_, @@ -298,8 +304,9 @@ def _fit_encodings_all(self, X, y): inferred_type_of_target = type_of_target(y, input_name="y") if inferred_type_of_target not in accepted_target_types: raise ValueError( - f"Target type was inferred to be {inferred_type_of_target!r}. Only" - f" {accepted_target_types} are supported." + "Unknown label type: Target type was inferred to be " + f"{inferred_type_of_target!r}. Only {accepted_target_types} are " + "supported." 
) self.target_type_ = inferred_type_of_target else: @@ -342,4 +349,13 @@ def _transform_X_ordinal( X_out[X_unknown_mask[:, f_idx], f_idx] = y_mean def _more_tags(self): - return {"requires_y": True} + return { + "requires_y": True, + # TargetEncoder is a special case where a transformer uses `y` but + # only accept binary classification and regression targets. For the + # purpose of common tests we use `binary_only` tag to eliminate the + # multiclass tests. TODO: remove this special case when multiclass + # support is added to TargetEncoder. xref: + # https://github.com/scikit-learn/scikit-learn/pull/26674 + "binary_only": True, + } diff --git a/sklearn/preprocessing/tests/test_common.py b/sklearn/preprocessing/tests/test_common.py index 98b8dcdfe0e2a..653ce58e0b68c 100644 --- a/sklearn/preprocessing/tests/test_common.py +++ b/sklearn/preprocessing/tests/test_common.py @@ -1,31 +1,27 @@ import warnings -import pytest import numpy as np - +import pytest from scipy import sparse +from sklearn.base import clone from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split - -from sklearn.base import clone - -from sklearn.preprocessing import maxabs_scale -from sklearn.preprocessing import minmax_scale -from sklearn.preprocessing import scale -from sklearn.preprocessing import power_transform -from sklearn.preprocessing import quantile_transform -from sklearn.preprocessing import robust_scale - -from sklearn.preprocessing import MaxAbsScaler -from sklearn.preprocessing import MinMaxScaler -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import PowerTransformer -from sklearn.preprocessing import QuantileTransformer -from sklearn.preprocessing import RobustScaler - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose +from sklearn.preprocessing import ( + MaxAbsScaler, + MinMaxScaler, + PowerTransformer, + QuantileTransformer, + RobustScaler, + StandardScaler, + maxabs_scale, + minmax_scale, + power_transform, + quantile_transform, + robust_scale, + scale, +) +from sklearn.utils._testing import assert_allclose, assert_array_equal iris = load_iris() @@ -135,13 +131,13 @@ def test_missing_value_handling( warnings.simplefilter("error", RuntimeWarning) Xt_sp = est_sparse.fit(X_train_sp).transform(X_test_sp) - assert_allclose(Xt_sp.A, Xt_dense) + assert_allclose(Xt_sp.toarray(), Xt_dense) with warnings.catch_warnings(): warnings.simplefilter("ignore", PendingDeprecationWarning) warnings.simplefilter("error", RuntimeWarning) Xt_inv_sp = est_sparse.inverse_transform(Xt_sp) - assert_allclose(Xt_inv_sp.A, Xt_inv_dense) + assert_allclose(Xt_inv_sp.toarray(), Xt_inv_dense) @pytest.mark.parametrize( diff --git a/sklearn/preprocessing/tests/test_data.py b/sklearn/preprocessing/tests/test_data.py index c00de906a7dbb..189d8875bc8f2 100644 --- a/sklearn/preprocessing/tests/test_data.py +++ b/sklearn/preprocessing/tests/test_data.py @@ -4,59 +4,54 @@ # # License: BSD 3 clause -import warnings import itertools - import re +import warnings + import numpy as np import numpy.linalg as la -from scipy import sparse, stats - import pytest +from scipy import sparse, stats -from sklearn.utils import gen_batches - -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_less -from sklearn.utils._testing import assert_allclose -from 
sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import skip_if_32bit -from sklearn.utils._testing import _convert_container - -from sklearn.utils.sparsefuncs import mean_variance_axis -from sklearn.preprocessing import Binarizer -from sklearn.preprocessing import KernelCenterer -from sklearn.preprocessing import Normalizer -from sklearn.preprocessing import normalize -from sklearn.preprocessing import StandardScaler -from sklearn.preprocessing import scale -from sklearn.preprocessing import MinMaxScaler -from sklearn.preprocessing import minmax_scale -from sklearn.preprocessing import QuantileTransformer -from sklearn.preprocessing import quantile_transform -from sklearn.preprocessing import MaxAbsScaler -from sklearn.preprocessing import maxabs_scale -from sklearn.preprocessing import RobustScaler -from sklearn.preprocessing import robust_scale -from sklearn.preprocessing import add_dummy_feature -from sklearn.preprocessing import PowerTransformer -from sklearn.preprocessing import power_transform -from sklearn.preprocessing._data import _handle_zeros_in_scale -from sklearn.preprocessing._data import BOUNDS_THRESHOLD -from sklearn.metrics.pairwise import linear_kernel - -from sklearn.exceptions import NotFittedError - +from sklearn import datasets from sklearn.base import clone -from sklearn.pipeline import Pipeline +from sklearn.exceptions import NotFittedError +from sklearn.metrics.pairwise import linear_kernel from sklearn.model_selection import cross_val_predict +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import ( + Binarizer, + KernelCenterer, + MaxAbsScaler, + MinMaxScaler, + Normalizer, + PowerTransformer, + QuantileTransformer, + RobustScaler, + StandardScaler, + add_dummy_feature, + maxabs_scale, + minmax_scale, + normalize, + power_transform, + quantile_transform, + robust_scale, + scale, +) +from sklearn.preprocessing._data import BOUNDS_THRESHOLD, _handle_zeros_in_scale from sklearn.svm import SVR -from sklearn.utils import shuffle - -from sklearn import datasets - +from sklearn.utils import gen_batches, shuffle +from sklearn.utils._testing import ( + _convert_container, + assert_allclose, + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + assert_array_less, + skip_if_32bit, +) +from sklearn.utils.sparsefuncs import mean_variance_axis iris = datasets.load_iris() @@ -1851,7 +1846,7 @@ def test_normalizer_l1(): X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X) assert X_norm is not X - assert sparse.isspmatrix_csr(X_norm) + assert sparse.issparse(X_norm) and X_norm.format == "csr" X_norm = toarray(X_norm) for i in range(3): @@ -1898,7 +1893,7 @@ def test_normalizer_l2(): X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X) assert X_norm is not X - assert sparse.isspmatrix_csr(X_norm) + assert sparse.issparse(X_norm) and X_norm.format == "csr" X_norm = toarray(X_norm) for i in range(3): @@ -1946,7 +1941,7 @@ def test_normalizer_max(): X_norm = normalizer = Normalizer(norm="l2", copy=False).transform(X) assert X_norm is not X - assert sparse.isspmatrix_csr(X_norm) + assert sparse.issparse(X_norm) and X_norm.format == "csr" X_norm = toarray(X_norm) for i in range(3): @@ -2210,21 +2205,21 @@ def test_add_dummy_feature(): def test_add_dummy_feature_coo(): X = sparse.coo_matrix([[1, 0], [0, 1], [0, 1]]) X = add_dummy_feature(X) - assert sparse.isspmatrix_coo(X), X + assert sparse.issparse(X) and X.format == "coo", X 
assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]]) def test_add_dummy_feature_csc(): X = sparse.csc_matrix([[1, 0], [0, 1], [0, 1]]) X = add_dummy_feature(X) - assert sparse.isspmatrix_csc(X), X + assert sparse.issparse(X) and X.format == "csc", X assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]]) def test_add_dummy_feature_csr(): X = sparse.csr_matrix([[1, 0], [0, 1], [0, 1]]) X = add_dummy_feature(X) - assert sparse.isspmatrix_csr(X), X + assert sparse.issparse(X) and X.format == "csr", X assert_array_equal(X.toarray(), [[1, 1, 0], [1, 0, 1], [1, 0, 1]]) diff --git a/sklearn/preprocessing/tests/test_discretization.py b/sklearn/preprocessing/tests/test_discretization.py index c3dd03f647737..bc0b9470f4aa6 100644 --- a/sklearn/preprocessing/tests/test_discretization.py +++ b/sklearn/preprocessing/tests/test_discretization.py @@ -1,16 +1,16 @@ -import pytest +import warnings + import numpy as np +import pytest import scipy.sparse as sp -import warnings from sklearn import clone -from sklearn.preprocessing import KBinsDiscretizer -from sklearn.preprocessing import OneHotEncoder +from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, assert_array_almost_equal, assert_array_equal, - assert_allclose_dense_sparse, - assert_allclose, ) X = [[-2, 1.5, -4, -1], [-1, 2.5, -3, -0.5], [0, 3.5, -2, 0.5], [1, 4.5, -1, 2]] @@ -485,7 +485,7 @@ def test_kbinsdiscretizer_subsample(strategy, global_random_seed): kbd_no_subsampling.set_params(subsample=None) kbd_no_subsampling.fit(X) - # We use a large tolerance because we can't expect the bin edges to be exactely the + # We use a large tolerance because we can't expect the bin edges to be exactly the # same when subsampling is used. 
assert_allclose( kbd_subsampling.bin_edges_[0], kbd_no_subsampling.bin_edges_[0], rtol=1e-2 diff --git a/sklearn/preprocessing/tests/test_encoders.py b/sklearn/preprocessing/tests/test_encoders.py index 42c66980bfeba..fb57a1993c3b7 100644 --- a/sklearn/preprocessing/tests/test_encoders.py +++ b/sklearn/preprocessing/tests/test_encoders.py @@ -1,17 +1,17 @@ import re import numpy as np -from scipy import sparse import pytest +from scipy import sparse from sklearn.exceptions import NotFittedError -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import _convert_container +from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder from sklearn.utils import is_scalar_nan - -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import OrdinalEncoder +from sklearn.utils._testing import ( + _convert_container, + assert_allclose, + assert_array_equal, +) def test_one_hot_encoder_sparse_dense(): @@ -240,7 +240,7 @@ def check_categorical_onehot(X): assert_allclose(Xtr1.toarray(), Xtr2) - assert sparse.isspmatrix_csr(Xtr1) + assert sparse.issparse(Xtr1) and Xtr1.format == "csr" return Xtr1.toarray() @@ -414,7 +414,7 @@ def test_X_is_not_1D_pandas(method): np.object_, ), (np.array([["A", "cat"], ["B", "cat"]]), [["A", "B"], ["cat"]], np.str_), - (np.array([[1, 2], [np.nan, 2]]), [[1, np.nan], [2]], np.float_), + (np.array([[1, 2], [np.nan, 2]]), [[1, np.nan], [2]], np.float64), ( np.array([["A", np.nan], [None, np.nan]], dtype=object), [["A", None], [np.nan]], @@ -1588,6 +1588,26 @@ def test_ohe_drop_first_explicit_categories(handle_unknown): assert_allclose(X_trans, X_expected) +def test_ohe_more_informative_error_message(): + """Raise informative error message when pandas output and sparse_output=True.""" + pd = pytest.importorskip("pandas") + df = pd.DataFrame({"a": [1, 2, 3], "b": ["z", "b", "b"]}, columns=["a", "b"]) + + ohe = OneHotEncoder(sparse_output=True) + ohe.set_output(transform="pandas") + + msg = ( + "Pandas output does not support sparse data. 
Set " + "sparse_output=False to output pandas DataFrames or disable pandas output" + ) + with pytest.raises(ValueError, match=msg): + ohe.fit_transform(df) + + ohe.fit(df) + with pytest.raises(ValueError, match=msg): + ohe.transform(df) + + def test_ordinal_encoder_passthrough_missing_values_float_errors_dtype(): """Test ordinal encoder with nan passthrough fails when dtype=np.int32.""" diff --git a/sklearn/preprocessing/tests/test_function_transformer.py b/sklearn/preprocessing/tests/test_function_transformer.py index 5617429590657..fa19171503a1d 100644 --- a/sklearn/preprocessing/tests/test_function_transformer.py +++ b/sklearn/preprocessing/tests/test_function_transformer.py @@ -1,16 +1,16 @@ import warnings -import pytest import numpy as np +import pytest from scipy import sparse -from sklearn.utils import _safe_indexing -from sklearn.preprocessing import FunctionTransformer from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import FunctionTransformer +from sklearn.utils import _safe_indexing from sklearn.utils._testing import ( - assert_array_equal, - assert_allclose_dense_sparse, _convert_container, + assert_allclose_dense_sparse, + assert_array_equal, ) diff --git a/sklearn/preprocessing/tests/test_label.py b/sklearn/preprocessing/tests/test_label.py index d8566c85e7b73..633a386c75951 100644 --- a/sklearn/preprocessing/tests/test_label.py +++ b/sklearn/preprocessing/tests/test_label.py @@ -1,29 +1,26 @@ import numpy as np - import pytest - -from scipy.sparse import issparse -from scipy.sparse import coo_matrix -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import dok_matrix -from scipy.sparse import lil_matrix - -from sklearn.utils.multiclass import type_of_target - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils import _to_object_array - -from sklearn.preprocessing._label import LabelBinarizer -from sklearn.preprocessing._label import MultiLabelBinarizer -from sklearn.preprocessing._label import LabelEncoder -from sklearn.preprocessing._label import label_binarize - -from sklearn.preprocessing._label import _inverse_binarize_thresholding -from sklearn.preprocessing._label import _inverse_binarize_multiclass +from scipy.sparse import ( + coo_matrix, + csc_matrix, + csr_matrix, + dok_matrix, + issparse, + lil_matrix, +) from sklearn import datasets +from sklearn.preprocessing._label import ( + LabelBinarizer, + LabelEncoder, + MultiLabelBinarizer, + _inverse_binarize_multiclass, + _inverse_binarize_thresholding, + label_binarize, +) +from sklearn.utils import _to_object_array +from sklearn.utils._testing import assert_array_equal, ignore_warnings +from sklearn.utils.multiclass import type_of_target iris = datasets.load_iris() @@ -675,3 +672,17 @@ def test_nan_label_encoder(): y_trans = le.transform([np.nan]) assert_array_equal(y_trans, [2]) + + +@pytest.mark.parametrize( + "encoder", [LabelEncoder(), LabelBinarizer(), MultiLabelBinarizer()] +) +def test_label_encoders_do_not_have_set_output(encoder): + """Check that label encoders do not define set_output and work with y as a kwarg. + + Non-regression test for #26854. 
+ """ + assert not hasattr(encoder, "set_output") + y_encoded_with_kwarg = encoder.fit_transform(y=["a", "b", "c"]) + y_encoded_positional = encoder.fit_transform(["a", "b", "c"]) + assert_array_equal(y_encoded_with_kwarg, y_encoded_positional) diff --git a/sklearn/preprocessing/tests/test_polynomial.py b/sklearn/preprocessing/tests/test_polynomial.py index ab5c8ea4de95f..de1f29026a62b 100644 --- a/sklearn/preprocessing/tests/test_polynomial.py +++ b/sklearn/preprocessing/tests/test_polynomial.py @@ -1,13 +1,12 @@ +import sys + import numpy as np import pytest -import sys +from numpy.testing import assert_allclose, assert_array_equal from scipy import sparse +from scipy.interpolate import BSpline from scipy.sparse import random as sparse_random -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils.fixes import sp_version, parse_version -from numpy.testing import assert_allclose, assert_array_equal -from scipy.interpolate import BSpline from sklearn.linear_model import LinearRegression from sklearn.pipeline import Pipeline from sklearn.preprocessing import ( @@ -16,10 +15,12 @@ SplineTransformer, ) from sklearn.preprocessing._csr_polynomial_expansion import ( - _calc_total_nnz, _calc_expanded_nnz, + _calc_total_nnz, _get_sizeof_LARGEST_INT_t, ) +from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils.fixes import parse_version, sp_version @pytest.mark.parametrize("est", (PolynomialFeatures, SplineTransformer)) @@ -422,8 +423,10 @@ def test_spline_transformer_sparse_output( splt_dense.fit(X) splt_sparse.fit(X) - assert sparse.isspmatrix_csr(splt_sparse.transform(X)) - assert_allclose(splt_dense.transform(X), splt_sparse.transform(X).toarray()) + X_trans_sparse = splt_sparse.transform(X) + X_trans_dense = splt_dense.transform(X) + assert sparse.issparse(X_trans_sparse) and X_trans_sparse.format == "csr" + assert_allclose(X_trans_dense, X_trans_sparse.toarray()) # extrapolation regime X_min = np.amin(X, axis=0) @@ -720,9 +723,9 @@ def test_polynomial_features_csc_X(deg, include_bias, interaction_only, dtype): Xt_csc = est.fit_transform(X_csc.astype(dtype)) Xt_dense = est.fit_transform(X.astype(dtype)) - assert sparse.isspmatrix_csc(Xt_csc) + assert sparse.issparse(Xt_csc) and Xt_csc.format == "csc" assert Xt_csc.dtype == Xt_dense.dtype - assert_array_almost_equal(Xt_csc.A, Xt_dense) + assert_array_almost_equal(Xt_csc.toarray(), Xt_dense) @pytest.mark.parametrize( @@ -747,9 +750,9 @@ def test_polynomial_features_csr_X(deg, include_bias, interaction_only, dtype): Xt_csr = est.fit_transform(X_csr.astype(dtype)) Xt_dense = est.fit_transform(X.astype(dtype, copy=False)) - assert sparse.isspmatrix_csr(Xt_csr) + assert sparse.issparse(Xt_csr) and Xt_csr.format == "csr" assert Xt_csr.dtype == Xt_dense.dtype - assert_array_almost_equal(Xt_csr.A, Xt_dense) + assert_array_almost_equal(Xt_csr.toarray(), Xt_dense) @pytest.mark.parametrize("n_features", [1, 4, 5]) @@ -806,9 +809,9 @@ def test_polynomial_features_csr_X_floats(deg, include_bias, interaction_only, d Xt_csr = est.fit_transform(X_csr.astype(dtype)) Xt_dense = est.fit_transform(X.astype(dtype)) - assert sparse.isspmatrix_csr(Xt_csr) + assert sparse.issparse(Xt_csr) and Xt_csr.format == "csr" assert Xt_csr.dtype == Xt_dense.dtype - assert_array_almost_equal(Xt_csr.A, Xt_dense) + assert_array_almost_equal(Xt_csr.toarray(), Xt_dense) @pytest.mark.parametrize( @@ -837,9 +840,9 @@ def test_polynomial_features_csr_X_zero_row(zero_row_index, deg, interaction_onl Xt_csr = est.fit_transform(X_csr) 
Xt_dense = est.fit_transform(X) - assert sparse.isspmatrix_csr(Xt_csr) + assert sparse.issparse(Xt_csr) and Xt_csr.format == "csr" assert Xt_csr.dtype == Xt_dense.dtype - assert_array_almost_equal(Xt_csr.A, Xt_dense) + assert_array_almost_equal(Xt_csr.toarray(), Xt_dense) # This degree should always be one more than the highest degree supported by @@ -858,9 +861,9 @@ def test_polynomial_features_csr_X_degree_4(include_bias, interaction_only): Xt_csr = est.fit_transform(X_csr) Xt_dense = est.fit_transform(X) - assert sparse.isspmatrix_csr(X_csr) + assert sparse.issparse(Xt_csr) and Xt_csr.format == "csr" assert Xt_csr.dtype == Xt_dense.dtype - assert_array_almost_equal(Xt_csr.A, Xt_dense) + assert_array_almost_equal(Xt_csr.toarray(), Xt_dense) @pytest.mark.parametrize( @@ -886,9 +889,9 @@ def test_polynomial_features_csr_X_dim_edges(deg, dim, interaction_only): Xt_csr = est.fit_transform(X_csr) Xt_dense = est.fit_transform(X) - assert sparse.isspmatrix_csr(Xt_csr) + assert sparse.issparse(Xt_csr) and Xt_csr.format == "csr" assert Xt_csr.dtype == Xt_dense.dtype - assert_array_almost_equal(Xt_csr.A, Xt_dense) + assert_array_almost_equal(Xt_csr.toarray(), Xt_dense) @pytest.mark.parametrize("interaction_only", [True, False]) diff --git a/sklearn/preprocessing/tests/test_target_encoder.py b/sklearn/preprocessing/tests/test_target_encoder.py index 7cbd3a58820bc..38aa0569b9d40 100644 --- a/sklearn/preprocessing/tests/test_target_encoder.py +++ b/sklearn/preprocessing/tests/test_target_encoder.py @@ -1,21 +1,22 @@ import numpy as np -from numpy.testing import assert_allclose -from numpy.testing import assert_array_equal import pytest +from numpy.testing import assert_allclose, assert_array_equal -from sklearn.preprocessing import ( - TargetEncoder, - LabelEncoder, - KBinsDiscretizer, -) -from sklearn.model_selection import KFold -from sklearn.model_selection import StratifiedKFold -from sklearn.model_selection import ShuffleSplit -from sklearn.model_selection import cross_val_score -from sklearn.model_selection import train_test_split from sklearn.ensemble import RandomForestRegressor from sklearn.linear_model import Ridge +from sklearn.model_selection import ( + KFold, + ShuffleSplit, + StratifiedKFold, + cross_val_score, + train_test_split, +) from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import ( + KBinsDiscretizer, + LabelEncoder, + TargetEncoder, +) def _encode_target(X_ordinal, y_int, n_categories, smooth): @@ -60,54 +61,70 @@ def _encode_target(X_ordinal, y_int, n_categories, smooth): @pytest.mark.parametrize("smooth", [5.0, "auto"]) @pytest.mark.parametrize("target_type", ["binary", "continuous"]) def test_encoding(categories, unknown_value, global_random_seed, smooth, target_type): - """Check encoding for binary and continuous targets.""" + """Check encoding for binary and continuous targets. + + Compare the values returned by `TargetEncoder.fit_transform` against the + expected encodings for cv splits from a naive reference Python + implementation in _encode_target. 
+ """ - X_train_array = np.array([[0] * 20 + [1] * 30 + [2] * 40], dtype=np.int64).T - X_test_array = np.array([[0, 1, 2]], dtype=np.int64).T n_categories = 3 - n_samples = X_train_array.shape[0] + X_train_int_array = np.array([[0] * 20 + [1] * 30 + [2] * 40], dtype=np.int64).T + X_test_int_array = np.array([[0, 1, 2]], dtype=np.int64).T + n_samples = X_train_int_array.shape[0] if categories == "auto": - X_train = X_train_array + X_train = X_train_int_array + X_test = X_test_int_array else: - X_train = categories[0][X_train_array] + X_train = categories[0][X_train_int_array] + X_test = categories[0][X_test_int_array] - if categories == "auto": - X_test = X_test_array - else: - X_test = categories[0][X_test_array] X_test = np.concatenate((X_test, [[unknown_value]])) - rng = np.random.RandomState(global_random_seed) - + data_rng = np.random.RandomState(global_random_seed) + n_splits = 3 if target_type == "binary": - y_int = rng.randint(low=0, high=2, size=n_samples) + y_int = data_rng.randint(low=0, high=2, size=n_samples) target_names = np.array(["cat", "dog"], dtype=object) y_train = target_names[y_int] - cv = StratifiedKFold(n_splits=3, random_state=0, shuffle=True) + else: # target_type == continuous - y_int = rng.uniform(low=-10, high=20, size=n_samples) + y_int = data_rng.uniform(low=-10, high=20, size=n_samples) y_train = y_int - cv = KFold(n_splits=3, random_state=0, shuffle=True) - shuffled_idx = rng.permutation(n_samples) - X_train_array = X_train_array[shuffled_idx] + shuffled_idx = data_rng.permutation(n_samples) + X_train_int_array = X_train_int_array[shuffled_idx] X_train = X_train[shuffled_idx] y_train = y_train[shuffled_idx] y_int = y_int[shuffled_idx] - # Get encodings for cv splits to validate `fit_transform` - expected_X_fit_transform = np.empty_like(X_train_array, dtype=np.float64) + # Define our CV splitting strategy + if target_type == "binary": + cv = StratifiedKFold( + n_splits=n_splits, random_state=global_random_seed, shuffle=True + ) + else: + cv = KFold(n_splits=n_splits, random_state=global_random_seed, shuffle=True) + + # Compute the expected values using our reference Python implementation of + # target encoding: + expected_X_fit_transform = np.empty_like(X_train_int_array, dtype=np.float64) - for train_idx, test_idx in cv.split(X_train_array, y_train): - X_, y_ = X_train_array[train_idx, 0], y_int[train_idx] + for train_idx, test_idx in cv.split(X_train_int_array, y_train): + X_, y_ = X_train_int_array[train_idx, 0], y_int[train_idx] cur_encodings = _encode_target(X_, y_, n_categories, smooth) expected_X_fit_transform[test_idx, 0] = cur_encodings[ - X_train_array[test_idx, 0] + X_train_int_array[test_idx, 0] ] + # Check that we can obtain the same encodings by calling `fit_transform` on + # the estimator with the same CV parameters: target_encoder = TargetEncoder( - smooth=smooth, categories=categories, cv=3, random_state=0 + smooth=smooth, + categories=categories, + cv=n_splits, + random_state=global_random_seed, ) X_fit_transform = target_encoder.fit_transform(X_train, y_train) @@ -119,12 +136,12 @@ def test_encoding(categories, unknown_value, global_random_seed, smooth, target_ # compute encodings for all data to validate `transform` y_mean = np.mean(y_int) expected_encodings = _encode_target( - X_train_array[:, 0], y_int, n_categories, smooth + X_train_int_array[:, 0], y_int, n_categories, smooth ) assert_allclose(target_encoder.encodings_[0], expected_encodings) assert target_encoder.target_mean_ == pytest.approx(y_mean) - # Transform on test data, the 
last value is unknown is it is encoded as the target + # Transform on test data, the last value is unknown so it is encoded as the target # mean expected_X_test_transform = np.concatenate( (expected_encodings, np.array([y_mean])) @@ -393,7 +410,7 @@ def test_smooth_zero(): # it will be encoded as the mean of the second half assert_allclose(X_trans[0], np.mean(y[5:])) - # category 1 does nto exist in the first half, thus it will be encoded as + # category 1 does not exist in the first half, thus it will be encoded as # the mean of the first half assert_allclose(X_trans[-1], np.mean(y[:5])) @@ -401,7 +418,7 @@ def test_smooth_zero(): @pytest.mark.parametrize("smooth", [0.0, 1e3, "auto"]) def test_invariance_of_encoding_under_label_permutation(smooth, global_random_seed): # Check that the encoding does not depend on the integer of the value of - # the integer labels. This is quite of a trivial property but it is helpful + # the integer labels. This is quite a trivial property but it is helpful # to understand the following test. rng = np.random.RandomState(global_random_seed) @@ -439,7 +456,7 @@ def test_invariance_of_encoding_under_label_permutation(smooth, global_random_se @pytest.mark.parametrize("smooth", [0.0, "auto"]) def test_target_encoding_for_linear_regression(smooth, global_random_seed): # Check some expected statistical properties when fitting a linear - # regression model on target encoded features depending on there relation + # regression model on target encoded features depending on their relation # with that target. # In this test, we use the Ridge class with the "lsqr" solver and a little @@ -483,7 +500,7 @@ def test_target_encoding_for_linear_regression(smooth, global_random_seed): # itself independent of the target variable: target encoding such a feature # without internal cross-validation should cause catastrophic overfitting # for the downstream regressor, even with shrinkage. This kind of features - # typically represents near unique idenfiers of samples. In general they + # typically represents near unique identifiers of samples. In general they # should be removed from a machine learning datasets but here we want to # study the ability of the default behavior of TargetEncoder to mitigate # them automatically. 
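For context on the cross-fitting behaviour that both the updated `TargetEncoder` docstrings and the tests above revolve around, here is a minimal sketch with made-up data (assuming scikit-learn 1.3, where `TargetEncoder` is available) showing why `fit_transform(X, y)` differs from `fit(X, y).transform(X)`:

import numpy as np
from sklearn.preprocessing import TargetEncoder

rng = np.random.RandomState(0)
X = rng.choice(["a", "b", "c"], size=(100, 1))  # one categorical feature
y = rng.normal(size=100)                        # continuous target

enc = TargetEncoder(smooth="auto", cv=5, random_state=0)
X_cross_fitted = enc.fit_transform(X, y)  # per-fold encodings (cross fitting)
X_refit = enc.fit(X, y).transform(X)      # encodings_ learnt on all of X

# The two results generally differ: fit_transform encodes each sample with
# statistics computed on the other folds only, which is what test_encoding
# checks against the _encode_target reference implementation.
print(np.allclose(X_cross_fitted, X_refit))  # typically False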
diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index ca0ee41784ab5..c8c0193ac9b0b 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -31,18 +31,21 @@ from numbers import Integral, Real import numpy as np -from scipy import linalg import scipy.sparse as sp +from scipy import linalg -from .base import BaseEstimator, TransformerMixin -from .base import ClassNamePrefixFeaturesOutMixin -from .base import _fit_context +from .base import ( + BaseEstimator, + ClassNamePrefixFeaturesOutMixin, + TransformerMixin, + _fit_context, +) +from .exceptions import DataDimensionalityWarning from .utils import check_random_state from .utils._param_validation import Interval, StrOptions, validate_params from .utils.extmath import safe_sparse_dot from .utils.random import sample_without_replacement from .utils.validation import check_array, check_is_fitted -from .exceptions import DataDimensionalityWarning __all__ = [ "SparseRandomProjection", @@ -55,7 +58,8 @@ { "n_samples": ["array-like", Interval(Real, 1, None, closed="left")], "eps": ["array-like", Interval(Real, 0, 1, closed="neither")], - } + }, + prefer_skip_nested_validation=True, ) def johnson_lindenstrauss_min_dim(n_samples, *, eps=0.1): """Find a 'safe' number of components to randomly project to. diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py index 9d7786bc1d67e..2b3d0e2018430 100644 --- a/sklearn/semi_supervised/_label_propagation.py +++ b/sklearn/semi_supervised/_label_propagation.py @@ -55,23 +55,22 @@ # Authors: Clay Woolam # Utkarsh Upadhyay # License: BSD +import warnings from abc import ABCMeta, abstractmethod from numbers import Integral, Real -import warnings import numpy as np from scipy import sparse from scipy.sparse import csgraph -from ..base import BaseEstimator, ClassifierMixin -from ..base import _fit_context +from ..base import BaseEstimator, ClassifierMixin, _fit_context +from ..exceptions import ConvergenceWarning from ..metrics.pairwise import rbf_kernel from ..neighbors import NearestNeighbors +from ..utils._param_validation import Interval, StrOptions from ..utils.extmath import safe_sparse_dot from ..utils.multiclass import check_classification_targets from ..utils.validation import check_is_fitted -from ..utils._param_validation import Interval, StrOptions -from ..exceptions import ConvergenceWarning class BaseLabelPropagation(ClassifierMixin, BaseEstimator, metaclass=ABCMeta): @@ -295,7 +294,7 @@ def fit(self, X, y): l_previous = np.zeros((self.X_.shape[0], n_classes)) unlabeled = unlabeled[:, np.newaxis] - if sparse.isspmatrix(graph_matrix): + if sparse.issparse(graph_matrix): graph_matrix = graph_matrix.tocsr() for self.n_iter_ in range(self.max_iter): @@ -398,7 +397,6 @@ class LabelPropagation(BaseLabelPropagation): See Also -------- - BaseLabelPropagation : Base class for label propagation module. LabelSpreading : Alternate label propagation strategy more robust to noise. References @@ -457,7 +455,7 @@ class distributions will exceed 1 (normalization may be desired). 
self.nn_fit = None affinity_matrix = self._get_kernel(self.X_) normalizer = affinity_matrix.sum(axis=0) - if sparse.isspmatrix(affinity_matrix): + if sparse.issparse(affinity_matrix): affinity_matrix.data /= np.diag(np.array(normalizer)) else: affinity_matrix /= normalizer[:, np.newaxis] @@ -617,7 +615,7 @@ def _build_graph(self): affinity_matrix = self._get_kernel(self.X_) laplacian = csgraph.laplacian(affinity_matrix, normed=True) laplacian = -laplacian - if sparse.isspmatrix(laplacian): + if sparse.issparse(laplacian): diag_mask = laplacian.row == laplacian.col laplacian.data[diag_mask] = 0.0 else: diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py index c4706df1754da..725610d3b82de 100644 --- a/sklearn/semi_supervised/_self_training.py +++ b/sklearn/semi_supervised/_self_training.py @@ -3,12 +3,11 @@ import numpy as np -from ..base import MetaEstimatorMixin, clone, BaseEstimator -from ..base import _fit_context +from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone +from ..utils import safe_mask from ..utils._param_validation import HasMethods, Interval, StrOptions -from ..utils.validation import check_is_fitted from ..utils.metaestimators import available_if -from ..utils import safe_mask +from ..utils.validation import check_is_fitted __all__ = ["SelfTrainingClassifier"] diff --git a/sklearn/semi_supervised/tests/test_label_propagation.py b/sklearn/semi_supervised/tests/test_label_propagation.py index 2610719dd9c53..8812c3c352a03 100644 --- a/sklearn/semi_supervised/tests/test_label_propagation.py +++ b/sklearn/semi_supervised/tests/test_label_propagation.py @@ -1,21 +1,22 @@ """ test the label propagation module """ -import numpy as np -import pytest import warnings +import numpy as np +import pytest from scipy.sparse import issparse -from sklearn.semi_supervised import _label_propagation as label_propagation + +from sklearn.datasets import make_classification +from sklearn.exceptions import ConvergenceWarning from sklearn.metrics.pairwise import rbf_kernel from sklearn.model_selection import train_test_split from sklearn.neighbors import NearestNeighbors -from sklearn.datasets import make_classification -from sklearn.exceptions import ConvergenceWarning +from sklearn.semi_supervised import _label_propagation as label_propagation from sklearn.utils._testing import ( + _convert_container, assert_allclose, assert_array_equal, ) -from sklearn.utils._testing import _convert_container CONSTRUCTOR_TYPES = ("array", "sparse_csr", "sparse_csc") diff --git a/sklearn/semi_supervised/tests/test_self_training.py b/sklearn/semi_supervised/tests/test_self_training.py index 929a99ba0493b..71f0848f5767c 100644 --- a/sklearn/semi_supervised/tests/test_self_training.py +++ b/sklearn/semi_supervised/tests/test_self_training.py @@ -1,18 +1,17 @@ from math import ceil import numpy as np -from numpy.testing import assert_array_equal import pytest +from numpy.testing import assert_array_equal +from sklearn.datasets import load_iris, make_blobs from sklearn.ensemble import StackingClassifier from sklearn.exceptions import NotFittedError -from sklearn.neighbors import KNeighborsClassifier -from sklearn.svm import SVC -from sklearn.model_selection import train_test_split -from sklearn.datasets import load_iris, make_blobs from sklearn.metrics import accuracy_score - +from sklearn.model_selection import train_test_split +from sklearn.neighbors import KNeighborsClassifier from sklearn.semi_supervised import SelfTrainingClassifier +from 
sklearn.svm import SVC # Author: Oliver Rausch # License: BSD 3 clause diff --git a/sklearn/svm/__init__.py b/sklearn/svm/__init__.py index f5b4123230f93..0d64ce24cdd63 100644 --- a/sklearn/svm/__init__.py +++ b/sklearn/svm/__init__.py @@ -10,8 +10,8 @@ # of their respective owners. # License: BSD 3 clause (C) INRIA 2010 -from ._classes import SVC, NuSVC, SVR, NuSVR, OneClassSVM, LinearSVC, LinearSVR from ._bounds import l1_min_c +from ._classes import SVC, SVR, LinearSVC, LinearSVR, NuSVC, NuSVR, OneClassSVM __all__ = [ "LinearSVC", diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py index a54c31cecb6e1..eb126ec77e526 100644 --- a/sklearn/svm/_base.py +++ b/sklearn/svm/_base.py @@ -5,28 +5,27 @@ import numpy as np import scipy.sparse as sp +from ..base import BaseEstimator, ClassifierMixin, _fit_context +from ..exceptions import ConvergenceWarning, NotFittedError +from ..preprocessing import LabelEncoder +from ..utils import check_array, check_random_state, column_or_1d, compute_class_weight +from ..utils._param_validation import Interval, StrOptions +from ..utils.extmath import safe_sparse_dot +from ..utils.metaestimators import available_if +from ..utils.multiclass import _ovr_decision_function, check_classification_targets +from ..utils.validation import ( + _check_large_sparse, + _check_sample_weight, + _num_samples, + check_consistent_length, + check_is_fitted, +) +from . import _liblinear as liblinear # type: ignore + # mypy error: error: Module 'sklearn.svm' has no attribute '_libsvm' # (and same for other imports) from . import _libsvm as libsvm # type: ignore -from . import _liblinear as liblinear # type: ignore from . import _libsvm_sparse as libsvm_sparse # type: ignore -from ..base import BaseEstimator, ClassifierMixin -from ..base import _fit_context -from ..preprocessing import LabelEncoder -from ..utils.multiclass import _ovr_decision_function -from ..utils import check_array, check_random_state -from ..utils import column_or_1d -from ..utils import compute_class_weight -from ..utils.metaestimators import available_if -from ..utils.extmath import safe_sparse_dot -from ..utils.validation import check_is_fitted, _check_large_sparse -from ..utils.validation import _num_samples -from ..utils.validation import _check_sample_weight, check_consistent_length -from ..utils.multiclass import check_classification_targets -from ..utils._param_validation import Interval, StrOptions -from ..exceptions import ConvergenceWarning -from ..exceptions import NotFittedError - LIBSVM_IMPL = ["c_svc", "nu_svc", "one_class", "epsilon_svr", "nu_svr"] @@ -180,7 +179,7 @@ def fit(self, X, y, sample_weight=None): """ rnd = check_random_state(self.random_state) - sparse = sp.isspmatrix(X) + sparse = sp.issparse(X) if sparse and self.kernel == "precomputed": raise TypeError("Sparse precomputed kernels are not supported.") self._sparse = sparse and not callable(self.kernel) @@ -618,7 +617,7 @@ def _validate_for_predict(self, X): reset=False, ) - if self._sparse and not sp.isspmatrix(X): + if self._sparse and not sp.issparse(X): X = sp.csr_matrix(X) if self._sparse: X.sort_indices() @@ -826,7 +825,7 @@ def predict(self, X): def _check_proba(self): if not self.probability: raise AttributeError( - "predict_proba is not available when probability=False" + "predict_proba is not available when probability=False" ) if self._impl not in ("c_svc", "nu_svc"): raise AttributeError("predict_proba only implemented for SVC and NuSVC") @@ -836,7 +835,7 @@ def _check_proba(self): def predict_proba(self, X): 
"""Compute probabilities of possible outcomes for samples in X. - The model need to have probability information computed at training + The model needs to have probability information computed at training time: fit with attribute `probability` set to True. Parameters @@ -1096,18 +1095,26 @@ def _fit_liblinear( Target vector relative to X C : float - Inverse of cross-validation parameter. Lower the C, the more + Inverse of cross-validation parameter. The lower the C, the higher the penalization. fit_intercept : bool - Whether or not to fit the intercept, that is to add a intercept - term to the decision function. + Whether or not to fit an intercept. If set to True, the feature vector + is extended to include an intercept term: ``[x_1, ..., x_n, 1]``, where + 1 corresponds to the intercept. If set to False, no intercept will be + used in calculations (i.e. data is expected to be already centered). intercept_scaling : float - LibLinear internally penalizes the intercept and this term is subject - to regularization just like the other terms of the feature vector. - In order to avoid this, one should increase the intercept_scaling. - such that the feature vector becomes [x, intercept_scaling]. + Liblinear internally penalizes the intercept, treating it like any + other term in the feature vector. To reduce the impact of the + regularization on the intercept, the `intercept_scaling` parameter can + be set to a value greater than 1; the higher the value of + `intercept_scaling`, the lower the impact of regularization on it. + Then, the weights become `[w_x_1, ..., w_x_n, + w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent + the feature weights and the intercept weight is scaled by + `intercept_scaling`. This scaling allows the intercept term to have a + different regularization behavior compared to the other features. class_weight : dict or 'balanced', default=None Weights associated with classes in the form ``{class_label: weight}``. @@ -1223,7 +1230,7 @@ def _fit_liblinear( raw_coef_, n_iter_ = liblinear.train_wrap( X, y_ind, - sp.isspmatrix(X), + sp.issparse(X), solver_type, tol, bias, diff --git a/sklearn/svm/_bounds.py b/sklearn/svm/_bounds.py index 83cb72d30892c..feb5e0227f5df 100644 --- a/sklearn/svm/_bounds.py +++ b/sklearn/svm/_bounds.py @@ -7,9 +7,9 @@ import numpy as np from ..preprocessing import LabelBinarizer -from ..utils.validation import check_consistent_length, check_array +from ..utils._param_validation import Interval, StrOptions, validate_params from ..utils.extmath import safe_sparse_dot -from ..utils._param_validation import StrOptions, Interval, validate_params +from ..utils.validation import check_array, check_consistent_length @validate_params( @@ -19,7 +19,8 @@ "loss": [StrOptions({"squared_hinge", "log"})], "fit_intercept": ["boolean"], "intercept_scaling": [Interval(Real, 0, None, closed="neither")], - } + }, + prefer_skip_nested_validation=True, ) def l1_min_c(X, y, *, loss="squared_hinge", fit_intercept=True, intercept_scaling=1.0): """Return the lowest bound for C. 
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py index a438d007da970..1ca6a8b8c6067 100644 --- a/sklearn/svm/_classes.py +++ b/sklearn/svm/_classes.py @@ -1,16 +1,15 @@ -from numbers import Integral, Real import warnings +from numbers import Integral, Real import numpy as np -from ._base import _fit_liblinear, _get_liblinear_solver_type, BaseSVC, BaseLibSVM -from ..base import BaseEstimator, RegressorMixin, OutlierMixin -from ..base import _fit_context -from ..linear_model._base import LinearClassifierMixin, SparseCoefMixin, LinearModel +from ..base import BaseEstimator, OutlierMixin, RegressorMixin, _fit_context +from ..linear_model._base import LinearClassifierMixin, LinearModel, SparseCoefMixin from ..utils import deprecated -from ..utils.validation import _num_samples +from ..utils._param_validation import Hidden, Interval, StrOptions from ..utils.multiclass import check_classification_targets -from ..utils._param_validation import Interval, StrOptions, Hidden +from ..utils.validation import _num_samples +from ._base import BaseLibSVM, BaseSVC, _fit_liblinear, _get_liblinear_solver_type def _validate_dual_parameter(dual, loss, penalty, multi_class, X): @@ -50,6 +49,10 @@ class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator): penalties and loss functions and should scale better to large numbers of samples. + The main differences between :class:`~sklearn.svm.LinearSVC` and + :class:`~sklearn.svm.SVC` lie in the loss function used by default, and in + the handling of intercept regularization between those two implementations. + This class supports both dense and sparse input and the multiclass support is handled according to a one-vs-the-rest scheme. @@ -73,12 +76,13 @@ class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator): optimization problem. Prefer dual=False when n_samples > n_features. `dual="auto"` will choose the value of the parameter automatically, based on the values of `n_samples`, `n_features`, `loss`, `multi_class` - and `penalty`. If `n_samples` < `n_features` and optmizer supports + and `penalty`. If `n_samples` < `n_features` and optimizer supports chosen `loss`, `multi_class` and `penalty`, then dual will be set to True, otherwise it will be set to False. .. versionchanged:: 1.3 - The default value will change from `True` to `"auto"` in 1.5. + The `"auto"` option is added in version 1.3 and will be the default + in version 1.5. tol : float, default=1e-4 Tolerance for stopping criteria. @@ -99,20 +103,26 @@ class LinearSVC(LinearClassifierMixin, SparseCoefMixin, BaseEstimator): will be ignored. fit_intercept : bool, default=True - Whether to calculate the intercept for this model. If set - to false, no intercept will be used in calculations - (i.e. data is expected to be already centered). + Whether or not to fit an intercept. If set to True, the feature vector + is extended to include an intercept term: `[x_1, ..., x_n, 1]`, where + 1 corresponds to the intercept. If set to False, no intercept will be + used in calculations (i.e. data is expected to be already centered). intercept_scaling : float, default=1.0 - When self.fit_intercept is True, instance vector x becomes - ``[x, self.intercept_scaling]``, - i.e. a "synthetic" feature with constant value equals to - intercept_scaling is appended to the instance vector. - The intercept becomes intercept_scaling * synthetic feature weight - Note! the synthetic feature weight is subject to l1/l2 regularization - as all other features. 
- To lessen the effect of regularization on synthetic feature weight - (and therefore on the intercept) intercept_scaling has to be increased. + When `fit_intercept` is True, the instance vector x becomes ``[x_1, + ..., x_n, intercept_scaling]``, i.e. a "synthetic" feature with a + constant value equal to `intercept_scaling` is appended to the instance + vector. The intercept becomes intercept_scaling * synthetic feature + weight. Note that liblinear internally penalizes the intercept, + treating it like any other term in the feature vector. To reduce the + impact of the regularization on the intercept, the `intercept_scaling` + parameter can be set to a value greater than 1; the higher the value of + `intercept_scaling`, the lower the impact of regularization on it. + Then, the weights become `[w_x_1, ..., w_x_n, + w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent + the feature weights and the intercept weight is scaled by + `intercept_scaling`. This scaling allows the intercept term to have a + different regularization behavior compared to the other features. class_weight : dict or 'balanced', default=None Set the parameter C of class i to ``class_weight[i]*C`` for @@ -362,6 +372,10 @@ class LinearSVR(RegressorMixin, LinearModel): penalties and loss functions and should scale better to large numbers of samples. + The main differences between :class:`~sklearn.svm.LinearSVR` and + :class:`~sklearn.svm.SVR` lie in the loss function used by default, and in + the handling of intercept regularization between those two implementations. + This class supports both dense and sparse input. Read more in the :ref:`User Guide `. @@ -389,31 +403,38 @@ class LinearSVR(RegressorMixin, LinearModel): loss ('squared_epsilon_insensitive') is the L2 loss. fit_intercept : bool, default=True - Whether to calculate the intercept for this model. If set - to false, no intercept will be used in calculations - (i.e. data is expected to be already centered). + Whether or not to fit an intercept. If set to True, the feature vector + is extended to include an intercept term: `[x_1, ..., x_n, 1]`, where + 1 corresponds to the intercept. If set to False, no intercept will be + used in calculations (i.e. data is expected to be already centered). intercept_scaling : float, default=1.0 - When self.fit_intercept is True, instance vector x becomes - [x, self.intercept_scaling], - i.e. a "synthetic" feature with constant value equals to - intercept_scaling is appended to the instance vector. - The intercept becomes intercept_scaling * synthetic feature weight - Note! the synthetic feature weight is subject to l1/l2 regularization - as all other features. - To lessen the effect of regularization on synthetic feature weight - (and therefore on the intercept) intercept_scaling has to be increased. + When `fit_intercept` is True, the instance vector x becomes `[x_1, ..., + x_n, intercept_scaling]`, i.e. a "synthetic" feature with a constant + value equal to `intercept_scaling` is appended to the instance vector. + The intercept becomes intercept_scaling * synthetic feature weight. + Note that liblinear internally penalizes the intercept, treating it + like any other term in the feature vector. To reduce the impact of the + regularization on the intercept, the `intercept_scaling` parameter can + be set to a value greater than 1; the higher the value of + `intercept_scaling`, the lower the impact of regularization on it. 
+ Then, the weights become `[w_x_1, ..., w_x_n, + w_intercept*intercept_scaling]`, where `w_x_1, ..., w_x_n` represent + the feature weights and the intercept weight is scaled by + `intercept_scaling`. This scaling allows the intercept term to have a + different regularization behavior compared to the other features. dual : "auto" or bool, default=True Select the algorithm to either solve the dual or primal optimization problem. Prefer dual=False when n_samples > n_features. `dual="auto"` will choose the value of the parameter automatically, based on the values of `n_samples`, `n_features` and `loss`. If - `n_samples` < `n_features` and optmizer supports chosen `loss`, + `n_samples` < `n_features` and optimizer supports chosen `loss`, then dual will be set to True, otherwise it will be set to False. .. versionchanged:: 1.3 - The default value will change from `True` to `"auto"` in 1.5. + The `"auto"` option is added in version 1.3 and will be the default + in version 1.5. verbose : int, default=0 Enable verbose output. Note that this setting takes advantage of a @@ -461,8 +482,8 @@ class LinearSVR(RegressorMixin, LinearModel): same library as this class (liblinear). SVR : Implementation of Support Vector Machine regression using libsvm: - the kernel can be non-linear but its SMO algorithm does not - scale to large number of samples as LinearSVC does. + the kernel can be non-linear but its SMO algorithm does not scale to + large number of samples as :class:`~sklearn.svm.LinearSVR` does. sklearn.linear_model.SGDRegressor : SGDRegressor can optimize the same cost function as LinearSVR @@ -631,10 +652,12 @@ class SVC(BaseSVC): kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ default='rbf' - Specifies the kernel type to be used in the algorithm. - If none is given, 'rbf' will be used. If a callable is given it is - used to pre-compute the kernel matrix from data matrices; that matrix - should be an array of shape ``(n_samples, n_samples)``. + Specifies the kernel type to be used in the algorithm. If + none is given, 'rbf' will be used. If a callable is given it is used to + pre-compute the kernel matrix from data matrices; that matrix should be + an array of shape ``(n_samples, n_samples)``. For an intuitive + visualization of different kernel types see + :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`. degree : int, default=3 Degree of the polynomial kernel function ('poly'). @@ -773,7 +796,7 @@ class SVC(BaseSVC): Indices of support vectors. support_vectors_ : ndarray of shape (n_SV, n_features) - Support vectors. + Support vectors. An empty array if kernel is precomputed. n_support_ : ndarray of shape (n_classes,), dtype=int32 Number of support vectors for each class. @@ -895,9 +918,11 @@ class NuSVC(BaseSVC): kernel : {'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'} or callable, \ default='rbf' - Specifies the kernel type to be used in the algorithm. - If none is given, 'rbf' will be used. If a callable is given it is - used to precompute the kernel matrix. + Specifies the kernel type to be used in the algorithm. + If none is given, 'rbf' will be used. If a callable is given it is + used to precompute the kernel matrix. For an intuitive + visualization of different kernel types see + :ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`. degree : int, default=3 Degree of the polynomial kernel function ('poly'). @@ -1043,6 +1068,7 @@ class NuSVC(BaseSVC): 0 if correctly fitted, 1 if the algorithm did not converge. 
probA_ : ndarray of shape (n_classes * (n_classes - 1) / 2,) + probB_ : ndarray of shape (n_classes * (n_classes - 1) / 2,) If `probability=True`, it corresponds to the parameters learned in Platt scaling to produce probability estimates from decision values. diff --git a/sklearn/svm/tests/test_bounds.py b/sklearn/svm/tests/test_bounds.py index d51865717e2fa..a1b0d02eff942 100644 --- a/sklearn/svm/tests/test_bounds.py +++ b/sklearn/svm/tests/test_bounds.py @@ -1,14 +1,12 @@ import numpy as np +import pytest from scipy import sparse as sp from scipy import stats -import pytest - -from sklearn.svm._bounds import l1_min_c -from sklearn.svm import LinearSVC from sklearn.linear_model import LogisticRegression -from sklearn.svm._newrand import set_seed_wrap, bounded_rand_int_wrap - +from sklearn.svm import LinearSVC +from sklearn.svm._bounds import l1_min_c +from sklearn.svm._newrand import bounded_rand_int_wrap, set_seed_wrap dense_X = [[-1, 0], [0, 1], [1, 1], [1, 1]] sparse_X = sp.csr_matrix(dense_X) diff --git a/sklearn/svm/tests/test_sparse.py b/sklearn/svm/tests/test_sparse.py index 97c63b0597c48..b8a9ef651221e 100644 --- a/sklearn/svm/tests/test_sparse.py +++ b/sklearn/svm/tests/test_sparse.py @@ -1,16 +1,14 @@ -import pytest - import numpy as np +import pytest from numpy.testing import assert_array_almost_equal, assert_array_equal from scipy import sparse -from sklearn import datasets, svm, linear_model, base -from sklearn.datasets import make_classification, load_digits, make_blobs -from sklearn.svm.tests import test_svm +from sklearn import base, datasets, linear_model, svm +from sklearn.datasets import load_digits, make_blobs, make_classification from sklearn.exceptions import ConvergenceWarning -from sklearn.utils.extmath import safe_sparse_dot +from sklearn.svm.tests import test_svm from sklearn.utils._testing import ignore_warnings, skip_if_32bit - +from sklearn.utils.extmath import safe_sparse_dot # test sample 1 X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]) @@ -47,7 +45,7 @@ def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test): dense_svm.fit(X_train.toarray(), y_train) - if sparse.isspmatrix(X_test): + if sparse.issparse(X_test): X_test_dense = X_test.toarray() else: X_test_dense = X_test @@ -79,7 +77,7 @@ def check_svm_model_equal(dense_svm, sparse_svm, X_train, y_train, X_test): dense_svm.predict_proba(X_test_dense), sparse_svm.predict_proba(X_test), 4 ) msg = "cannot use sparse input in 'SVC' trained on dense data" - if sparse.isspmatrix(X_test): + if sparse.issparse(X_test): with pytest.raises(ValueError, match=msg): dense_svm.predict(X_test) diff --git a/sklearn/svm/tests/test_svm.py b/sklearn/svm/tests/test_svm.py index 3748bbd7db98b..f43ca5568cfd4 100644 --- a/sklearn/svm/tests/test_svm.py +++ b/sklearn/svm/tests/test_svm.py @@ -3,33 +3,44 @@ TODO: remove hard coded numerical results when possible """ -import warnings import re +import warnings import numpy as np import pytest - -from numpy.testing import assert_array_equal, assert_array_almost_equal -from numpy.testing import assert_almost_equal -from numpy.testing import assert_allclose +from numpy.testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) from scipy import sparse -from sklearn import svm, linear_model, datasets, metrics, base -from sklearn.svm import LinearSVC, OneClassSVM, SVR, NuSVR, LinearSVR -from sklearn.svm._classes import _validate_dual_parameter -from sklearn.model_selection import train_test_split 
-from sklearn.datasets import make_classification, make_blobs + +from sklearn import base, datasets, linear_model, metrics, svm +from sklearn.datasets import make_blobs, make_classification +from sklearn.exceptions import ( + ConvergenceWarning, + NotFittedError, + UndefinedMetricWarning, +) from sklearn.metrics import f1_score from sklearn.metrics.pairwise import rbf_kernel -from sklearn.utils import check_random_state -from sklearn.utils._testing import ignore_warnings -from sklearn.utils.validation import _num_samples -from sklearn.utils import shuffle -from sklearn.exceptions import ConvergenceWarning -from sklearn.exceptions import NotFittedError, UndefinedMetricWarning +from sklearn.model_selection import train_test_split from sklearn.multiclass import OneVsRestClassifier # mypy error: Module 'sklearn.svm' has no attribute '_libsvm' -from sklearn.svm import _libsvm # type: ignore +from sklearn.svm import ( # type: ignore + SVR, + LinearSVC, + LinearSVR, + NuSVR, + OneClassSVM, + _libsvm, +) +from sklearn.svm._classes import _validate_dual_parameter +from sklearn.utils import check_random_state, shuffle +from sklearn.utils._testing import ignore_warnings +from sklearn.utils.validation import _num_samples # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/tests/random_seed.py b/sklearn/tests/random_seed.py index 41cfe06a1d7e6..0fffd57a1016d 100644 --- a/sklearn/tests/random_seed.py +++ b/sklearn/tests/random_seed.py @@ -8,10 +8,11 @@ https://scikit-learn.org/dev/computing/parallelism.html#sklearn-tests-global-random-seed """ -import pytest from os import environ from random import Random +import pytest + # Passes the main worker's random seeds to workers class XDistHooks: diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 8a8bff765650d..2f1c746052beb 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -1,35 +1,32 @@ # Author: Gael Varoquaux # License: BSD 3 clause +import pickle import re +import warnings + import numpy as np -import scipy.sparse as sp import pytest -import warnings +import scipy.sparse as sp from numpy.testing import assert_allclose import sklearn -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_no_warnings -from sklearn.utils._testing import ignore_warnings - -from sklearn.base import BaseEstimator, clone, is_classifier -from sklearn.svm import SVC -from sklearn.preprocessing import StandardScaler -from sklearn.utils._set_output import _get_output_config -from sklearn.pipeline import Pipeline +from sklearn import config_context, datasets +from sklearn.base import BaseEstimator, TransformerMixin, clone, is_classifier from sklearn.decomposition import PCA -from sklearn.model_selection import GridSearchCV - -from sklearn.tree import DecisionTreeClassifier -from sklearn.tree import DecisionTreeRegressor -from sklearn import datasets from sklearn.exceptions import InconsistentVersionWarning - -from sklearn.base import TransformerMixin +from sklearn.model_selection import GridSearchCV +from sklearn.pipeline import Pipeline +from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils._mocking import MockDataFrame -from sklearn import config_context -import pickle +from sklearn.utils._set_output import _get_output_config +from sklearn.utils._testing import ( + assert_array_equal, + assert_no_warnings, + ignore_warnings, +) 
############################################################################# @@ -186,6 +183,13 @@ def test_clone_nan(): assert clf.empty is clf2.empty +def test_clone_dict(): + # test that clone creates a clone of a dict + orig = {"a": MyEstimator()} + cloned = clone(orig) + assert orig["a"] is not cloned["a"] + + def test_clone_sparse_matrices(): sparse_matrix_classes = [ cls diff --git a/sklearn/tests/test_build.py b/sklearn/tests/test_build.py index 7321603dd4e46..72cab1dfcb174 100644 --- a/sklearn/tests/test_build.py +++ b/sklearn/tests/test_build.py @@ -1,7 +1,8 @@ import os -import pytest import textwrap +import pytest + from sklearn import __version__ from sklearn.utils._openmp_helpers import _openmp_parallelism_enabled diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index a19785a60c308..df82cbc18e6c6 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -1,50 +1,53 @@ # Authors: Alexandre Gramfort # License: BSD 3 clause -import pytest import numpy as np +import pytest from numpy.testing import assert_allclose from scipy import sparse from sklearn.base import BaseEstimator, clone -from sklearn.dummy import DummyClassifier -from sklearn.model_selection import LeaveOneOut, train_test_split - -from sklearn.utils._testing import ( - assert_array_almost_equal, - assert_almost_equal, - assert_array_equal, +from sklearn.calibration import ( + CalibratedClassifierCV, + CalibrationDisplay, + _CalibratedClassifier, + _sigmoid_calibration, + _SigmoidCalibration, + calibration_curve, ) -from sklearn.utils.extmath import softmax -from sklearn.exceptions import NotFittedError -from sklearn.datasets import make_classification, make_blobs, load_iris -from sklearn.preprocessing import LabelEncoder -from sklearn.model_selection import KFold, cross_val_predict -from sklearn.naive_bayes import MultinomialNB +from sklearn.datasets import load_iris, make_blobs, make_classification +from sklearn.dummy import DummyClassifier from sklearn.ensemble import ( RandomForestClassifier, VotingClassifier, ) -from sklearn.linear_model import LogisticRegression -from sklearn.tree import DecisionTreeClassifier -from sklearn.svm import LinearSVC -from sklearn.pipeline import Pipeline, make_pipeline -from sklearn.preprocessing import StandardScaler -from sklearn.isotonic import IsotonicRegression +from sklearn.exceptions import NotFittedError from sklearn.feature_extraction import DictVectorizer from sklearn.impute import SimpleImputer +from sklearn.isotonic import IsotonicRegression +from sklearn.linear_model import LogisticRegression, SGDClassifier from sklearn.metrics import brier_score_loss -from sklearn.calibration import ( - _CalibratedClassifier, - _SigmoidCalibration, - _sigmoid_calibration, - CalibratedClassifierCV, - CalibrationDisplay, - calibration_curve, +from sklearn.model_selection import ( + KFold, + LeaveOneOut, + check_cv, + cross_val_predict, + cross_val_score, + train_test_split, ) +from sklearn.naive_bayes import MultinomialNB +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import LabelEncoder, StandardScaler +from sklearn.svm import LinearSVC +from sklearn.tree import DecisionTreeClassifier from sklearn.utils._mocking import CheckingClassifier -from sklearn.utils._testing import _convert_container - +from sklearn.utils._testing import ( + _convert_container, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) +from sklearn.utils.extmath import softmax N_SAMPLES = 200 
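The calibration test hunk that follows compares Platt (sigmoid) scaling against isotonic calibration through their ROC AUC scores, which works because AUC depends only on the ranking of the decision values and is therefore unchanged by any strictly increasing transformation. A small self-contained illustration of that property (not part of the diff; it assumes only NumPy and sklearn.metrics.roc_auc_score):

import numpy as np
from sklearn.metrics import roc_auc_score

rng = np.random.default_rng(0)
y = rng.integers(0, 2, size=200)
scores = y + rng.normal(size=200)  # noisy but informative decision values

# A strictly increasing transform (here a sigmoid, as in Platt scaling)
# preserves the ranking of the scores, so the ROC AUC is identical.
calibrated = 1.0 / (1.0 + np.exp(-(2.0 * scores - 0.5)))

assert np.isclose(roc_auc_score(y, scores), roc_auc_score(y, calibrated))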
@@ -995,3 +998,94 @@ def fit(self, X, y, sample_weight=None, fit_param=None): CalibratedClassifierCV(estimator=TestClassifier()).fit( *data, fit_param=np.ones(len(data[1]) + 1) ) + + +def test_calibrated_classifier_cv_works_with_large_confidence_scores( + global_random_seed, +): + """Test that :class:`CalibratedClassifierCV` works with large confidence + scores when using the `sigmoid` method, particularly with the + :class:`SGDClassifier`. + + Non-regression test for issue #26766. + """ + prob = 0.67 + n = 1000 + random_noise = np.random.default_rng(global_random_seed).normal(size=n) + + y = np.array([1] * int(n * prob) + [0] * (n - int(n * prob))) + X = 1e5 * y.reshape((-1, 1)) + random_noise + + # Check that the decision function of SGDClassifier produces predicted + # values that are quite large, for the data under consideration. + cv = check_cv(cv=None, y=y, classifier=True) + indices = cv.split(X, y) + for train, test in indices: + X_train, y_train = X[train], y[train] + X_test = X[test] + sgd_clf = SGDClassifier(loss="squared_hinge", random_state=global_random_seed) + sgd_clf.fit(X_train, y_train) + predictions = sgd_clf.decision_function(X_test) + assert (predictions > 1e4).any() + + # Compare the CalibratedClassifierCV using the sigmoid method with the + # CalibratedClassifierCV using the isotonic method. The isotonic method + # is used for comparison because it is numerically stable. + clf_sigmoid = CalibratedClassifierCV( + SGDClassifier(loss="squared_hinge", random_state=global_random_seed), + method="sigmoid", + ) + score_sigmoid = cross_val_score(clf_sigmoid, X, y, scoring="roc_auc") + + # The isotonic method is used for comparison because it is numerically + # stable. + clf_isotonic = CalibratedClassifierCV( + SGDClassifier(loss="squared_hinge", random_state=global_random_seed), + method="isotonic", + ) + score_isotonic = cross_val_score(clf_isotonic, X, y, scoring="roc_auc") + + # The AUC score should be the same because it is invariant under + # strictly monotonic transformations + assert_allclose(score_sigmoid, score_isotonic) + + +def test_sigmoid_calibration_max_abs_prediction_threshold(global_random_seed): + random_state = np.random.RandomState(seed=global_random_seed) + n = 100 + y = random_state.randint(0, 2, size=n) + + # Check that for small enough predictions ranging from -2 to 2, the + # threshold value has no impact on the outcome + predictions_small = random_state.uniform(low=-2, high=2, size=100) + + # Using a threshold lower than the maximum absolute value of the + # predictions enables internal re-scaling by max(abs(predictions_small)). + threshold_1 = 0.1 + a1, b1 = _sigmoid_calibration( + predictions=predictions_small, + y=y, + max_abs_prediction_threshold=threshold_1, + ) + + # Using a larger threshold disables rescaling. + threshold_2 = 10 + a2, b2 = _sigmoid_calibration( + predictions=predictions_small, + y=y, + max_abs_prediction_threshold=threshold_2, + ) + + # Using default threshold of 30 also disables the scaling. + a3, b3 = _sigmoid_calibration( + predictions=predictions_small, + y=y, + ) + + # Depends on the tolerance of the underlying quasi-Newton solver which is + # not too strict by default.
+ atol = 1e-6 + assert_allclose(a1, a2, atol=atol) + assert_allclose(a2, a3, atol=atol) + assert_allclose(b1, b2, atol=atol) + assert_allclose(b2, b3, atol=atol) diff --git a/sklearn/tests/test_common.py b/sklearn/tests/test_common.py index 176a2d463d162..8b407d18f90d8 100644 --- a/sklearn/tests/test_common.py +++ b/sklearn/tests/test_common.py @@ -7,81 +7,82 @@ # License: BSD 3 clause import os -import warnings -import sys -import re import pkgutil -from inspect import isgenerator, signature -from itertools import product, chain +import re +import sys +import warnings from functools import partial +from inspect import isgenerator, signature +from itertools import chain, product -import pytest import numpy as np +import pytest +import sklearn from sklearn.cluster import ( + OPTICS, AffinityPropagation, Birch, MeanShift, - OPTICS, SpectralClustering, ) +from sklearn.compose import ColumnTransformer from sklearn.datasets import make_blobs -from sklearn.manifold import Isomap, TSNE, LocallyLinearEmbedding +from sklearn.decomposition import PCA +from sklearn.exceptions import ConvergenceWarning, FitFailedWarning + +# make it possible to discover experimental estimators when calling `all_estimators` +from sklearn.experimental import ( + enable_halving_search_cv, # noqa + enable_iterative_imputer, # noqa +) +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.linear_model._base import LinearClassifierMixin +from sklearn.manifold import TSNE, Isomap, LocallyLinearEmbedding +from sklearn.model_selection import ( + GridSearchCV, + HalvingGridSearchCV, + HalvingRandomSearchCV, + RandomizedSearchCV, +) from sklearn.neighbors import ( - LocalOutlierFactor, KNeighborsClassifier, KNeighborsRegressor, + LocalOutlierFactor, RadiusNeighborsClassifier, RadiusNeighborsRegressor, ) -from sklearn.preprocessing import FunctionTransformer +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import ( + FunctionTransformer, + MinMaxScaler, + OneHotEncoder, + StandardScaler, +) from sklearn.semi_supervised import LabelPropagation, LabelSpreading - -from sklearn.utils import all_estimators -from sklearn.utils._testing import ignore_warnings -from sklearn.exceptions import ConvergenceWarning -from sklearn.exceptions import FitFailedWarning -from sklearn.utils.estimator_checks import check_estimator - -import sklearn - -# make it possible to discover experimental estimators when calling `all_estimators` -from sklearn.experimental import enable_iterative_imputer # noqa -from sklearn.experimental import enable_halving_search_cv # noqa - -from sklearn.compose import ColumnTransformer -from sklearn.decomposition import PCA -from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder -from sklearn.linear_model._base import LinearClassifierMixin -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model import Ridge -from sklearn.model_selection import GridSearchCV -from sklearn.model_selection import RandomizedSearchCV -from sklearn.model_selection import HalvingGridSearchCV -from sklearn.model_selection import HalvingRandomSearchCV -from sklearn.pipeline import make_pipeline, Pipeline - -from sklearn.utils import IS_PYPY +from sklearn.utils import IS_PYPY, all_estimators from sklearn.utils._tags import _DEFAULT_TAGS, _safe_tags from sklearn.utils._testing import ( SkipTest, + ignore_warnings, set_random_state, ) from sklearn.utils.estimator_checks import ( _construct_instance, - _set_checking_parameters, _get_check_estimator_ids, + 
_set_checking_parameters, check_class_weight_balanced_linear_classifier, - parametrize_with_checks, check_dataframe_column_names_consistency, + check_estimator, + check_get_feature_names_out_error, + check_global_output_transform_pandas, check_n_features_in_after_fitting, check_param_validation, - check_transformer_get_feature_names_out, - check_transformer_get_feature_names_out_pandas, check_set_output_transform, check_set_output_transform_pandas, - check_global_ouptut_transform_pandas, - check_get_feature_names_out_error, + check_transformer_get_feature_names_out, + check_transformer_get_feature_names_out_pandas, + parametrize_with_checks, ) @@ -216,6 +217,9 @@ def test_import_all_consistency(): for modname in submods + ["sklearn"]: if ".tests." in modname: continue + # Avoid test suite depending on setuptools + if "sklearn._build_utils" in modname: + continue if IS_PYPY and ( "_svmlight_format_io" in modname or "feature_extraction._hashing_fast" in modname @@ -595,9 +599,9 @@ def test_global_output_transform_pandas(estimator): name = estimator.__class__.__name__ if not hasattr(estimator, "set_output"): pytest.skip( - f"Skipping check_global_ouptut_transform_pandas for {name}: Does not" + f"Skipping check_global_output_transform_pandas for {name}: Does not" " support set_output API yet" ) _set_checking_parameters(estimator) with ignore_warnings(category=(FutureWarning)): - check_global_ouptut_transform_pandas(estimator.__class__.__name__, estimator) + check_global_output_transform_pandas(estimator.__class__.__name__, estimator) diff --git a/sklearn/tests/test_config.py b/sklearn/tests/test_config.py index 8bde58bf92425..1b92d58a5f28e 100644 --- a/sklearn/tests/test_config.py +++ b/sklearn/tests/test_config.py @@ -4,9 +4,9 @@ import pytest -from sklearn import get_config, set_config, config_context import sklearn -from sklearn.utils.parallel import delayed, Parallel +from sklearn import config_context, get_config, set_config +from sklearn.utils.parallel import Parallel, delayed def test_config_context(): diff --git a/sklearn/tests/test_discriminant_analysis.py b/sklearn/tests/test_discriminant_analysis.py index 91beb518df6b2..27e183fde43e0 100644 --- a/sklearn/tests/test_discriminant_analysis.py +++ b/sklearn/tests/test_discriminant_analysis.py @@ -1,27 +1,24 @@ import numpy as np - import pytest - from scipy import linalg -from sklearn.utils import check_random_state -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import _convert_container - -from sklearn.datasets import make_blobs -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis -from sklearn.discriminant_analysis import _cov -from sklearn.covariance import ledoit_wolf from sklearn.cluster import KMeans - -from sklearn.covariance import ShrunkCovariance -from sklearn.covariance import LedoitWolf - +from sklearn.covariance import LedoitWolf, ShrunkCovariance, ledoit_wolf +from sklearn.datasets import make_blobs +from sklearn.discriminant_analysis import ( + LinearDiscriminantAnalysis, + QuadraticDiscriminantAnalysis, + _cov, +) from sklearn.preprocessing import StandardScaler +from sklearn.utils import check_random_state +from sklearn.utils._testing import ( + _convert_container, + assert_allclose, + assert_almost_equal, + 
assert_array_almost_equal, + assert_array_equal, +) # Data is just 6 separable points in the plane X = np.array([[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]], dtype="f") @@ -222,7 +219,7 @@ def discriminant_func(sample, coef, intercept, clazz): assert prob_ref == pytest.approx(prob_ref_2) # check that the probability of LDA are close to the theoretical - # probabilties + # probabilities assert_allclose( lda.predict_proba(sample), np.hstack([prob, prob_ref])[np.newaxis], atol=1e-2 ) diff --git a/sklearn/tests/test_docstring_parameters.py b/sklearn/tests/test_docstring_parameters.py index 6f42e81b47205..c6cba68e340f6 100644 --- a/sklearn/tests/test_docstring_parameters.py +++ b/sklearn/tests/test_docstring_parameters.py @@ -2,36 +2,38 @@ # Raghav RV # License: BSD 3 clause +import importlib import inspect import warnings -import importlib - -from pkgutil import walk_packages from inspect import signature +from pkgutil import walk_packages import numpy as np - -# make it possible to discover experimental estimators when calling `all_estimators` -from sklearn.experimental import enable_iterative_imputer # noqa -from sklearn.experimental import enable_halving_search_cv # noqa +import pytest import sklearn -from sklearn.utils import IS_PYPY -from sklearn.utils._testing import check_docstring_parameters -from sklearn.utils._testing import _get_func_name -from sklearn.utils._testing import ignore_warnings -from sklearn.utils import all_estimators -from sklearn.utils.estimator_checks import _enforce_estimator_tags_y -from sklearn.utils.estimator_checks import _enforce_estimator_tags_X -from sklearn.utils.estimator_checks import _construct_instance -from sklearn.utils.fixes import sp_version, parse_version -from sklearn.utils.deprecation import _is_deprecated from sklearn.datasets import make_classification + +# make it possible to discover experimental estimators when calling `all_estimators` +from sklearn.experimental import ( + enable_halving_search_cv, # noqa + enable_iterative_imputer, # noqa +) from sklearn.linear_model import LogisticRegression from sklearn.preprocessing import FunctionTransformer - -import pytest - +from sklearn.utils import IS_PYPY, all_estimators +from sklearn.utils._testing import ( + _get_func_name, + check_docstring_parameters, + ignore_warnings, +) +from sklearn.utils.deprecation import _is_deprecated +from sklearn.utils.estimator_checks import ( + _construct_instance, + _enforce_estimator_tags_X, + _enforce_estimator_tags_y, +) +from sklearn.utils.fixes import parse_version, sp_version # walk_packages() ignores DeprecationWarnings, now we need to ignore # FutureWarnings @@ -151,29 +153,6 @@ def test_docstring_parameters(): raise AssertionError("Docstring Error:\n" + msg) -@ignore_warnings(category=FutureWarning) -def test_tabs(): - # Test that there are no tabs in our source files - for importer, modname, ispkg in walk_packages(sklearn.__path__, prefix="sklearn."): - if IS_PYPY and ( - "_svmlight_format_io" in modname - or "feature_extraction._hashing_fast" in modname - ): - continue - - # because we don't import - mod = importlib.import_module(modname) - - try: - source = inspect.getsource(mod) - except IOError: # user probably should have run "make clean" - continue - assert "\t" not in source, ( - '"%s" has tabs, please remove them ', - "or add it to the ignore list" % modname, - ) - - def _construct_searchcv_instance(SearchCV): return SearchCV(LogisticRegression(), {"C": [0.1, 1]}) diff --git a/sklearn/tests/test_docstrings.py 
b/sklearn/tests/test_docstrings.py index 9e0c0734eb787..889c33c2a832d 100644 --- a/sklearn/tests/test_docstrings.py +++ b/sklearn/tests/test_docstrings.py @@ -5,13 +5,11 @@ import pytest # make it possible to discover experimental estimators when calling `all_estimators` -from sklearn.experimental import enable_iterative_imputer # noqa -from sklearn.experimental import enable_halving_search_cv # noqa - -from sklearn.utils.discovery import all_estimators -from sklearn.utils.discovery import all_displays -from sklearn.utils.discovery import all_functions - +from sklearn.experimental import ( + enable_halving_search_cv, # noqa + enable_iterative_imputer, # noqa +) +from sklearn.utils.discovery import all_displays, all_estimators, all_functions numpydoc_validation = pytest.importorskip("numpydoc.validate") @@ -177,8 +175,8 @@ def test_docstring(Klass, method, request): if __name__ == "__main__": - import sys import argparse + import sys parser = argparse.ArgumentParser(description="Validate docstring with numpydoc.") parser.add_argument("import_path", help="Import path to validate") diff --git a/sklearn/tests/test_dummy.py b/sklearn/tests/test_dummy.py index fd6b1108fe878..edc6bfe86f12a 100644 --- a/sklearn/tests/test_dummy.py +++ b/sklearn/tests/test_dummy.py @@ -1,17 +1,17 @@ -import pytest - import numpy as np +import pytest import scipy.sparse as sp from sklearn.base import clone -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import ignore_warnings -from sklearn.utils.stats import _weighted_percentile - from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.exceptions import NotFittedError +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + ignore_warnings, +) +from sklearn.utils.stats import _weighted_percentile @ignore_warnings diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py index bcc26a294ebcc..93df0221236b8 100644 --- a/sklearn/tests/test_isotonic.py +++ b/sklearn/tests/test_isotonic.py @@ -1,28 +1,26 @@ -import warnings -import numpy as np -import pickle import copy +import pickle +import warnings +import numpy as np import pytest +from scipy.special import expit import sklearn from sklearn.datasets import make_regression from sklearn.isotonic import ( - check_increasing, - isotonic_regression, IsotonicRegression, _make_unique, + check_increasing, + isotonic_regression, ) - -from sklearn.utils.validation import check_array +from sklearn.utils import shuffle from sklearn.utils._testing import ( assert_allclose, - assert_array_equal, assert_array_almost_equal, + assert_array_equal, ) -from sklearn.utils import shuffle - -from scipy.special import expit +from sklearn.utils.validation import check_array def test_permutation_invariance(): @@ -597,7 +595,7 @@ def test_isotonic_thresholds(increasing): # the data is already strictly monotonic which is not the case with # this random data) assert X_thresholds.shape[0] < X.shape[0] - assert np.in1d(X_thresholds, X).all() + assert np.isin(X_thresholds, X).all() # Output thresholds lie in the range of the training set: assert y_thresholds.max() <= y.max() diff --git a/sklearn/tests/test_kernel_approximation.py b/sklearn/tests/test_kernel_approximation.py index 8f01c7c1df9ef..c541b3c545be2 100644 --- a/sklearn/tests/test_kernel_approximation.py +++ 
b/sklearn/tests/test_kernel_approximation.py @@ -1,21 +1,28 @@ import re import numpy as np -from scipy.sparse import csr_matrix import pytest +from scipy.sparse import csr_matrix -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose - -from sklearn.metrics.pairwise import kernel_metrics -from sklearn.kernel_approximation import RBFSampler -from sklearn.kernel_approximation import AdditiveChi2Sampler -from sklearn.kernel_approximation import SkewedChi2Sampler -from sklearn.kernel_approximation import Nystroem -from sklearn.kernel_approximation import PolynomialCountSketch from sklearn.datasets import make_classification -from sklearn.metrics.pairwise import polynomial_kernel, rbf_kernel, chi2_kernel +from sklearn.kernel_approximation import ( + AdditiveChi2Sampler, + Nystroem, + PolynomialCountSketch, + RBFSampler, + SkewedChi2Sampler, +) +from sklearn.metrics.pairwise import ( + chi2_kernel, + kernel_metrics, + polynomial_kernel, + rbf_kernel, +) +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, +) # generate data rng = np.random.RandomState(0) @@ -106,8 +113,8 @@ def test_additive_chi2_sampler(): X_sp_trans = transform.fit_transform(csr_matrix(X)) Y_sp_trans = transform.transform(csr_matrix(Y)) - assert_array_equal(X_trans, X_sp_trans.A) - assert_array_equal(Y_trans, Y_sp_trans.A) + assert_array_equal(X_trans, X_sp_trans.toarray()) + assert_array_equal(Y_trans, Y_sp_trans.toarray()) # test error is raised on negative input Y_neg = Y.copy() diff --git a/sklearn/tests/test_kernel_ridge.py b/sklearn/tests/test_kernel_ridge.py index 76a5c77e73be1..e0d2d2cf39574 100644 --- a/sklearn/tests/test_kernel_ridge.py +++ b/sklearn/tests/test_kernel_ridge.py @@ -2,13 +2,10 @@ import scipy.sparse as sp from sklearn.datasets import make_regression -from sklearn.linear_model import Ridge from sklearn.kernel_ridge import KernelRidge +from sklearn.linear_model import Ridge from sklearn.metrics.pairwise import pairwise_kernels -from sklearn.utils._testing import ignore_warnings - -from sklearn.utils._testing import assert_array_almost_equal - +from sklearn.utils._testing import assert_array_almost_equal, ignore_warnings X, y = make_regression(n_features=10, random_state=0) Xcsr = sp.csr_matrix(X) diff --git a/sklearn/tests/test_metadata_routing.py b/sklearn/tests/test_metadata_routing.py index a6e74c12f6e45..1dc9988644abf 100644 --- a/sklearn/tests/test_metadata_routing.py +++ b/sklearn/tests/test_metadata_routing.py @@ -6,29 +6,36 @@ # License: BSD 3 clause import re + import numpy as np import pytest from sklearn import config_context -from sklearn.base import BaseEstimator -from sklearn.base import ClassifierMixin -from sklearn.base import RegressorMixin -from sklearn.base import TransformerMixin -from sklearn.base import MetaEstimatorMixin -from sklearn.base import clone +from sklearn.base import ( + BaseEstimator, + ClassifierMixin, + MetaEstimatorMixin, + RegressorMixin, + TransformerMixin, + clone, +) from sklearn.linear_model import LinearRegression -from sklearn.utils.validation import check_is_fitted from sklearn.utils import metadata_routing -from sklearn.utils.metadata_routing import MetadataRequest -from sklearn.utils.metadata_routing import get_routing_for_object -from sklearn.utils.metadata_routing import MetadataRouter -from sklearn.utils.metadata_routing import MethodMapping -from sklearn.utils.metadata_routing import 
process_routing -from sklearn.utils._metadata_requests import MethodMetadataRequest -from sklearn.utils._metadata_requests import _MetadataRequester -from sklearn.utils._metadata_requests import METHODS -from sklearn.utils._metadata_requests import request_is_alias -from sklearn.utils._metadata_requests import request_is_valid +from sklearn.utils._metadata_requests import ( + METHODS, + MethodMetadataRequest, + _MetadataRequester, + request_is_alias, + request_is_valid, +) +from sklearn.utils.metadata_routing import ( + MetadataRequest, + MetadataRouter, + MethodMapping, + get_routing_for_object, + process_routing, +) +from sklearn.utils.validation import check_is_fitted rng = np.random.RandomState(42) N, M = 100, 4 @@ -671,9 +678,7 @@ def fit(self, X, y, **kwargs): def test_method_metadata_request(): mmr = MethodMetadataRequest(owner="test", method="fit") - with pytest.raises( - ValueError, match="alias should be either a valid identifier or" - ): + with pytest.raises(ValueError, match="The alias you're setting for"): mmr.add_request(param="foo", alias=1.4) mmr.add_request(param="foo", alias=None) diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index 7c7c2d9d7f606..b3c6820faefc2 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -5,23 +5,24 @@ import numpy as np import pytest -from sklearn.base import BaseEstimator -from sklearn.base import is_regressor +from sklearn.base import BaseEstimator, is_regressor from sklearn.datasets import make_classification -from sklearn.utils import all_estimators -from sklearn.utils.estimator_checks import _enforce_estimator_tags_X -from sklearn.utils.estimator_checks import _enforce_estimator_tags_y -from sklearn.utils.validation import check_is_fitted -from sklearn.utils._testing import set_random_state -from sklearn.pipeline import Pipeline, make_pipeline -from sklearn.model_selection import GridSearchCV, RandomizedSearchCV -from sklearn.feature_extraction.text import TfidfVectorizer -from sklearn.feature_selection import RFE, RFECV from sklearn.ensemble import BaggingClassifier from sklearn.exceptions import NotFittedError +from sklearn.feature_extraction.text import TfidfVectorizer +from sklearn.feature_selection import RFE, RFECV +from sklearn.linear_model import LogisticRegression, Ridge +from sklearn.model_selection import GridSearchCV, RandomizedSearchCV +from sklearn.pipeline import Pipeline, make_pipeline +from sklearn.preprocessing import MaxAbsScaler, StandardScaler from sklearn.semi_supervised import SelfTrainingClassifier -from sklearn.linear_model import Ridge, LogisticRegression -from sklearn.preprocessing import StandardScaler, MaxAbsScaler +from sklearn.utils import all_estimators +from sklearn.utils._testing import set_random_state +from sklearn.utils.estimator_checks import ( + _enforce_estimator_tags_X, + _enforce_estimator_tags_y, +) +from sklearn.utils.validation import check_is_fitted class DelegatorData: diff --git a/sklearn/tests/test_metaestimators_metadata_routing.py b/sklearn/tests/test_metaestimators_metadata_routing.py index 892b0f21dbe8a..830c0c1a2c95b 100644 --- a/sklearn/tests/test_metaestimators_metadata_routing.py +++ b/sklearn/tests/test_metaestimators_metadata_routing.py @@ -171,7 +171,7 @@ def predict_log_proba(self, X, sample_weight="default", metadata="default"): # return np.zeros(shape=(len(X), 2)) -METAESTIMATORS = [ +METAESTIMATORS: list = [ { "metaestimator": MultiOutputRegressor, "estimator_name": "estimator", @@ -226,7 
+226,7 @@ def predict_log_proba(self, X, sample_weight="default", metadata="default"): - routing_methods: list of all methods to check for routing - preserves_metadata: Whether the metaestimator passes the metadata to the sub-estimator without modification or not. If it does, we check that the - values are identical. If it doesn', no check is performed. TODO Maybe + values are identical. If it doesn't, no check is performed. TODO Maybe something smarter could be done if the data is modified. """ diff --git a/sklearn/tests/test_min_dependencies_readme.py b/sklearn/tests/test_min_dependencies_readme.py index a0692d333feef..ce53cc8d28d82 100644 --- a/sklearn/tests/test_min_dependencies_readme.py +++ b/sklearn/tests/test_min_dependencies_readme.py @@ -2,11 +2,12 @@ import os -import re import platform +import re from pathlib import Path import pytest + import sklearn from sklearn._min_dependencies import dependent_packages from sklearn.utils.fixes import parse_version @@ -52,6 +53,13 @@ def test_min_dependencies_readme(): assert version == min_version, f"{package} has a mismatched version" +@pytest.mark.skip( + reason=( + "For the release purpose, the upper limit of Cython is set to 3.0 since it " + "leads to performance regression. Scikit-learn can still be built with " + "Cython 3." + ) +) def test_min_dependencies_pyproject_toml(): """Check versions in pyproject.toml is consistent with _min_dependencies.""" # tomllib is available in Python 3.11 diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index 472d1adadc050..59044c7a3cb8c 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -1,45 +1,42 @@ +from re import escape + import numpy as np -import scipy.sparse as sp import pytest +import scipy.sparse as sp from numpy.testing import assert_allclose -from re import escape - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._mocking import CheckingClassifier -from sklearn.multiclass import OneVsRestClassifier -from sklearn.multiclass import OneVsOneClassifier -from sklearn.multiclass import OutputCodeClassifier -from sklearn.utils.multiclass import check_classification_targets, type_of_target -from sklearn.utils import ( - check_array, - shuffle, -) - -from sklearn.metrics import precision_score -from sklearn.metrics import recall_score - -from sklearn.svm import LinearSVC, SVC -from sklearn.naive_bayes import MultinomialNB +from sklearn import datasets, svm +from sklearn.datasets import load_breast_cancer +from sklearn.exceptions import NotFittedError +from sklearn.impute import SimpleImputer from sklearn.linear_model import ( - LinearRegression, - Lasso, ElasticNet, - Ridge, - Perceptron, + Lasso, + LinearRegression, LogisticRegression, + Perceptron, + Ridge, SGDClassifier, ) -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor -from sklearn.neighbors import KNeighborsClassifier +from sklearn.metrics import precision_score, recall_score from sklearn.model_selection import GridSearchCV, cross_val_score +from sklearn.multiclass import ( + OneVsOneClassifier, + OneVsRestClassifier, + OutputCodeClassifier, +) +from sklearn.naive_bayes import MultinomialNB +from sklearn.neighbors import KNeighborsClassifier from sklearn.pipeline import Pipeline, make_pipeline -from sklearn.impute import SimpleImputer -from sklearn import svm -from sklearn.exceptions import NotFittedError -from sklearn import datasets -from sklearn.datasets import 
load_breast_cancer +from sklearn.svm import SVC, LinearSVC +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.utils import ( + check_array, + shuffle, +) +from sklearn.utils._mocking import CheckingClassifier +from sklearn.utils._testing import assert_almost_equal, assert_array_equal +from sklearn.utils.multiclass import check_classification_targets, type_of_target msg = "The default value for `force_alpha` will change" pytestmark = pytest.mark.filterwarnings(f"ignore:{msg}:FutureWarning") diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index ce629dea785af..8b4b88ad4c558 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -1,42 +1,54 @@ -import pytest +import re + import numpy as np +import pytest import scipy.sparse as sp from joblib import cpu_count -import re -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal from sklearn import datasets -from sklearn.base import clone -from sklearn.datasets import make_classification -from sklearn.datasets import load_linnerud -from sklearn.datasets import make_multilabel_classification -from sklearn.datasets import make_regression -from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier +from sklearn.base import ClassifierMixin, clone +from sklearn.datasets import ( + load_linnerud, + make_classification, + make_multilabel_classification, + make_regression, +) +from sklearn.dummy import DummyClassifier, DummyRegressor +from sklearn.ensemble import ( + GradientBoostingRegressor, + RandomForestClassifier, + StackingRegressor, +) from sklearn.exceptions import NotFittedError -from sklearn.linear_model import Lasso -from sklearn.linear_model import LogisticRegression -from sklearn.linear_model import OrthogonalMatchingPursuit -from sklearn.linear_model import Ridge -from sklearn.linear_model import PassiveAggressiveClassifier -from sklearn.linear_model import SGDClassifier -from sklearn.linear_model import SGDRegressor -from sklearn.linear_model import LinearRegression +from sklearn.impute import SimpleImputer +from sklearn.linear_model import ( + Lasso, + LinearRegression, + LogisticRegression, + OrthogonalMatchingPursuit, + PassiveAggressiveClassifier, + Ridge, + SGDClassifier, + SGDRegressor, +) from sklearn.metrics import jaccard_score, mean_squared_error +from sklearn.model_selection import GridSearchCV, train_test_split from sklearn.multiclass import OneVsRestClassifier -from sklearn.multioutput import ClassifierChain, RegressorChain -from sklearn.multioutput import MultiOutputClassifier -from sklearn.multioutput import MultiOutputRegressor +from sklearn.multioutput import ( + ClassifierChain, + MultiOutputClassifier, + MultiOutputRegressor, + RegressorChain, +) +from sklearn.pipeline import make_pipeline from sklearn.svm import LinearSVC from sklearn.tree import DecisionTreeClassifier -from sklearn.base import ClassifierMixin from sklearn.utils import shuffle -from sklearn.model_selection import GridSearchCV, train_test_split -from sklearn.dummy import DummyRegressor, DummyClassifier -from sklearn.pipeline import make_pipeline -from sklearn.impute import SimpleImputer -from sklearn.ensemble import StackingRegressor +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) def test_multi_target_regression(): diff --git 
a/sklearn/tests/test_naive_bayes.py b/sklearn/tests/test_naive_bayes.py index 4516fabb8961d..0dddb3201be0e 100644 --- a/sklearn/tests/test_naive_bayes.py +++ b/sklearn/tests/test_naive_bayes.py @@ -1,25 +1,26 @@ import re +import warnings import numpy as np -import scipy.sparse import pytest -import warnings - +import scipy.sparse from scipy.special import logsumexp from sklearn.datasets import load_digits, load_iris - -from sklearn.model_selection import train_test_split -from sklearn.model_selection import cross_val_score - -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose - -from sklearn.naive_bayes import GaussianNB, BernoulliNB -from sklearn.naive_bayes import MultinomialNB, ComplementNB -from sklearn.naive_bayes import CategoricalNB +from sklearn.model_selection import cross_val_score, train_test_split +from sklearn.naive_bayes import ( + BernoulliNB, + CategoricalNB, + ComplementNB, + GaussianNB, + MultinomialNB, +) +from sklearn.utils._testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) DISCRETE_NAIVE_BAYES_CLASSES = [BernoulliNB, CategoricalNB, ComplementNB, MultinomialNB] ALL_NAIVE_BAYES_CLASSES = DISCRETE_NAIVE_BAYES_CLASSES + [GaussianNB] diff --git a/sklearn/tests/test_pipeline.py b/sklearn/tests/test_pipeline.py index 28067ea316074..449e1468674e7 100644 --- a/sklearn/tests/test_pipeline.py +++ b/sklearn/tests/test_pipeline.py @@ -1,44 +1,43 @@ """ Test the pipeline module. """ -from tempfile import mkdtemp +import itertools +import re import shutil import time -import re -import itertools +from tempfile import mkdtemp -import pytest +import joblib import numpy as np +import pytest from scipy import sparse -import joblib +from sklearn.base import BaseEstimator, TransformerMixin, clone, is_classifier +from sklearn.cluster import KMeans +from sklearn.datasets import load_iris +from sklearn.decomposition import PCA, TruncatedSVD +from sklearn.dummy import DummyRegressor +from sklearn.ensemble import HistGradientBoostingClassifier +from sklearn.exceptions import NotFittedError +from sklearn.feature_extraction.text import CountVectorizer +from sklearn.feature_selection import SelectKBest, f_classif +from sklearn.impute import SimpleImputer +from sklearn.linear_model import Lasso, LinearRegression, LogisticRegression +from sklearn.metrics import accuracy_score, r2_score +from sklearn.model_selection import train_test_split +from sklearn.neighbors import LocalOutlierFactor +from sklearn.pipeline import FeatureUnion, Pipeline, make_pipeline, make_union +from sklearn.preprocessing import StandardScaler +from sklearn.svm import SVC from sklearn.utils._testing import ( - assert_allclose, - assert_array_equal, - assert_array_almost_equal, MinimalClassifier, MinimalRegressor, MinimalTransformer, + assert_allclose, + assert_array_almost_equal, + assert_array_equal, ) -from sklearn.exceptions import NotFittedError -from sklearn.model_selection import train_test_split from sklearn.utils.validation import check_is_fitted -from sklearn.base import clone, is_classifier, BaseEstimator, TransformerMixin -from sklearn.pipeline import Pipeline, FeatureUnion, make_pipeline, make_union -from sklearn.svm import SVC -from sklearn.neighbors import LocalOutlierFactor -from sklearn.linear_model import LogisticRegression, Lasso -from sklearn.linear_model import 
LinearRegression -from sklearn.metrics import accuracy_score, r2_score -from sklearn.cluster import KMeans -from sklearn.feature_selection import SelectKBest, f_classif -from sklearn.dummy import DummyRegressor -from sklearn.decomposition import PCA, TruncatedSVD -from sklearn.datasets import load_iris -from sklearn.preprocessing import StandardScaler -from sklearn.feature_extraction.text import CountVectorizer -from sklearn.ensemble import HistGradientBoostingClassifier -from sklearn.impute import SimpleImputer iris = load_iris() @@ -229,7 +228,7 @@ def test_pipeline_invalid_parameters(): # Test clone pipe2 = clone(pipe) - assert not pipe.named_steps["svc"] is pipe2.named_steps["svc"] + assert pipe.named_steps["svc"] is not pipe2.named_steps["svc"] # Check that apart from estimators, the parameters are the same params = pipe.get_params(deep=True) diff --git a/sklearn/tests/test_public_functions.py b/sklearn/tests/test_public_functions.py index 3157e344cbef3..6ad481c438ab3 100644 --- a/sklearn/tests/test_public_functions.py +++ b/sklearn/tests/test_public_functions.py @@ -4,11 +4,13 @@ import pytest -from sklearn.utils._param_validation import generate_invalid_param_val -from sklearn.utils._param_validation import generate_valid_param -from sklearn.utils._param_validation import make_constraint -from sklearn.utils._param_validation import InvalidParameterError -from sklearn.utils._param_validation import Interval +from sklearn.utils._param_validation import ( + Interval, + InvalidParameterError, + generate_invalid_param_val, + generate_valid_param, + make_constraint, +) def _get_func_info(func_module): @@ -131,6 +133,7 @@ def _check_function_param_validation( "sklearn.datasets.fetch_lfw_people", "sklearn.datasets.fetch_olivetti_faces", "sklearn.datasets.fetch_rcv1", + "sklearn.datasets.fetch_openml", "sklearn.datasets.fetch_species_distributions", "sklearn.datasets.get_data_home", "sklearn.datasets.load_breast_cancer", @@ -267,7 +270,6 @@ def _check_function_param_validation( "sklearn.preprocessing.add_dummy_feature", "sklearn.preprocessing.binarize", "sklearn.preprocessing.label_binarize", - "sklearn.preprocessing.maxabs_scale", "sklearn.preprocessing.normalize", "sklearn.preprocessing.scale", "sklearn.random_projection.johnson_lindenstrauss_min_dim", @@ -296,6 +298,7 @@ def test_function_param_validation(func_module): PARAM_VALIDATION_CLASS_WRAPPER_LIST = [ ("sklearn.cluster.affinity_propagation", "sklearn.cluster.AffinityPropagation"), + ("sklearn.cluster.k_means", "sklearn.cluster.KMeans"), ("sklearn.cluster.mean_shift", "sklearn.cluster.MeanShift"), ("sklearn.cluster.spectral_clustering", "sklearn.cluster.SpectralClustering"), ("sklearn.covariance.graphical_lasso", "sklearn.covariance.GraphicalLasso"), @@ -304,6 +307,7 @@ def test_function_param_validation(func_module): ("sklearn.decomposition.dict_learning", "sklearn.decomposition.DictionaryLearning"), ("sklearn.decomposition.fastica", "sklearn.decomposition.FastICA"), ("sklearn.decomposition.non_negative_factorization", "sklearn.decomposition.NMF"), + ("sklearn.preprocessing.maxabs_scale", "sklearn.preprocessing.MaxAbsScaler"), ("sklearn.preprocessing.minmax_scale", "sklearn.preprocessing.MinMaxScaler"), ("sklearn.preprocessing.power_transform", "sklearn.preprocessing.PowerTransformer"), ( diff --git a/sklearn/tests/test_random_projection.py b/sklearn/tests/test_random_projection.py index 229789516f167..c91833590a591 100644 --- a/sklearn/tests/test_random_projection.py +++ b/sklearn/tests/test_random_projection.py @@ -1,25 +1,27 @@ 
import functools -from typing import List, Any import warnings +from typing import Any, List import numpy as np -import scipy.sparse as sp import pytest +import scipy.sparse as sp -from sklearn.metrics import euclidean_distances - -from sklearn.random_projection import johnson_lindenstrauss_min_dim -from sklearn.random_projection import _gaussian_random_matrix -from sklearn.random_projection import _sparse_random_matrix -from sklearn.random_projection import SparseRandomProjection -from sklearn.random_projection import GaussianRandomProjection - -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_array_almost_equal from sklearn.exceptions import DataDimensionalityWarning +from sklearn.metrics import euclidean_distances +from sklearn.random_projection import ( + GaussianRandomProjection, + SparseRandomProjection, + _gaussian_random_matrix, + _sparse_random_matrix, + johnson_lindenstrauss_min_dim, +) +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, +) all_sparse_random_matrix: List[Any] = [_sparse_random_matrix] all_dense_random_matrix: List[Any] = [_gaussian_random_matrix] diff --git a/sklearn/tree/__init__.py b/sklearn/tree/__init__.py index f7a8fd183c7cc..8cfb42c73e118 100644 --- a/sklearn/tree/__init__.py +++ b/sklearn/tree/__init__.py @@ -3,12 +3,14 @@ classification and regression. """ -from ._classes import BaseDecisionTree -from ._classes import DecisionTreeClassifier -from ._classes import DecisionTreeRegressor -from ._classes import ExtraTreeClassifier -from ._classes import ExtraTreeRegressor -from ._export import export_graphviz, plot_tree, export_text +from ._classes import ( + BaseDecisionTree, + DecisionTreeClassifier, + DecisionTreeRegressor, + ExtraTreeClassifier, + ExtraTreeRegressor, +) +from ._export import export_graphviz, export_text, plot_tree __all__ = [ "BaseDecisionTree", diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py index adf547ee3ccc5..1721cd891c302 100644 --- a/sklearn/tree/_classes.py +++ b/sklearn/tree/_classes.py @@ -14,43 +14,44 @@ # # License: BSD 3 clause +import copy import numbers import warnings -import copy -from abc import ABCMeta -from abc import abstractmethod +from abc import ABCMeta, abstractmethod from math import ceil from numbers import Integral, Real import numpy as np from scipy.sparse import issparse -from ..base import BaseEstimator -from ..base import ClassifierMixin -from ..base import clone -from ..base import RegressorMixin -from ..base import is_classifier -from ..base import MultiOutputMixin -from ..base import _fit_context -from ..utils import Bunch -from ..utils import check_random_state -from ..utils.validation import _check_sample_weight -from ..utils.validation import assert_all_finite -from ..utils.validation import _assert_all_finite_element_wise -from ..utils import compute_sample_weight +from ..base import ( + BaseEstimator, + ClassifierMixin, + MultiOutputMixin, + RegressorMixin, + _fit_context, + clone, + is_classifier, +) +from ..utils import Bunch, check_random_state, compute_sample_weight +from ..utils._param_validation import Hidden, Interval, RealNotInt, StrOptions from ..utils.multiclass import check_classification_targets -from ..utils.validation import check_is_fitted 
-from ..utils._param_validation import Hidden, Interval, StrOptions -from ..utils._param_validation import RealNotInt - +from ..utils.validation import ( + _assert_all_finite_element_wise, + _check_sample_weight, + assert_all_finite, + check_is_fitted, +) +from . import _criterion, _splitter, _tree from ._criterion import Criterion from ._splitter import Splitter -from ._tree import DepthFirstTreeBuilder -from ._tree import BestFirstTreeBuilder -from ._tree import Tree -from ._tree import _build_pruned_tree_ccp -from ._tree import ccp_pruning_path -from . import _tree, _splitter, _criterion +from ._tree import ( + BestFirstTreeBuilder, + DepthFirstTreeBuilder, + Tree, + _build_pruned_tree_ccp, + ccp_pruning_path, +) from ._utils import _any_isnan_axis0 __all__ = [ diff --git a/sklearn/tree/_export.py b/sklearn/tree/_export.py index 0cdfd583144e1..ff0d6db5c25a5 100644 --- a/sklearn/tree/_export.py +++ b/sklearn/tree/_export.py @@ -17,15 +17,11 @@ import numpy as np -from ..utils.validation import check_is_fitted, check_array -from ..utils._param_validation import Interval, validate_params, StrOptions, HasMethods - from ..base import is_classifier - -from . import _criterion -from . import _tree -from ._reingold_tilford import buchheim, Tree -from . import DecisionTreeClassifier, DecisionTreeRegressor +from ..utils._param_validation import HasMethods, Interval, StrOptions, validate_params +from ..utils.validation import check_array, check_is_fitted +from . import DecisionTreeClassifier, DecisionTreeRegressor, _criterion, _tree +from ._reingold_tilford import Tree, buchheim def _color_brew(n): @@ -82,8 +78,8 @@ def __repr__(self): { "decision_tree": [DecisionTreeClassifier, DecisionTreeRegressor], "max_depth": [Interval(Integral, 0, None, closed="left"), None], - "feature_names": [list, None], - "class_names": [list, None], + "feature_names": ["array-like", None], + "class_names": ["array-like", "boolean", None], "label": [StrOptions({"all", "root", "none"})], "filled": ["boolean"], "impurity": ["boolean"], @@ -93,7 +89,8 @@ def __repr__(self): "precision": [Interval(Integral, 0, None, closed="left"), None], "ax": "no_validation", # delegate validation to matplotlib "fontsize": [Interval(Integral, 0, None, closed="left"), None], - } + }, + prefer_skip_nested_validation=True, ) def plot_tree( decision_tree, @@ -133,11 +130,11 @@ def plot_tree( The maximum depth of the representation. If None, the tree is fully generated. - feature_names : list of str, default=None + feature_names : array-like of str, default=None Names of each of the features. If None, generic names will be used ("x[0]", "x[1]", ...). - class_names : list of str or bool, default=None + class_names : array-like of str or True, default=None Names of each of the target classes in ascending numerical order. Only relevant for classification and not supported for multi-output. If ``True``, shows a symbolic representation of the class name. 
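The relaxed constraints above mean that plot_tree now validates feature_names and class_names as array-likes rather than requiring plain Python lists. A minimal usage sketch of the updated signature (illustrative only, not taken from the diff):

import matplotlib.pyplot as plt
import numpy as np

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree

iris = load_iris()
clf = DecisionTreeClassifier(max_depth=2, random_state=0).fit(iris.data, iris.target)

# feature_names / class_names may be any array-like of str (e.g. a NumPy array),
# not only a Python list; class_names=True still shows symbolic class labels.
plot_tree(
    clf,
    feature_names=np.asarray(iris.feature_names),
    class_names=iris.target_names,  # ndarray of str
    filled=True,
)
plt.show()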
@@ -746,7 +743,8 @@ def recurse(self, node, tree, ax, max_x, max_y, depth=0): "special_characters": ["boolean"], "precision": [Interval(Integral, 0, None, closed="left"), None], "fontname": [str], - } + }, + prefer_skip_nested_validation=True, ) def export_graphviz( decision_tree, @@ -947,7 +945,8 @@ def compute_depth_( "spacing": [Interval(Integral, 1, None, closed="left"), None], "decimals": [Interval(Integral, 0, None, closed="left"), None], "show_weights": ["boolean"], - } + }, + prefer_skip_nested_validation=True, ) def export_text( decision_tree, diff --git a/sklearn/tree/_splitter.pyx b/sklearn/tree/_splitter.pyx index 7e60f0023d2a2..e578d115dec8e 100644 --- a/sklearn/tree/_splitter.pyx +++ b/sklearn/tree/_splitter.pyx @@ -20,7 +20,7 @@ from cython cimport final import numpy as np -from scipy.sparse import isspmatrix_csc +from scipy.sparse import issparse from ._utils cimport log from ._utils cimport rand_int @@ -208,8 +208,13 @@ cdef class Splitter: weighted_n_node_samples[0] = self.criterion.weighted_n_node_samples return 0 - cdef int node_split(self, double impurity, SplitRecord* split, - SIZE_t* n_constant_features) except -1 nogil: + cdef int node_split( + self, + double impurity, + SplitRecord* split, + SIZE_t* n_constant_features, + ) except -1 nogil: + """Find the best split on node samples[start:end]. This is a placeholder method. The majority of computation will be done @@ -1041,7 +1046,7 @@ cdef class SparsePartitioner: DTYPE_t[::1] feature_values, const unsigned char[::1] missing_values_in_feature_mask, ): - if not isspmatrix_csc(X): + if not (issparse(X) and X.format == "csc"): raise ValueError("X should be in csc format") self.samples = samples diff --git a/sklearn/tree/_tree.pyx b/sklearn/tree/_tree.pyx index e7a0ab2f2966d..c843ad7500480 100644 --- a/sklearn/tree/_tree.pyx +++ b/sklearn/tree/_tree.pyx @@ -32,7 +32,6 @@ cnp.import_array() from scipy.sparse import issparse from scipy.sparse import csr_matrix -from scipy.sparse import isspmatrix_csr from ._utils cimport safe_realloc from ._utils cimport sizet_ptr_to_ndarray @@ -464,11 +463,19 @@ cdef class BestFirstTreeBuilder(TreeBuilder): if rc == -1: raise MemoryError() - cdef inline int _add_split_node(self, Splitter splitter, Tree tree, - SIZE_t start, SIZE_t end, double impurity, - bint is_first, bint is_left, Node* parent, - SIZE_t depth, - FrontierRecord* res) except -1 nogil: + cdef inline int _add_split_node( + self, + Splitter splitter, + Tree tree, + SIZE_t start, + SIZE_t end, + double impurity, + bint is_first, + bint is_left, + Node* parent, + SIZE_t depth, + FrontierRecord* res + ) except -1 nogil: """Adds node w/ partition ``[start, end)`` to the frontier. """ cdef SplitRecord split cdef SIZE_t node_id @@ -574,6 +581,9 @@ cdef class Tree: children_right[i] > i. This child handles the case where X[:, feature[i]] > threshold[i]. + n_leaves : int + Number of leaves in the tree. + feature : array of int, shape [node_count] feature[i] holds the feature to split on, for the internal node i. @@ -593,6 +603,10 @@ cdef class Tree: weighted_n_node_samples : array of double, shape [node_count] weighted_n_node_samples[i] holds the weighted number of training samples reaching node i. + + missing_go_to_left : array of bool, shape [node_count] + missing_go_to_left[i] holds a bool indicating whether or not there were + missing values at node i. """ # Wrap for outside world. 
# WARNING: these reference the current `nodes` and `value` buffers, which @@ -757,11 +771,13 @@ cdef class Tree: safe_realloc(&self.nodes, capacity) safe_realloc(&self.value, capacity * self.value_stride) - # value memory is initialised to 0 to enable classifier argmax if capacity > self.capacity: + # value memory is initialised to 0 to enable classifier argmax memset((self.value + self.capacity * self.value_stride), 0, (capacity - self.capacity) * self.value_stride * sizeof(double)) + # node memory is initialised to 0 to ensure deterministic pickle (padding in Node struct) + memset((self.nodes + self.capacity), 0, (capacity - self.capacity) * sizeof(Node)) # if capacity smaller than node_count, adjust the counter if capacity < self.node_count: @@ -877,7 +893,7 @@ cdef class Tree: """Finds the terminal region (=leaf node) for each sample in sparse X. """ # Check input - if not isspmatrix_csr(X): + if not (issparse(X) and X.format == 'csr'): raise ValueError("X should be in csr_matrix format, got %s" % type(X)) @@ -1005,7 +1021,7 @@ cdef class Tree: """Finds the decision path (=node) for each sample in X.""" # Check input - if not isspmatrix_csr(X): + if not (issparse(X) and X.format == "csr"): raise ValueError("X should be in csr_matrix format, got %s" % type(X)) diff --git a/sklearn/tree/_utils.pxd b/sklearn/tree/_utils.pxd index 4938d3030245f..4b953af2d9b2b 100644 --- a/sklearn/tree/_utils.pxd +++ b/sklearn/tree/_utils.pxd @@ -23,7 +23,9 @@ cdef enum: # Max value for our rand_r replacement (near the bottom). # We don't use RAND_MAX because it's different across platforms and # particularly tiny on Windows/MSVC. - RAND_R_MAX = 0x7FFFFFFF + # It corresponds to the maximum representable value for + # 32-bit signed integers (i.e. 2^31 - 1). + RAND_R_MAX = 2147483647 # safe_realloc(&p, n) resizes the allocation of p to n * sizeof(*p) bytes or diff --git a/sklearn/tree/tests/test_export.py b/sklearn/tree/tests/test_export.py index 1dc0fd7b9d8f4..169c667b4ff3f 100644 --- a/sklearn/tree/tests/test_export.py +++ b/sklearn/tree/tests/test_export.py @@ -1,19 +1,24 @@ """ Testing for export functions of decision trees (sklearn.tree.export). 
""" +from io import StringIO from re import finditer, search from textwrap import dedent import numpy as np -from numpy.random import RandomState import pytest +from numpy.random import RandomState from sklearn.base import is_classifier -from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.ensemble import GradientBoostingClassifier -from sklearn.tree import export_graphviz, plot_tree, export_text -from io import StringIO from sklearn.exceptions import NotFittedError +from sklearn.tree import ( + DecisionTreeClassifier, + DecisionTreeRegressor, + export_graphviz, + export_text, + plot_tree, +) # toy sample X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]] diff --git a/sklearn/tree/tests/test_reingold_tilford.py b/sklearn/tree/tests/test_reingold_tilford.py index 8f38c997a48d7..bf0ce3ce2cffc 100644 --- a/sklearn/tree/tests/test_reingold_tilford.py +++ b/sklearn/tree/tests/test_reingold_tilford.py @@ -1,6 +1,7 @@ import numpy as np import pytest -from sklearn.tree._reingold_tilford import buchheim, Tree + +from sklearn.tree._reingold_tilford import Tree, buchheim simple_tree = Tree("", 0, Tree("", 1), Tree("", 2)) diff --git a/sklearn/tree/tests/test_tree.py b/sklearn/tree/tests/test_tree.py index eefae6cdaa3f6..3dac1b38693c0 100644 --- a/sklearn/tree/tests/test_tree.py +++ b/sklearn/tree/tests/test_tree.py @@ -2,65 +2,57 @@ Testing for the tree module (sklearn.tree). """ import copy +import copyreg +import io import pickle -from itertools import product, chain import struct -import io -import copyreg - -import pytest -import numpy as np -from numpy.testing import assert_allclose -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import coo_matrix +from itertools import chain, product import joblib +import numpy as np +import pytest from joblib.numpy_pickle import NumpyPickler +from numpy.testing import assert_allclose +from scipy.sparse import coo_matrix, csc_matrix, csr_matrix -from sklearn.random_projection import _sparse_random_matrix - +from sklearn import datasets, tree from sklearn.dummy import DummyRegressor - -from sklearn.metrics import accuracy_score -from sklearn.metrics import mean_squared_error -from sklearn.metrics import mean_poisson_deviance - +from sklearn.exceptions import NotFittedError +from sklearn.metrics import accuracy_score, mean_poisson_deviance, mean_squared_error from sklearn.model_selection import train_test_split - -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import create_memmap_backed_data -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import skip_if_32bit - +from sklearn.random_projection import _sparse_random_matrix +from sklearn.tree import ( + DecisionTreeClassifier, + DecisionTreeRegressor, + ExtraTreeClassifier, + ExtraTreeRegressor, +) +from sklearn.tree._classes import ( + CRITERIA_CLF, + CRITERIA_REG, + DENSE_SPLITTERS, + SPARSE_SPLITTERS, +) +from sklearn.tree._tree import ( + NODE_DTYPE, + TREE_LEAF, + TREE_UNDEFINED, + _check_n_classes, + _check_node_ndarray, + _check_value_ndarray, +) +from sklearn.tree._tree import Tree as CythonTree +from sklearn.utils import _IS_32BIT, compute_sample_weight +from sklearn.utils._testing import ( + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + create_memmap_backed_data, + ignore_warnings, + skip_if_32bit, +) from 
sklearn.utils.estimator_checks import check_sample_weights_invariance from sklearn.utils.validation import check_random_state -from sklearn.utils import _IS_32BIT - -from sklearn.exceptions import NotFittedError - -from sklearn.tree import DecisionTreeClassifier -from sklearn.tree import DecisionTreeRegressor -from sklearn.tree import ExtraTreeClassifier -from sklearn.tree import ExtraTreeRegressor - -from sklearn import tree -from sklearn.tree._tree import TREE_LEAF, TREE_UNDEFINED -from sklearn.tree._tree import Tree as CythonTree -from sklearn.tree._tree import _check_n_classes -from sklearn.tree._tree import _check_value_ndarray -from sklearn.tree._tree import _check_node_ndarray -from sklearn.tree._tree import NODE_DTYPE - -from sklearn.tree._classes import CRITERIA_CLF -from sklearn.tree._classes import CRITERIA_REG -from sklearn import datasets - -from sklearn.utils import compute_sample_weight -from sklearn.tree._classes import DENSE_SPLITTERS, SPARSE_SPLITTERS - CLF_CRITERIONS = ("gini", "log_loss") REG_CRITERIONS = ("squared_error", "absolute_error", "friedman_mse", "poisson") @@ -810,10 +802,10 @@ def test_min_weight_fraction_leaf_with_min_samples_leaf_on_sparse_input(name): check_min_weight_fraction_leaf_with_min_samples_leaf(name, "multilabel", True) -def test_min_impurity_decrease(): +def test_min_impurity_decrease(global_random_seed): # test if min_impurity_decrease ensure that a split is made only if # if the impurity decrease is at least that value - X, y = datasets.make_classification(n_samples=10000, random_state=42) + X, y = datasets.make_classification(n_samples=100, random_state=global_random_seed) # test both DepthFirstTreeBuilder and BestFirstTreeBuilder # by setting max_leaf_nodes @@ -2634,3 +2626,16 @@ def test_sample_weight_non_uniform(make_data, Tree): tree_samples_removed.fit(X[1::2, :], y[1::2]) assert_allclose(tree_samples_removed.predict(X), tree_with_sw.predict(X)) + + +def test_deterministic_pickle(): + # Non-regression test for: + # https://github.com/scikit-learn/scikit-learn/issues/27268 + # Uninitialised memory would lead to the two pickle strings being different. + tree1 = DecisionTreeClassifier(random_state=0).fit(iris.data, iris.target) + tree2 = DecisionTreeClassifier(random_state=0).fit(iris.data, iris.target) + + pickle1 = pickle.dumps(tree1) + pickle2 = pickle.dumps(tree2) + + assert pickle1 == pickle2 diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index a2cc4a9a7c56f..2322765bd3904 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -1,48 +1,43 @@ """ The :mod:`sklearn.utils` module includes various utilities. """ -from collections.abc import Sequence -from contextlib import contextmanager -from itertools import compress -from itertools import islice import math import numbers import platform import struct import timeit -from contextlib import suppress - import warnings +from collections.abc import Sequence +from contextlib import contextmanager, suppress +from itertools import compress, islice + import numpy as np from scipy.sparse import issparse -from . import metadata_routing - -from .murmurhash import murmurhash3_32 -from .class_weight import compute_class_weight, compute_sample_weight -from . import _joblib +from .. import get_config from ..exceptions import DataConversionWarning +from . 
import _joblib, metadata_routing +from ._bunch import Bunch +from ._estimator_html_repr import estimator_html_repr +from ._param_validation import Interval, validate_params +from .class_weight import compute_class_weight, compute_sample_weight from .deprecation import deprecated from .discovery import all_estimators from .fixes import parse_version, threadpool_info -from ._estimator_html_repr import estimator_html_repr +from .murmurhash import murmurhash3_32 from .validation import ( + _is_arraylike_not_scalar, as_float_array, assert_all_finite, - check_random_state, - column_or_1d, check_array, check_consistent_length, + check_random_state, + check_scalar, + check_symmetric, check_X_y, + column_or_1d, indexable, - check_symmetric, - check_scalar, - _is_arraylike_not_scalar, ) -from .. import get_config -from ._bunch import Bunch -from ._param_validation import validate_params, Interval - # Do not deprecate parallel_backend and register_parallel_backend as they are # needed to tune `scikit-learn` behavior and have different effect if called @@ -403,7 +398,7 @@ def _get_column_indices(X, key): """Get feature column indices for input data X and key. For accepted values of `key`, see the docstring of - :func:`_safe_indexing_column`. + :func:`_safe_indexing`. """ n_columns = X.shape[1] @@ -474,7 +469,8 @@ def _get_column_indices(X, key): "n_samples": [Interval(numbers.Integral, 1, None, closed="left"), None], "random_state": ["random_state"], "stratify": ["array-like", None], - } + }, + prefer_skip_nested_validation=True, ) def resample(*arrays, replace=True, n_samples=None, random_state=None, stratify=None): """Resample arrays or sparse matrices in a consistent way. @@ -742,7 +738,8 @@ def _chunk_generator(gen, chunksize): "n": [Interval(numbers.Integral, 1, None, closed="left")], "batch_size": [Interval(numbers.Integral, 1, None, closed="left")], "min_batch_size": [Interval(numbers.Integral, 0, None, closed="left")], - } + }, + prefer_skip_nested_validation=True, ) def gen_batches(n, batch_size, *, min_batch_size=0): """Generator to create slices containing `batch_size` elements from 0 to `n`. diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index 13ab96b866fc6..78be9d3e679b2 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -1,6 +1,6 @@ """Tools to support array_api.""" -from functools import wraps import math +from functools import wraps import numpy import scipy.special as special diff --git a/sklearn/utils/_available_if.py b/sklearn/utils/_available_if.py index 643f71d44ad49..3f6d50aa123c5 100644 --- a/sklearn/utils/_available_if.py +++ b/sklearn/utils/_available_if.py @@ -1,6 +1,5 @@ +from functools import update_wrapper, wraps from types import MethodType -from functools import wraps -from functools import update_wrapper class _AvailableIfDescriptor: diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py index de48890fcaacf..b3bf1c2a317ec 100644 --- a/sklearn/utils/_encode.py +++ b/sklearn/utils/_encode.py @@ -1,8 +1,9 @@ -from contextlib import suppress from collections import Counter +from contextlib import suppress from typing import NamedTuple import numpy as np + from . import is_scalar_nan @@ -176,7 +177,7 @@ def _unique_python(values, *, return_inverse, return_counts): except TypeError: types = sorted(t.__qualname__ for t in set(type(v) for v in values)) raise TypeError( - "Encoders require their input to be uniformly " + "Encoders require their input argument must be uniformly " f"strings or numbers. 
Got {types}" ) ret = (uniques,) @@ -295,7 +296,7 @@ def is_valid(value): diff = np.setdiff1d(unique_values, known_values, assume_unique=True) if return_mask: if diff.size: - valid_mask = np.in1d(values, known_values) + valid_mask = np.isin(values, known_values) else: valid_mask = np.ones(len(values), dtype=bool) diff --git a/sklearn/utils/_estimator_html_repr.py b/sklearn/utils/_estimator_html_repr.py index 466467f4cd341..e9b95666cdd32 100644 --- a/sklearn/utils/_estimator_html_repr.py +++ b/sklearn/utils/_estimator_html_repr.py @@ -1,8 +1,8 @@ +import html from contextlib import closing -from io import StringIO from inspect import isclass +from io import StringIO from string import Template -import html from .. import config_context diff --git a/sklearn/utils/_joblib.py b/sklearn/utils/_joblib.py index 8cbe084c94992..590fdc6170c64 100644 --- a/sklearn/utils/_joblib.py +++ b/sklearn/utils/_joblib.py @@ -5,13 +5,20 @@ # joblib imports may raise DeprecationWarning on certain Python # versions import joblib - from joblib import logger - from joblib import dump, load - from joblib import __version__ - from joblib import effective_n_jobs - from joblib import hash - from joblib import cpu_count, Parallel, Memory, delayed - from joblib import parallel_backend, register_parallel_backend + from joblib import ( + Memory, + Parallel, + __version__, + cpu_count, + delayed, + dump, + effective_n_jobs, + hash, + load, + logger, + parallel_backend, + register_parallel_backend, + ) __all__ = [ diff --git a/sklearn/utils/_mask.py b/sklearn/utils/_mask.py index d57cf839d962f..07332bf1edbd4 100644 --- a/sklearn/utils/_mask.py +++ b/sklearn/utils/_mask.py @@ -1,6 +1,7 @@ +from contextlib import suppress + import numpy as np from scipy import sparse as sp -from contextlib import suppress from . import is_scalar_nan from .fixes import _object_dtype_isnan diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index a1cd934c13756..b3957d0dda240 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -142,6 +142,9 @@ def _routing_enabled(): def request_is_alias(item): """Check if an item is a valid alias. + Values in ``VALID_REQUEST_VALUES`` are not considered aliases in this + context. Only a string which is a valid identifier is. + Parameters ---------- item : object @@ -234,8 +237,9 @@ def add_request( """ if not request_is_alias(alias) and not request_is_valid(alias): raise ValueError( - "alias should be either a valid identifier or one of " - "{None, True, False}." + f"The alias you're setting for `{param}` should be either a " + "valid identifier or one of {None, True, False}, but given " + f"value is: `{alias}`" ) if alias == param: @@ -363,7 +367,7 @@ def __str__(self): class MetadataRequest: """Contains the metadata request info of a consumer. - Instances of :class:`MethodMetadataRequest` are used in this class for each + Instances of `MethodMetadataRequest` are used in this class for each available method under `metadatarequest.{method}`. Consumer-only classes such as simple estimators return a serialized @@ -569,7 +573,7 @@ def from_str(cls, route): Returns ------- obj : MethodMapping - A :class:`~utils.metadata_requests.MethodMapping` instance + A :class:`~sklearn.utils.metadata_routing.MethodMapping` instance constructed from the given string. """ routing = cls() @@ -595,10 +599,10 @@ class MetadataRouter: This class is used by router objects to store and handle metadata routing. 
Routing information is stored as a dictionary of the form ``{"object_name": RouteMappingPair(method_mapping, routing_info)}``, where ``method_mapping`` - is an instance of :class:`~utils.metadata_requests.MethodMapping` and + is an instance of :class:`~sklearn.utils.metadata_routing.MethodMapping` and ``routing_info`` is either a - :class:`~utils.metadata_requests.MetadataRequest` or a - :class:`~utils.metadata_requests.MetadataRouter` instance. + :class:`~utils.metadata_routing.MetadataRequest` or a + :class:`~utils.metadata_routing.MetadataRouter` instance. .. versionadded:: 1.3 @@ -610,14 +614,15 @@ class MetadataRouter: # this is here for us to use this attribute's value instead of doing # `isinstance`` in our checks, so that we avoid issues when people vendor - # this file instad of using it directly from scikit-learn. + # this file instead of using it directly from scikit-learn. _type = "metadata_router" def __init__(self, owner): self._route_mappings = dict() - # `_self` is used if the router is also a consumer. _self, (added using - # `add_self_request()`) is treated differently from the other objects - # which are stored in _route_mappings. + # `_self_request` is used if the router is also a consumer. + # _self_request, (added using `add_self_request()`) is treated + # differently from the other objects which are stored in + # _route_mappings. self._self_request = None self.owner = owner @@ -627,7 +632,7 @@ def add_self_request(self, obj): This method is used if the router is also a consumer, and hence the router itself needs to be included in the routing. The passed object can be an estimator or a - :class:``~utils.metadata_requests.MetadataRequest``. + :class:`~utils.metadata_routing.MetadataRequest`. A router should add itself using this method instead of `add` since it should be treated differently than the other objects to which metadata @@ -664,12 +669,12 @@ def add(self, *, method_mapping, **objs): ---------- method_mapping : MethodMapping or str The mapping between the child and the parent's methods. If str, the - output of :func:`~utils.metadata_requests.MethodMapping.from_str` + output of :func:`~sklearn.utils.metadata_routing.MethodMapping.from_str` is used. **objs : dict A dictionary of objects from which metadata is extracted by calling - :func:`~utils.metadata_requests.get_routing_for_object` on them. + :func:`~sklearn.utils.metadata_routing.get_routing_for_object` on them. Returns ------- @@ -753,7 +758,7 @@ def _route_params(self, *, params, method): Returns ------- params : Bunch - A :class:`~utils.Bunch` of {prop: value} which can be given to the + A :class:`~sklearn.utils.Bunch` of {prop: value} which can be given to the corresponding method. """ res = Bunch() @@ -892,8 +897,8 @@ def get_routing_for_object(obj=None): """Get a ``Metadata{Router, Request}`` instance from the given object. This function returns a - :class:`~utils.metadata_request.MetadataRouter` or a - :class:`~utils.metadata_request.MetadataRequest` from the given input. + :class:`~sklearn.utils.metadata_routing.MetadataRouter` or a + :class:`~sklearn.utils.metadata_routing.MetadataRequest` from the given input. 
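The docstring cross-references above all point at the metadata-routing helpers. As a hedged sketch (assuming a scikit-learn version that ships `sklearn.utils.metadata_routing`, i.e. 1.3 or later), this is roughly how a consumer request is retrieved and how a router composes one; the meta-estimator name is made up.

```python
from sklearn.linear_model import LogisticRegression
from sklearn.utils.metadata_routing import MetadataRouter, get_routing_for_object

est = LogisticRegression()

# A copy of the estimator's own MetadataRequest, as described above.
request = get_routing_for_object(est)
print(request)

# A router, as a meta-estimator would build it: route the caller's ``fit``
# to the sub-estimator's ``fit``. The string form is parsed via MethodMapping.from_str.
router = MetadataRouter(owner="MyMetaEstimator").add(method_mapping="fit", estimator=est)
print(router)
```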
This function always returns a copy or an instance constructed from the input, such that changing the output of this function will not change the @@ -905,12 +910,12 @@ def get_routing_for_object(obj=None): ---------- obj : object - If the object is already a - :class:`~utils.metadata_requests.MetadataRequest` or a - :class:`~utils.metadata_requests.MetadataRouter`, return a copy + :class:`~sklearn.utils.metadata_routing.MetadataRequest` or a + :class:`~sklearn.utils.metadata_routing.MetadataRouter`, return a copy of that. - If the object provides a `get_metadata_routing` method, return a copy of the output of that method. - - Returns an empty :class:`~utils.metadata_requests.MetadataRequest` + - Returns an empty :class:`~sklearn.utils.metadata_routing.MetadataRequest` otherwise. Returns @@ -968,7 +973,7 @@ def get_routing_for_object(obj=None): .. note:: This method is only relevant if this estimator is used as a sub-estimator of a meta-estimator, e.g. used inside a - :class:`pipeline.Pipeline`. Otherwise it has no effect. + :class:`~sklearn.pipeline.Pipeline`. Otherwise it has no effect. Parameters ---------- @@ -1138,8 +1143,8 @@ def _build_request_for_signature(cls, router, method): """Build the `MethodMetadataRequest` for a method using its signature. This method takes all arguments from the method signature and uses - ``None`` as their default request value, except ``X``, ``y``, - ``*args``, and ``**kwargs``. + ``None`` as their default request value, except ``X``, ``y``, ``Y``, + ``Xt``, ``yt``, ``*args``, and ``**kwargs``. Parameters ---------- @@ -1175,7 +1180,7 @@ def _build_request_for_signature(cls, router, method): def _get_default_requests(cls): """Collect default request values. - This method combines the information present in ``metadata_request__*`` + This method combines the information present in ``__metadata_request__*`` class attributes, as well as determining request keys from method signatures. """ @@ -1226,7 +1231,7 @@ def _get_metadata_request(self): Returns ------- request : MetadataRequest - A :class:`~.utils.metadata_requests.MetadataRequest` instance. + A :class:`~sklearn.utils.metadata_routing.MetadataRequest` instance. """ if hasattr(self, "_metadata_request"): requests = get_routing_for_object(self._metadata_request) @@ -1244,7 +1249,7 @@ def get_metadata_routing(self): Returns ------- routing : MetadataRequest - A :class:`~utils.metadata_routing.MetadataRequest` encapsulating + A :class:`~sklearn.utils.metadata_routing.MetadataRequest` encapsulating routing information. """ return self._get_metadata_request() diff --git a/sklearn/utils/_mocking.py b/sklearn/utils/_mocking.py index 83b5eecc34033..16acabf03755b 100644 --- a/sklearn/utils/_mocking.py +++ b/sklearn/utils/_mocking.py @@ -1,10 +1,9 @@ import numpy as np from ..base import BaseEstimator, ClassifierMixin -from .metaestimators import available_if -from .validation import _check_sample_weight, _num_samples, check_array -from .validation import check_is_fitted from ..utils._metadata_requests import RequestMethod +from .metaestimators import available_if +from .validation import _check_sample_weight, _num_samples, check_array, check_is_fitted class ArraySlicingWrapper: @@ -71,10 +70,12 @@ class CheckingClassifier(ClassifierMixin, BaseEstimator): ---------- check_y, check_X : callable, default=None The callable used to validate `X` and `y`. These callable should return - a bool where `False` will trigger an `AssertionError`. + a bool where `False` will trigger an `AssertionError`. 
If `None`, the + data is not validated. Default is `None`. check_y_params, check_X_params : dict, default=None - The optional parameters to pass to `check_X` and `check_y`. + The optional parameters to pass to `check_X` and `check_y`. If `None`, + then no parameters are passed in. methods_to_check : "all" or list of str, default="all" The methods in which the checks should be applied. By default, @@ -149,8 +150,10 @@ def _check_X_y(self, X, y=None, should_be_fitted=True): ---------- X : array-like of shape (n_samples, n_features) The data set. + `X` is checked only if `check_X` is not `None` (default is None). y : array-like of shape (n_samples), default=None - The corresponding target, by default None. + The corresponding target, by default `None`. + `y` is checked only if `check_y` is not `None` (default is None). should_be_fitted : bool, default=True Whether or not the classifier should be already fitted. By default True. diff --git a/sklearn/utils/_param_validation.py b/sklearn/utils/_param_validation.py index c97ca0bba8929..bf063a1945621 100644 --- a/sklearn/utils/_param_validation.py +++ b/sklearn/utils/_param_validation.py @@ -1,20 +1,17 @@ -from abc import ABC -from abc import abstractmethod -from collections.abc import Iterable import functools import math -from inspect import signature -from numbers import Integral -from numbers import Real import operator import re import warnings +from abc import ABC, abstractmethod +from collections.abc import Iterable +from inspect import signature +from numbers import Integral, Real import numpy as np -from scipy.sparse import issparse -from scipy.sparse import csr_matrix +from scipy.sparse import csr_matrix, issparse -from .._config import get_config, config_context +from .._config import config_context, get_config from .validation import _is_arraylike_not_scalar @@ -49,6 +46,7 @@ def validate_parameter_constraints(parameter_constraints, params, caller_name): - the string "boolean" - the string "verbose" - the string "cv_object" + - the string "nan" - a MissingValues object representing markers for missing values - a HasMethods object, representing method(s) an object must have - a Hidden object, representing a constraint not meant to be exposed to the user @@ -140,10 +138,12 @@ def make_constraint(constraint): constraint = make_constraint(constraint.constraint) constraint.hidden = True return constraint + if isinstance(constraint, str) and constraint == "nan": + return _NanConstraint() raise ValueError(f"Unknown constraint type: {constraint}") -def validate_params(parameter_constraints, *, prefer_skip_nested_validation=False): +def validate_params(parameter_constraints, *, prefer_skip_nested_validation): """Decorator to validate types and values of functions and methods. Parameters @@ -155,7 +155,7 @@ def validate_params(parameter_constraints, *, prefer_skip_nested_validation=Fals Note that the *args and **kwargs parameters are not validated and must not be present in the parameter_constraints dictionary. - prefer_skip_nested_validation : bool, default=False + prefer_skip_nested_validation : bool If True, the validation of parameters of inner estimators or functions called by the decorated function will be skipped. @@ -690,7 +690,10 @@ class HasMethods(_Constraint): The method(s) that the object is expected to expose. 
""" - @validate_params({"methods": [str, list]}) + @validate_params( + {"methods": [str, list]}, + prefer_skip_nested_validation=True, + ) def __init__(self, methods): super().__init__() if isinstance(methods, str): diff --git a/sklearn/utils/_plotting.py b/sklearn/utils/_plotting.py index c0671046c9cd4..84eaacc152884 100644 --- a/sklearn/utils/_plotting.py +++ b/sklearn/utils/_plotting.py @@ -1,9 +1,9 @@ import numpy as np from . import check_consistent_length, check_matplotlib_support +from ._response import _get_response_values_binary from .multiclass import type_of_target from .validation import _check_pos_label_consistency -from ._response import _get_response_values_binary class _BinaryClassifierCurveDisplayMixin: diff --git a/sklearn/utils/_pprint.py b/sklearn/utils/_pprint.py index c96b1ce764c4a..cea1510746cbe 100644 --- a/sklearn/utils/_pprint.py +++ b/sklearn/utils/_pprint.py @@ -67,8 +67,8 @@ import pprint from collections import OrderedDict -from ..base import BaseEstimator from .._config import get_config +from ..base import BaseEstimator from . import is_scalar_nan diff --git a/sklearn/utils/_random.pxd b/sklearn/utils/_random.pxd index 89741ea38179c..b5199fc506f4e 100644 --- a/sklearn/utils/_random.pxd +++ b/sklearn/utils/_random.pxd @@ -14,7 +14,7 @@ cdef enum: # particularly tiny on Windows/MSVC. # It corresponds to the maximum representable value for # 32-bit signed integers (i.e. 2^31 - 1). - RAND_R_MAX = 0x7FFFFFFF + RAND_R_MAX = 2147483647 cpdef sample_without_replacement(cnp.int_t n_population, cnp.int_t n_samples, diff --git a/sklearn/utils/_response.py b/sklearn/utils/_response.py index e753ced045e1e..a8504099eb194 100644 --- a/sklearn/utils/_response.py +++ b/sklearn/utils/_response.py @@ -5,9 +5,110 @@ import numpy as np from ..base import is_classifier +from .multiclass import type_of_target from .validation import _check_response_method, check_is_fitted +def _process_predict_proba(*, y_pred, target_type, classes, pos_label): + """Get the response values when the response method is `predict_proba`. + + This function process the `y_pred` array in the binary and multi-label cases. + In the binary case, it selects the column corresponding to the positive + class. In the multi-label case, it stacks the predictions if they are not + in the "compressed" format `(n_samples, n_outputs)`. + + Parameters + ---------- + y_pred : ndarray + Output of `estimator.predict_proba`. The shape depends on the target type: + + - for binary classification, it is a 2d array of shape `(n_samples, 2)`; + - for multiclass classification, it is a 2d array of shape + `(n_samples, n_classes)`; + - for multilabel classification, it is either a list of 2d arrays of shape + `(n_samples, 2)` (e.g. `RandomForestClassifier` or `KNeighborsClassifier`) or + an array of shape `(n_samples, n_outputs)` (e.g. `MLPClassifier` or + `RidgeClassifier`). + + target_type : {"binary", "multiclass", "multilabel-indicator"} + Type of the target. + + classes : ndarray of shape (n_classes,) or list of such arrays + Class labels as reported by `estimator.classes_`. + + pos_label : int, float, bool or str + Only used with binary and multiclass targets. + + Returns + ------- + y_pred : ndarray of shape (n_samples,), (n_samples, n_classes) or \ + (n_samples, n_output) + Compressed predictions format as requested by the metrics. + """ + if target_type == "binary" and y_pred.shape[1] < 2: + # We don't handle classifiers trained on a single class. 
+ raise ValueError( + f"Got predict_proba of shape {y_pred.shape}, but need " + "classifier with two classes." + ) + + if target_type == "binary": + col_idx = np.flatnonzero(classes == pos_label)[0] + return y_pred[:, col_idx] + elif target_type == "multilabel-indicator": + # Use a compress format of shape `(n_samples, n_output)`. + # Only `MLPClassifier` and `RidgeClassifier` return an array of shape + # `(n_samples, n_outputs)`. + if isinstance(y_pred, list): + # list of arrays of shape `(n_samples, 2)` + return np.vstack([p[:, -1] for p in y_pred]).T + else: + # array of shape `(n_samples, n_outputs)` + return y_pred + + return y_pred + + +def _process_decision_function(*, y_pred, target_type, classes, pos_label): + """Get the response values when the response method is `decision_function`. + + This function process the `y_pred` array in the binary and multi-label cases. + In the binary case, it inverts the sign of the score if the positive label + is not `classes[1]`. In the multi-label case, it stacks the predictions if + they are not in the "compressed" format `(n_samples, n_outputs)`. + + Parameters + ---------- + y_pred : ndarray + Output of `estimator.predict_proba`. The shape depends on the target type: + + - for binary classification, it is a 1d array of shape `(n_samples,)` where the + sign is assuming that `classes[1]` is the positive class; + - for multiclass classification, it is a 2d array of shape + `(n_samples, n_classes)`; + - for multilabel classification, it is a 2d array of shape `(n_samples, + n_outputs)`. + + target_type : {"binary", "multiclass", "multilabel-indicator"} + Type of the target. + + classes : ndarray of shape (n_classes,) or list of such arrays + Class labels as reported by `estimator.classes_`. + + pos_label : int, float, bool or str + Only used with binary and multiclass targets. + + Returns + ------- + y_pred : ndarray of shape (n_samples,), (n_samples, n_classes) or \ + (n_samples, n_output) + Compressed predictions format as requested by the metrics. + """ + if target_type == "binary" and pos_label == classes[0]: + return -1 * y_pred + return y_pred + + def _get_response_values( estimator, X, @@ -16,12 +117,18 @@ def _get_response_values( ): """Compute the response values of a classifier or a regressor. - The response values are predictions, one scalar value for each sample in X - that depends on the specific choice of `response_method`. + The response values are predictions such that it follows the following shape: + + - for binary classification, it is a 1d array of shape `(n_samples,)`; + - for multiclass classification, it is a 2d array of shape `(n_samples, n_classes)`; + - for multilabel classification, it is a 2d array of shape `(n_samples, n_outputs)`; + - for regression, it is a 1d array of shape `(n_samples,)`. If `estimator` is a binary classifier, also return the label for the effective positive class. + This utility is used primarily in the displays and the scikit-learn scorers. + .. versionadded:: 1.3 Parameters @@ -51,8 +158,9 @@ def _get_response_values( Returns ------- - y_pred : ndarray of shape (n_samples,) - Target scores calculated from the provided response_method + y_pred : ndarray of shape (n_samples,), (n_samples, n_classes) or \ + (n_samples, n_outputs) + Target scores calculated from the provided `response_method` and `pos_label`. 
pos_label : int, float, bool, str or None @@ -72,32 +180,33 @@ def _get_response_values( if is_classifier(estimator): prediction_method = _check_response_method(estimator, response_method) classes = estimator.classes_ - target_type = "binary" if len(classes) <= 2 else "multiclass" + target_type = type_of_target(classes) - if pos_label is not None and pos_label not in classes.tolist(): - raise ValueError( - f"pos_label={pos_label} is not a valid label: It should be " - f"one of {classes}" - ) - elif pos_label is None and target_type == "binary": - pos_label = pos_label if pos_label is not None else classes[-1] + if target_type in ("binary", "multiclass"): + if pos_label is not None and pos_label not in classes.tolist(): + raise ValueError( + f"pos_label={pos_label} is not a valid label: It should be " + f"one of {classes}" + ) + elif pos_label is None and target_type == "binary": + pos_label = classes[-1] y_pred = prediction_method(X) + if prediction_method.__name__ == "predict_proba": - if target_type == "binary" and y_pred.shape[1] <= 2: - if y_pred.shape[1] == 2: - col_idx = np.flatnonzero(classes == pos_label)[0] - y_pred = y_pred[:, col_idx] - else: - err_msg = ( - f"Got predict_proba of shape {y_pred.shape}, but need " - "classifier with two classes." - ) - raise ValueError(err_msg) + y_pred = _process_predict_proba( + y_pred=y_pred, + target_type=target_type, + classes=classes, + pos_label=pos_label, + ) elif prediction_method.__name__ == "decision_function": - if target_type == "binary": - if pos_label == classes[0]: - y_pred *= -1 + y_pred = _process_decision_function( + y_pred=y_pred, + target_type=target_type, + classes=classes, + pos_label=pos_label, + ) else: # estimator is a regressor if response_method != "predict": raise ValueError( diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py index 8071544091fca..814cab9e568e4 100644 --- a/sklearn/utils/_set_output.py +++ b/sklearn/utils/_set_output.py @@ -2,8 +2,8 @@ from scipy.sparse import issparse -from . import check_pandas_support from .._config import get_config +from . import check_pandas_support from ._available_if import available_if @@ -124,10 +124,27 @@ def _wrap_data_with_container(method, data_to_wrap, original_input, estimator): if output_config["dense"] == "default" or not _auto_wrap_is_configured(estimator): return data_to_wrap + def _is_pandas_df(X): + """Return True if the X is a pandas dataframe. + + This is is backport from 1.4 in 1.3.1 for compatibility. + """ + import sys + + if hasattr(X, "columns") and hasattr(X, "iloc"): + # Likely a pandas DataFrame, we explicitly check the type to confirm. + try: + pd = sys.modules["pandas"] + except KeyError: + return False + return isinstance(X, pd.DataFrame) + return False + # dense_config == "pandas" + index = original_input.index if _is_pandas_df(original_input) else None return _wrap_in_pandas_container( data_to_wrap=data_to_wrap, - index=getattr(original_input, "index", None), + index=index, columns=estimator.get_feature_names_out, ) diff --git a/sklearn/utils/_show_versions.py b/sklearn/utils/_show_versions.py index 066c7fc1bd676..714ed37744d57 100644 --- a/sklearn/utils/_show_versions.py +++ b/sklearn/utils/_show_versions.py @@ -7,10 +7,9 @@ import platform import sys -from ..utils.fixes import threadpool_info -from .. import __version__ - +from .. 
import __version__ +from ..utils.fixes import threadpool_info from ._openmp_helpers import _openmp_parallelism_enabled @@ -62,7 +61,7 @@ def _get_deps_info(): "sklearn": __version__, } - from importlib.metadata import version, PackageNotFoundError + from importlib.metadata import PackageNotFoundError, version for modname in deps: try: diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index d4e35e6451dd9..278bf38a65690 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -10,53 +10,51 @@ # Giorgio Patrini # Thierry Guillemot # License: BSD 3 clause +import atexit +import contextlib +import functools +import inspect import os import os.path as op -import inspect -import warnings +import re +import shutil import sys -import functools import tempfile -from subprocess import check_output, STDOUT, CalledProcessError -from subprocess import TimeoutExpired -import re -import contextlib -from collections.abc import Iterable -from collections.abc import Sequence - -import scipy as sp +import unittest +import warnings +from collections.abc import Iterable, Sequence from functools import wraps from inspect import signature - -import shutil -import atexit -import unittest +from subprocess import STDOUT, CalledProcessError, TimeoutExpired, check_output from unittest import TestCase -from numpy.testing import assert_allclose as np_assert_allclose -from numpy.testing import assert_almost_equal -from numpy.testing import assert_approx_equal -from numpy.testing import assert_array_equal -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_less -import numpy as np import joblib +import numpy as np +import scipy as sp +from numpy.testing import assert_allclose as np_assert_allclose +from numpy.testing import ( + assert_almost_equal, + assert_approx_equal, + assert_array_almost_equal, + assert_array_equal, + assert_array_less, + assert_no_warnings, +) import sklearn from sklearn.utils import ( - IS_PYPY, _IS_32BIT, + IS_PYPY, _in_unstable_openblas_configuration, ) from sklearn.utils._array_api import _check_array_api_dispatch +from sklearn.utils.fixes import threadpool_info from sklearn.utils.multiclass import check_classification_targets from sklearn.utils.validation import ( check_array, check_is_fitted, check_X_y, ) -from sklearn.utils.fixes import threadpool_info - __all__ = [ "assert_raises", @@ -68,6 +66,7 @@ "assert_approx_equal", "assert_allclose", "assert_run_python_script", + "assert_no_warnings", "SkipTest", ] @@ -83,32 +82,6 @@ assert_raises_regexp = assert_raises_regex -# To remove when we support numpy 1.7 -def assert_no_warnings(func, *args, **kw): - """ - Parameters - ---------- - func - *args - **kw - """ - # very important to avoid uncontrolled state propagation - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - - result = func(*args, **kw) - if hasattr(np, "FutureWarning"): - # Filter out numpy-specific warnings in numpy >= 1.9 - w = [e for e in w if e.category is not np.VisibleDeprecationWarning] - - if len(w) > 0: - raise AssertionError( - "Got warnings when calling %s: [%s]" - % (func.__name__, ", ".join(str(warning) for warning in w)) - ) - return result - - def ignore_warnings(obj=None, category=Warning): """Context manager and decorator to ignore warnings. 
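The `_testing.py` hunk above removes the hand-rolled `assert_no_warnings` helper in favor of re-exporting `numpy.testing.assert_no_warnings` (and lists it in `__all__`). A short usage sketch of the re-exported helper, which keeps the same call convention: pass the callable followed by its arguments.

```python
import numpy as np
from numpy.testing import assert_no_warnings

# Passes: the call completes without emitting any warning.
assert_no_warnings(np.sqrt, np.array([1.0, 4.0]))

# Would fail: np.sqrt(-1.0) emits a RuntimeWarning, so an AssertionError is raised.
# assert_no_warnings(np.sqrt, -1.0)
```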
diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py index dcf60fb257a27..0ee479c77ac76 100644 --- a/sklearn/utils/class_weight.py +++ b/sklearn/utils/class_weight.py @@ -3,7 +3,6 @@ # License: BSD 3 clause import numpy as np - from scipy import sparse @@ -48,7 +47,7 @@ def compute_class_weight(class_weight, *, classes, y): # Find the weight of each class as present in y. le = LabelEncoder() y_ind = le.fit_transform(y) - if not all(np.in1d(classes, le.classes_)): + if not all(np.isin(classes, le.classes_)): raise ValueError("classes should have valid labels that are in y") recip_freq = len(y) / (len(le.classes_) * np.bincount(y_ind).astype(np.float64)) @@ -69,8 +68,10 @@ def compute_class_weight(class_weight, *, classes, y): n_weighted_classes = len(classes) - len(unweighted_classes) if unweighted_classes and n_weighted_classes != len(class_weight): + unweighted_classes_user_friendly_str = np.array(unweighted_classes).tolist() raise ValueError( - f"The classes, {unweighted_classes}, are not in class_weight" + f"The classes, {unweighted_classes_user_friendly_str}, are not in" + " class_weight" ) return weight @@ -184,7 +185,7 @@ def compute_sample_weight(class_weight, y, *, indices=None): if classes_missing: # Make missing classes' weight zero - weight_k[np.in1d(y_full, list(classes_missing))] = 0.0 + weight_k[np.isin(y_full, list(classes_missing))] = 0.0 expanded_class_weight.append(weight_k) diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py index a5a70ed699197..685d48d6a0c58 100644 --- a/sklearn/utils/deprecation.py +++ b/sklearn/utils/deprecation.py @@ -1,6 +1,5 @@ -import warnings import functools - +import warnings __all__ = ["deprecated"] diff --git a/sklearn/utils/discovery.py b/sklearn/utils/discovery.py index 083dca5cfcea5..a2f67ba758804 100644 --- a/sklearn/utils/discovery.py +++ b/sklearn/utils/discovery.py @@ -1,5 +1,5 @@ -import pkgutil import inspect +import pkgutil from importlib import import_module from operator import itemgetter from pathlib import Path @@ -38,15 +38,15 @@ def all_estimators(type_filter=None): and ``class`` is the actual type of the class. """ # lazy import to avoid circular imports from sklearn.base - from . import IS_PYPY - from ._testing import ignore_warnings from ..base import ( BaseEstimator, ClassifierMixin, + ClusterMixin, RegressorMixin, TransformerMixin, - ClusterMixin, ) + from . import IS_PYPY + from ._testing import ignore_warnings def is_abstract(c): if not (hasattr(c, "__abstractmethods__")): diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 7d8e673210ff7..ce9dd73ad3747 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -1,85 +1,80 @@ -import warnings import importlib import itertools import pickle import re +import warnings from copy import deepcopy from functools import partial, wraps from inspect import signature -from numbers import Real, Integral +from numbers import Integral, Real +import joblib import numpy as np from scipy import sparse from scipy.stats import rankdata -import joblib -from . import IS_PYPY from .. 
import config_context -from ._param_validation import Interval -from ._testing import _get_args -from ._testing import assert_raise_message -from ._testing import assert_array_equal -from ._testing import assert_array_almost_equal -from ._testing import assert_allclose -from ._testing import assert_allclose_dense_sparse -from ._testing import assert_array_less -from ._testing import set_random_state -from ._testing import SkipTest -from ._testing import ignore_warnings -from ._testing import create_memmap_backed_data -from ._testing import raises -from . import is_scalar_nan - -from ..linear_model import LinearRegression -from ..linear_model import LogisticRegression -from ..linear_model import RANSACRegressor -from ..linear_model import Ridge -from ..linear_model import SGDRegressor - from ..base import ( - clone, ClusterMixin, + RegressorMixin, + clone, is_classifier, - is_regressor, is_outlier_detector, - RegressorMixin, + is_regressor, +) +from ..datasets import ( + load_iris, + make_blobs, + make_classification, + make_multilabel_classification, + make_regression, +) +from ..exceptions import DataConversionWarning, NotFittedError, SkipTestWarning +from ..feature_selection import SelectFromModel, SelectKBest +from ..linear_model import ( + LinearRegression, + LogisticRegression, + RANSACRegressor, + Ridge, + SGDRegressor, ) - from ..metrics import accuracy_score, adjusted_rand_score, f1_score -from ..random_projection import BaseRandomProjection -from ..feature_selection import SelectKBest -from ..feature_selection import SelectFromModel -from ..pipeline import make_pipeline -from ..exceptions import DataConversionWarning -from ..exceptions import NotFittedError -from ..exceptions import SkipTestWarning -from ..model_selection import train_test_split -from ..model_selection import ShuffleSplit +from ..metrics.pairwise import linear_kernel, pairwise_distances, rbf_kernel +from ..model_selection import ShuffleSplit, train_test_split from ..model_selection._validation import _safe_split -from ..metrics.pairwise import rbf_kernel, linear_kernel, pairwise_distances -from ..utils.fixes import sp_version -from ..utils.fixes import parse_version +from ..pipeline import make_pipeline +from ..preprocessing import StandardScaler, scale +from ..random_projection import BaseRandomProjection +from ..utils._array_api import _convert_to_numpy, get_namespace +from ..utils._array_api import device as array_device +from ..utils._param_validation import ( + InvalidParameterError, + generate_invalid_param_val, + make_constraint, +) +from ..utils.fixes import parse_version, sp_version from ..utils.validation import check_is_fitted -from ..utils._array_api import _convert_to_numpy, get_namespace, device as array_device -from ..utils._param_validation import make_constraint -from ..utils._param_validation import generate_invalid_param_val -from ..utils._param_validation import InvalidParameterError - -from . import shuffle +from . 
import IS_PYPY, is_scalar_nan, shuffle +from ._param_validation import Interval from ._tags import ( _DEFAULT_TAGS, _safe_tags, ) -from .validation import has_fit_parameter, _num_samples -from ..preprocessing import StandardScaler -from ..preprocessing import scale -from ..datasets import ( - load_iris, - make_blobs, - make_classification, - make_multilabel_classification, - make_regression, +from ._testing import ( + SkipTest, + _get_args, + assert_allclose, + assert_allclose_dense_sparse, + assert_array_almost_equal, + assert_array_equal, + assert_array_less, + assert_raise_message, + create_memmap_backed_data, + ignore_warnings, + raises, + set_random_state, ) +from .validation import _num_samples, has_fit_parameter REGRESSION_DATASET = None CROSS_DECOMPOSITION = ["PLSCanonical", "PLSRegression", "CCA", "PLSSVD"] @@ -581,8 +576,8 @@ def check_estimator(estimator=None, generate_only=False): independently and report the checks that are failing. scikit-learn provides a pytest specific decorator, - :func:`~sklearn.utils.parametrize_with_checks`, making it easier to test - multiple estimators. + :func:`~sklearn.utils.estimator_checks.parametrize_with_checks`, making it + easier to test multiple estimators. Parameters ---------- @@ -1266,7 +1261,10 @@ def check_dtype_object(name, estimator_orig): if "string" not in tags["X_types"]: X[0, 0] = {"foo": "bar"} - msg = "argument must be a string.* number" + # This error is raised by: + # - `np.asarray` in `check_array` + # - `_unique_python` for encoders + msg = "argument must be .* string.* number" with raises(TypeError, match=msg): estimator.fit(X, y) else: @@ -1432,7 +1430,7 @@ def _apply_on_subsets(func, X): if sparse.issparse(result_full): result_full = result_full.A - result_by_batch = [x.A for x in result_by_batch] + result_by_batch = [x.toarray() for x in result_by_batch] return np.ravel(result_full), np.ravel(result_by_batch) @@ -3458,7 +3456,6 @@ def _enforce_estimator_tags_y(estimator, y): # Create strictly positive y. The minimal increment above 0 is 1, as # y could be of integer dtype. y += 1 + abs(y.min()) - # Estimators with a `binary_only` tag only accept up to two unique y values if _safe_tags(estimator, key="binary_only") and y.size > 0: y = np.where(y == y.flat[0], y, y.flat[0] + 1) # Estimators in mono_output_task_error raise ValueError if y is of 1-D @@ -3478,7 +3475,8 @@ def _enforce_estimator_tags_X(estimator, X, kernel=linear_kernel): if _safe_tags(estimator, key="requires_positive_X"): X = X - X.min() if "categorical" in _safe_tags(estimator, key="X_types"): - X = (X - X.min()).astype(np.int32) + dtype = np.float64 if _safe_tags(estimator, key="allow_nan") else np.int32 + X = np.round((X - X.min())).astype(dtype) if estimator.__class__.__name__ == "SkewedChi2Sampler": # SkewedChi2Sampler requires X > -skewdness in transform @@ -4484,7 +4482,7 @@ def check_set_output_transform_pandas(name, transformer_orig): outputs_pandas = _output_from_fit_transform(transformer_pandas, name, X, df, y) except ValueError as e: # transformer does not support sparse data - assert str(e) == "Pandas output does not support sparse data.", e + assert "Pandas output does not support sparse data." 
in str(e), e return for case in outputs_default: @@ -4493,7 +4491,7 @@ def check_set_output_transform_pandas(name, transformer_orig): ) -def check_global_ouptut_transform_pandas(name, transformer_orig): +def check_global_output_transform_pandas(name, transformer_orig): """Check that setting globally the output of a transformer to pandas lead to the right results.""" try: @@ -4530,7 +4528,7 @@ def check_global_ouptut_transform_pandas(name, transformer_orig): ) except ValueError as e: # transformer does not support sparse data - assert str(e) == "Pandas output does not support sparse data.", e + assert "Pandas output does not support sparse data." in str(e), e return for case in outputs_default: diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index 404bc5e095976..e548204463fba 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -17,10 +17,10 @@ from scipy import linalg, sparse from . import check_random_state +from ._array_api import _is_numpy_namespace, get_namespace from ._logistic_sigmoid import _log_logistic_sigmoid from .sparsefuncs_fast import csr_row_norms from .validation import check_array -from ._array_api import get_namespace, _is_numpy_namespace def squared_norm(x): @@ -72,8 +72,7 @@ def row_norms(X, squared=False): The row-wise (squared) Euclidean norm of X. """ if sparse.issparse(X): - if not sparse.isspmatrix_csr(X): - X = sparse.csr_matrix(X) + X = X.tocsr() norms = csr_row_norms(X) else: norms = np.einsum("ij,ij->i", X, X) @@ -425,7 +424,7 @@ def randomized_svd( >>> U.shape, s.shape, Vh.shape ((3, 2), (2,), (2, 4)) """ - if sparse.isspmatrix_lil(M) or sparse.isspmatrix_dok(M): + if sparse.issparse(M) and M.format in ("lil", "dok"): warnings.warn( "Calculating SVD of a {} is expensive. " "csr_matrix is more efficient.".format(type(M).__name__), diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py index 71bccd1b633a6..bcdc227d2f93d 100644 --- a/sklearn/utils/fixes.py +++ b/sklearn/utils/fixes.py @@ -10,26 +10,28 @@ # # License: BSD 3 clause -from importlib import resources import sys +from importlib import resources -import sklearn import numpy as np import scipy +import scipy.sparse.linalg import scipy.stats import threadpoolctl -from .deprecation import deprecated -from ..externals._packaging.version import parse as parse_version +import sklearn +from ..externals._packaging.version import parse as parse_version +from .deprecation import deprecated np_version = parse_version(np.__version__) +np_base_version = parse_version(np_version.base_version) sp_version = parse_version(scipy.__version__) sp_base_version = parse_version(sp_version.base_version) try: - from scipy.optimize._linesearch import line_search_wolfe2, line_search_wolfe1 + from scipy.optimize._linesearch import line_search_wolfe1, line_search_wolfe2 except ImportError: # SciPy < 1.8 from scipy.optimize.linesearch import line_search_wolfe2, line_search_wolfe1 # type: ignore # noqa @@ -109,6 +111,19 @@ def _mode(a, axis=0): return scipy.stats.mode(a, axis=axis) +# TODO: Remove when Scipy 1.12 is the minimum supported version +if sp_base_version >= parse_version("1.12.0"): + _sparse_linalg_cg = scipy.sparse.linalg.cg +else: + + def _sparse_linalg_cg(A, b, **kwargs): + if "rtol" in kwargs: + kwargs["tol"] = kwargs.pop("rtol") + if "atol" not in kwargs: + kwargs["atol"] = "legacy" + return scipy.sparse.linalg.cg(A, b, **kwargs) + + ############################################################################### # Backport of Python 3.9's importlib.resources # TODO: Remove when 
Python 3.9 is the minimum supported version @@ -158,3 +173,18 @@ def _contents(data_module): ) else: return resources.contents(data_module) + + +# For +1.25 NumPy versions exceptions and warnings are being moved +# to a dedicated submodule. +if np_version >= parse_version("1.25.0"): + from numpy.exceptions import ComplexWarning, VisibleDeprecationWarning +else: + from numpy import ComplexWarning, VisibleDeprecationWarning # type: ignore # noqa + + +# TODO: Remove when Scipy 1.6 is the minimum supported version +try: + from scipy.integrate import trapezoid # type: ignore # noqa +except ImportError: + from scipy.integrate import trapz as trapezoid # type: ignore # noqa diff --git a/sklearn/utils/graph.py b/sklearn/utils/graph.py index ffd4f63a466de..65b088197f46e 100644 --- a/sklearn/utils/graph.py +++ b/sklearn/utils/graph.py @@ -54,7 +54,7 @@ def single_source_shortest_path_length(graph, source, *, cutoff=None): >>> sorted(single_source_shortest_path_length(graph, 2).items()) [(0, 1), (1, 1), (2, 0), (3, 1), (4, 1), (5, 1)] """ - if sparse.isspmatrix(graph): + if sparse.issparse(graph): graph = graph.tolil() else: graph = sparse.lil_matrix(graph) diff --git a/sklearn/utils/metaestimators.py b/sklearn/utils/metaestimators.py index 405edeae0a55d..4d1c52972dbdd 100644 --- a/sklearn/utils/metaestimators.py +++ b/sklearn/utils/metaestimators.py @@ -2,15 +2,15 @@ # Author: Joel Nothman # Andreas Mueller # License: BSD -from typing import List, Any - from abc import ABCMeta, abstractmethod -import numpy as np from contextlib import suppress +from typing import Any, List + +import numpy as np +from ..base import BaseEstimator from ..utils import _safe_indexing from ..utils._tags import _safe_tags -from ..base import BaseEstimator from ._available_if import available_if __all__ = ["available_if"] diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py index 8c816b40eec4b..1f46f6400df98 100644 --- a/sklearn/utils/multiclass.py +++ b/sklearn/utils/multiclass.py @@ -6,18 +6,16 @@ ========================================== """ +import warnings from collections.abc import Sequence from itertools import chain -import warnings - -from scipy.sparse import issparse -from scipy.sparse import isspmatrix_dok -from scipy.sparse import isspmatrix_lil import numpy as np +from scipy.sparse import issparse -from .validation import check_array, _assert_all_finite from ..utils._array_api import get_namespace +from ..utils.fixes import VisibleDeprecationWarning +from .validation import _assert_all_finite, check_array def _unique_multiclass(y): @@ -164,10 +162,10 @@ def is_multilabel(y): ensure_min_features=0, ) with warnings.catch_warnings(): - warnings.simplefilter("error", np.VisibleDeprecationWarning) + warnings.simplefilter("error", VisibleDeprecationWarning) try: y = check_array(y, dtype=None, **check_y_kwargs) - except (np.VisibleDeprecationWarning, ValueError) as e: + except (VisibleDeprecationWarning, ValueError) as e: if str(e).startswith("Complex data not supported"): raise @@ -179,7 +177,7 @@ def is_multilabel(y): return False if issparse(y): - if isspmatrix_dok(y) or isspmatrix_lil(y): + if y.format in ("dok", "lil"): y = y.tocsr() labels = xp.unique_values(y.data) return ( @@ -327,11 +325,11 @@ def type_of_target(y, input_name=""): ) with warnings.catch_warnings(): - warnings.simplefilter("error", np.VisibleDeprecationWarning) + warnings.simplefilter("error", VisibleDeprecationWarning) if not issparse(y): try: y = check_array(y, dtype=None, **check_y_kwargs) - except 
(np.VisibleDeprecationWarning, ValueError) as e: + except (VisibleDeprecationWarning, ValueError) as e: if str(e).startswith("Complex data not supported"): raise diff --git a/sklearn/utils/optimize.py b/sklearn/utils/optimize.py index 7e9b864afe043..68a1ae1dddb98 100644 --- a/sklearn/utils/optimize.py +++ b/sklearn/utils/optimize.py @@ -13,11 +13,12 @@ # Modifications by Gael Varoquaux, Mathieu Blondel and Tom Dupre la Tour # License: BSD -import numpy as np import warnings -from .fixes import line_search_wolfe1, line_search_wolfe2 +import numpy as np + from ..exceptions import ConvergenceWarning +from .fixes import line_search_wolfe1, line_search_wolfe2 class _LineSearchError(RuntimeError): diff --git a/sklearn/utils/random.py b/sklearn/utils/random.py index 3c8c71be14bec..c58808939bfdc 100644 --- a/sklearn/utils/random.py +++ b/sklearn/utils/random.py @@ -1,9 +1,10 @@ # Author: Hamzeh Alsalhi # # License: BSD 3 clause +import array + import numpy as np import scipy.sparse as sp -import array from . import check_random_state from ._random import sample_without_replacement diff --git a/sklearn/utils/sparsefuncs.py b/sklearn/utils/sparsefuncs.py index 6b0f8bea3f774..b2d0e725b8453 100644 --- a/sklearn/utils/sparsefuncs.py +++ b/sklearn/utils/sparsefuncs.py @@ -3,15 +3,19 @@ # Giorgio Patrini # # License: BSD 3 clause -import scipy.sparse as sp import numpy as np +import scipy.sparse as sp +from ..utils.validation import _check_sample_weight from .sparsefuncs_fast import ( - csr_mean_variance_axis0 as _csr_mean_var_axis0, csc_mean_variance_axis0 as _csc_mean_var_axis0, +) +from .sparsefuncs_fast import ( + csr_mean_variance_axis0 as _csr_mean_var_axis0, +) +from .sparsefuncs_fast import ( incr_mean_variance_axis0 as _incr_mean_var_axis0, ) -from ..utils.validation import _check_sample_weight def _raise_typeerror(X): @@ -103,7 +107,7 @@ def mean_variance_axis(X, axis, weights=None, return_sum_weights=False): """ _raise_error_wrong_axis(axis) - if sp.isspmatrix_csr(X): + if sp.issparse(X) and X.format == "csr": if axis == 0: return _csr_mean_var_axis0( X, weights=weights, return_sum_weights=return_sum_weights @@ -112,7 +116,7 @@ def mean_variance_axis(X, axis, weights=None, return_sum_weights=False): return _csc_mean_var_axis0( X.T, weights=weights, return_sum_weights=return_sum_weights ) - elif sp.isspmatrix_csc(X): + elif sp.issparse(X) and X.format == "csc": if axis == 0: return _csc_mean_var_axis0( X, weights=weights, return_sum_weights=return_sum_weights @@ -187,7 +191,7 @@ def incr_mean_variance_axis(X, *, axis, last_mean, last_var, last_n, weights=Non """ _raise_error_wrong_axis(axis) - if not (sp.isspmatrix_csr(X) or sp.isspmatrix_csc(X)): + if not (sp.issparse(X) and X.format in ("csc", "csr")): _raise_typeerror(X) if np.size(last_n) == 1: @@ -234,9 +238,9 @@ def inplace_column_scale(X, scale): scale : ndarray of shape (n_features,), dtype={np.float32, np.float64} Array of precomputed feature-wise values to use for scaling. """ - if sp.isspmatrix_csc(X): + if sp.issparse(X) and X.format == "csc": inplace_csr_row_scale(X.T, scale) - elif sp.isspmatrix_csr(X): + elif sp.issparse(X) and X.format == "csr": inplace_csr_column_scale(X, scale) else: _raise_typeerror(X) @@ -256,9 +260,9 @@ def inplace_row_scale(X, scale): scale : ndarray of shape (n_features,), dtype={np.float32, np.float64} Array of precomputed sample-wise values to use for scaling. 
""" - if sp.isspmatrix_csc(X): + if sp.issparse(X) and X.format == "csc": inplace_csr_column_scale(X.T, scale) - elif sp.isspmatrix_csr(X): + elif sp.issparse(X) and X.format == "csr": inplace_csr_row_scale(X, scale) else: _raise_typeerror(X) @@ -372,9 +376,9 @@ def inplace_swap_row(X, m, n): n : int Index of the row of X to be swapped. """ - if sp.isspmatrix_csc(X): + if sp.issparse(X) and X.format == "csc": inplace_swap_row_csc(X, m, n) - elif sp.isspmatrix_csr(X): + elif sp.issparse(X) and X.format == "csr": inplace_swap_row_csr(X, m, n) else: _raise_typeerror(X) @@ -400,9 +404,9 @@ def inplace_swap_column(X, m, n): m += X.shape[1] if n < 0: n += X.shape[1] - if sp.isspmatrix_csc(X): + if sp.issparse(X) and X.format == "csc": inplace_swap_row_csr(X, m, n) - elif sp.isspmatrix_csr(X): + elif sp.issparse(X) and X.format == "csr": inplace_swap_row_csc(X, m, n) else: _raise_typeerror(X) @@ -501,7 +505,7 @@ def min_max_axis(X, axis, ignore_nan=False): maxs : ndarray of shape (n_features,), dtype={np.float32, np.float64} Feature-wise maxima. """ - if sp.isspmatrix_csr(X) or sp.isspmatrix_csc(X): + if sp.issparse(X) and X.format in ("csr", "csc"): if ignore_nan: return _sparse_nan_min_max(X, axis=axis) else: @@ -610,7 +614,7 @@ def csc_median_axis_0(X): median : ndarray of shape (n_features,) Median. """ - if not sp.isspmatrix_csc(X): + if not (sp.issparse(X) and X.format == "csc"): raise TypeError("Expected matrix of CSC format, got %s" % X.format) indptr = X.indptr diff --git a/sklearn/utils/tests/test_array_api.py b/sklearn/utils/tests/test_array_api.py index 77fa20e6d0b58..28abf47103b25 100644 --- a/sklearn/utils/tests/test_array_api.py +++ b/sklearn/utils/tests/test_array_api.py @@ -1,18 +1,19 @@ import numpy -from numpy.testing import assert_allclose, assert_array_equal import pytest +from numpy.testing import assert_allclose, assert_array_equal +from sklearn._config import config_context from sklearn.base import BaseEstimator -from sklearn.utils._array_api import get_namespace -from sklearn.utils._array_api import _NumPyAPIWrapper -from sklearn.utils._array_api import _ArrayAPIWrapper -from sklearn.utils._array_api import _asarray_with_order -from sklearn.utils._array_api import _convert_to_numpy -from sklearn.utils._array_api import _estimator_with_converted_arrays +from sklearn.utils._array_api import ( + _ArrayAPIWrapper, + _asarray_with_order, + _convert_to_numpy, + _estimator_with_converted_arrays, + _NumPyAPIWrapper, + get_namespace, +) from sklearn.utils._testing import skip_if_array_api_compat_not_configured -from sklearn._config import config_context - pytestmark = pytest.mark.filterwarnings( "ignore:The numpy.array_api submodule:UserWarning" ) diff --git a/sklearn/utils/tests/test_arrayfuncs.py b/sklearn/utils/tests/test_arrayfuncs.py index 5c43e480d395c..b0a02e13d1639 100644 --- a/sklearn/utils/tests/test_arrayfuncs.py +++ b/sklearn/utils/tests/test_arrayfuncs.py @@ -1,5 +1,5 @@ -import pytest import numpy as np +import pytest from sklearn.utils._testing import assert_allclose from sklearn.utils.arrayfuncs import min_pos diff --git a/sklearn/utils/tests/test_class_weight.py b/sklearn/utils/tests/test_class_weight.py index ebeeeeac56e8a..c9b0a56143d76 100644 --- a/sklearn/utils/tests/test_class_weight.py +++ b/sklearn/utils/tests/test_class_weight.py @@ -6,11 +6,8 @@ from sklearn.datasets import make_blobs from sklearn.linear_model import LogisticRegression from sklearn.tree import DecisionTreeClassifier - -from sklearn.utils.class_weight import compute_class_weight -from 
sklearn.utils.class_weight import compute_sample_weight -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_almost_equal +from sklearn.utils._testing import assert_almost_equal, assert_array_almost_equal +from sklearn.utils.class_weight import compute_class_weight, compute_sample_weight def test_compute_class_weight(): diff --git a/sklearn/utils/tests/test_cython_blas.py b/sklearn/utils/tests/test_cython_blas.py index 1b311f5160db5..e57bfc3ec5a9c 100644 --- a/sklearn/utils/tests/test_cython_blas.py +++ b/sklearn/utils/tests/test_cython_blas.py @@ -1,21 +1,24 @@ -import pytest - import numpy as np +import pytest +from sklearn.utils._cython_blas import ( + ColMajor, + NoTrans, + RowMajor, + Trans, + _asum_memview, + _axpy_memview, + _copy_memview, + _dot_memview, + _gemm_memview, + _gemv_memview, + _ger_memview, + _nrm2_memview, + _rot_memview, + _rotg_memview, + _scal_memview, +) from sklearn.utils._testing import assert_allclose -from sklearn.utils._cython_blas import _dot_memview -from sklearn.utils._cython_blas import _asum_memview -from sklearn.utils._cython_blas import _axpy_memview -from sklearn.utils._cython_blas import _nrm2_memview -from sklearn.utils._cython_blas import _copy_memview -from sklearn.utils._cython_blas import _scal_memview -from sklearn.utils._cython_blas import _rotg_memview -from sklearn.utils._cython_blas import _rot_memview -from sklearn.utils._cython_blas import _gemv_memview -from sklearn.utils._cython_blas import _ger_memview -from sklearn.utils._cython_blas import _gemm_memview -from sklearn.utils._cython_blas import RowMajor, ColMajor -from sklearn.utils._cython_blas import Trans, NoTrans def _numpy_to_cython(dtype): diff --git a/sklearn/utils/tests/test_cython_templating.py b/sklearn/utils/tests/test_cython_templating.py index eeb8319e07415..f5c9fa7a9087e 100644 --- a/sklearn/utils/tests/test_cython_templating.py +++ b/sklearn/utils/tests/test_cython_templating.py @@ -1,5 +1,7 @@ import pathlib + import pytest + import sklearn diff --git a/sklearn/utils/tests/test_deprecation.py b/sklearn/utils/tests/test_deprecation.py index 98c69a8abb780..4d04b48da2f0b 100644 --- a/sklearn/utils/tests/test_deprecation.py +++ b/sklearn/utils/tests/test_deprecation.py @@ -4,10 +4,10 @@ import pickle -from sklearn.utils.deprecation import _is_deprecated -from sklearn.utils.deprecation import deprecated import pytest +from sklearn.utils.deprecation import _is_deprecated, deprecated + @deprecated("qwerty") class MockClass1: diff --git a/sklearn/utils/tests/test_encode.py b/sklearn/utils/tests/test_encode.py index 083db25b7ca80..9118eb56f0ba4 100644 --- a/sklearn/utils/tests/test_encode.py +++ b/sklearn/utils/tests/test_encode.py @@ -4,10 +4,7 @@ import pytest from numpy.testing import assert_array_equal -from sklearn.utils._encode import _unique -from sklearn.utils._encode import _encode -from sklearn.utils._encode import _check_unknown -from sklearn.utils._encode import _get_counts +from sklearn.utils._encode import _check_unknown, _encode, _get_counts, _unique @pytest.mark.parametrize( diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index ff736963723b8..535c821623490 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -3,48 +3,45 @@ # tests to make sure estimator_checks works without pytest. 
import importlib -import unittest import sys +import unittest import warnings from numbers import Integral, Real +import joblib import numpy as np import scipy.sparse as sp -import joblib from sklearn import config_context, get_config from sklearn.base import BaseEstimator, ClassifierMixin, OutlierMixin +from sklearn.cluster import MiniBatchKMeans from sklearn.datasets import make_multilabel_classification -from sklearn.utils import deprecated +from sklearn.decomposition import PCA +from sklearn.ensemble import ExtraTreesClassifier +from sklearn.exceptions import ConvergenceWarning, SkipTestWarning +from sklearn.linear_model import ( + LinearRegression, + LogisticRegression, + MultiTaskElasticNet, + SGDClassifier, +) +from sklearn.mixture import GaussianMixture +from sklearn.neighbors import KNeighborsRegressor +from sklearn.svm import SVC, NuSVC +from sklearn.utils import _array_api, all_estimators, deprecated +from sklearn.utils._param_validation import Interval, StrOptions from sklearn.utils._testing import ( - raises, - ignore_warnings, MinimalClassifier, MinimalRegressor, MinimalTransformer, SkipTest, + ignore_warnings, + raises, ) - -from sklearn.utils.validation import check_is_fitted, check_X_y -from sklearn.ensemble import ExtraTreesClassifier -from sklearn.linear_model import LinearRegression, SGDClassifier -from sklearn.mixture import GaussianMixture -from sklearn.cluster import MiniBatchKMeans -from sklearn.decomposition import PCA -from sklearn.linear_model import MultiTaskElasticNet, LogisticRegression -from sklearn.svm import SVC, NuSVC -from sklearn.neighbors import KNeighborsRegressor -from sklearn.utils.validation import check_array -from sklearn.utils import all_estimators -from sklearn.exceptions import SkipTestWarning -from sklearn.utils import _array_api -from sklearn.utils.metaestimators import available_if -from sklearn.utils.estimator_checks import check_decision_proba_consistency -from sklearn.utils._param_validation import Interval, StrOptions - from sklearn.utils.estimator_checks import ( _NotAnArray, _set_checking_parameters, + _yield_all_checks, check_array_api_input, check_class_weight_balanced_linear_classifier, check_classifier_data_not_an_array, @@ -52,21 +49,23 @@ check_classifiers_multilabel_output_format_predict, check_classifiers_multilabel_output_format_predict_proba, check_dataframe_column_names_consistency, + check_decision_proba_consistency, check_estimator, check_estimator_get_tags_default_keys, check_estimators_unfitted, + check_fit_check_is_fitted, check_fit_score_takes_y, + check_methods_sample_order_invariance, + check_methods_subset_invariance, check_no_attributes_set_in_init, + check_outlier_contamination, + check_outlier_corruption, check_regressor_data_not_an_array, check_requires_y_none, - check_outlier_corruption, - check_outlier_contamination, set_random_state, - check_fit_check_is_fitted, - check_methods_sample_order_invariance, - check_methods_subset_invariance, - _yield_all_checks, ) +from sklearn.utils.metaestimators import available_if +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y class CorrectNotFittedError(ValueError): @@ -708,22 +707,20 @@ def test_check_estimator_clones(): ExtraTreesClassifier, MiniBatchKMeans, ]: - with ignore_warnings(category=FutureWarning): - # when 'est = SGDClassifier()' + # without fitting + with ignore_warnings(category=ConvergenceWarning): est = Estimator() _set_checking_parameters(est) set_random_state(est) - # without fitting old_hash = joblib.hash(est) 
check_estimator(est) assert old_hash == joblib.hash(est) - with ignore_warnings(category=FutureWarning): - # when 'est = SGDClassifier()' + # with fitting + with ignore_warnings(category=ConvergenceWarning): est = Estimator() _set_checking_parameters(est) set_random_state(est) - # with fitting est.fit(iris.data + 10, iris.target) old_hash = joblib.hash(est) check_estimator(est) diff --git a/sklearn/utils/tests/test_estimator_html_repr.py b/sklearn/utils/tests/test_estimator_html_repr.py index 655e21a6cc25d..e4327dcbc2c46 100644 --- a/sklearn/utils/tests/test_estimator_html_repr.py +++ b/sklearn/utils/tests/test_estimator_html_repr.py @@ -1,37 +1,31 @@ -from contextlib import closing import html +from contextlib import closing from io import StringIO import pytest from sklearn import config_context -from sklearn.linear_model import LogisticRegression -from sklearn.neural_network import MLPClassifier -from sklearn.impute import SimpleImputer -from sklearn.decomposition import PCA -from sklearn.decomposition import TruncatedSVD -from sklearn.pipeline import Pipeline -from sklearn.pipeline import FeatureUnion +from sklearn.cluster import AgglomerativeClustering, Birch from sklearn.compose import ColumnTransformer -from sklearn.ensemble import VotingClassifier +from sklearn.decomposition import PCA, TruncatedSVD +from sklearn.ensemble import StackingClassifier, StackingRegressor, VotingClassifier from sklearn.feature_selection import SelectPercentile -from sklearn.cluster import Birch -from sklearn.cluster import AgglomerativeClustering -from sklearn.preprocessing import OneHotEncoder -from sklearn.preprocessing import StandardScaler -from sklearn.svm import LinearSVC -from sklearn.svm import LinearSVR -from sklearn.tree import DecisionTreeClassifier -from sklearn.multiclass import OneVsOneClassifier -from sklearn.ensemble import StackingClassifier -from sklearn.ensemble import StackingRegressor from sklearn.gaussian_process.kernels import ExpSineSquared +from sklearn.impute import SimpleImputer from sklearn.kernel_ridge import KernelRidge - +from sklearn.linear_model import LogisticRegression from sklearn.model_selection import RandomizedSearchCV -from sklearn.utils._estimator_html_repr import _write_label_html -from sklearn.utils._estimator_html_repr import _get_visual_block -from sklearn.utils._estimator_html_repr import estimator_html_repr +from sklearn.multiclass import OneVsOneClassifier +from sklearn.neural_network import MLPClassifier +from sklearn.pipeline import FeatureUnion, Pipeline +from sklearn.preprocessing import OneHotEncoder, StandardScaler +from sklearn.svm import LinearSVC, LinearSVR +from sklearn.tree import DecisionTreeClassifier +from sklearn.utils._estimator_html_repr import ( + _get_visual_block, + _write_label_html, + estimator_html_repr, +) @pytest.mark.parametrize("checked", [True, False]) diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index 37610419188ad..cfb6a19f5ee9b 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -4,37 +4,41 @@ # # License: BSD 3 clause import numpy as np -from scipy import sparse -from scipy import linalg -from scipy.sparse.linalg import eigsh +import pytest +from scipy import linalg, sparse from scipy.linalg import eigh +from scipy.sparse.linalg import eigsh from scipy.special import expit -import pytest +from sklearn.datasets import make_low_rank_matrix, make_sparse_spd_matrix from sklearn.utils import gen_batches from sklearn.utils._arpack import 
_init_arpack_v0 -from sklearn.utils._testing import assert_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import skip_if_32bit +from sklearn.utils._testing import ( + assert_allclose, + assert_allclose_dense_sparse, + assert_almost_equal, + assert_array_almost_equal, + assert_array_equal, + skip_if_32bit, +) +from sklearn.utils.extmath import ( + _deterministic_vector_sign_flip, + _incremental_mean_and_var, + _randomized_eigsh, + _safe_accumulator_op, + cartesian, + density, + log_logistic, + randomized_svd, + row_norms, + safe_sparse_dot, + softmax, + stable_cumsum, + svd_flip, + weighted_mode, +) from sklearn.utils.fixes import _mode -from sklearn.utils.extmath import density, _safe_accumulator_op -from sklearn.utils.extmath import randomized_svd, _randomized_eigsh -from sklearn.utils.extmath import row_norms -from sklearn.utils.extmath import weighted_mode -from sklearn.utils.extmath import cartesian -from sklearn.utils.extmath import log_logistic -from sklearn.utils.extmath import svd_flip -from sklearn.utils.extmath import _incremental_mean_and_var -from sklearn.utils.extmath import _deterministic_vector_sign_flip -from sklearn.utils.extmath import softmax -from sklearn.utils.extmath import stable_cumsum -from sklearn.utils.extmath import safe_sparse_dot -from sklearn.datasets import make_low_rank_matrix, make_sparse_spd_matrix - def test_density(): rng = np.random.RandomState(0) diff --git a/sklearn/utils/tests/test_fast_dict.py b/sklearn/utils/tests/test_fast_dict.py index 96c14068f0db1..8fada45db3f52 100644 --- a/sklearn/utils/tests/test_fast_dict.py +++ b/sklearn/utils/tests/test_fast_dict.py @@ -1,7 +1,7 @@ """ Test fast_dict. 
""" import numpy as np -from numpy.testing import assert_array_equal, assert_allclose +from numpy.testing import assert_allclose, assert_array_equal from sklearn.utils._fast_dict import IntFloatDict, argmin diff --git a/sklearn/utils/tests/test_fixes.py b/sklearn/utils/tests/test_fixes.py index 39c60b9b416ea..fbc111a3b55fa 100644 --- a/sklearn/utils/tests/test_fixes.py +++ b/sklearn/utils/tests/test_fixes.py @@ -7,7 +7,6 @@ import pytest from sklearn.utils._testing import assert_array_equal - from sklearn.utils.fixes import _object_dtype_isnan, delayed diff --git a/sklearn/utils/tests/test_graph.py b/sklearn/utils/tests/test_graph.py index 78196fbb05fba..d64108a40d8ab 100644 --- a/sklearn/utils/tests/test_graph.py +++ b/sklearn/utils/tests/test_graph.py @@ -1,10 +1,10 @@ -import pytest import numpy as np +import pytest from scipy.sparse.csgraph import connected_components +from sklearn.metrics.pairwise import pairwise_distances from sklearn.neighbors import kneighbors_graph from sklearn.utils.graph import _fix_connected_components -from sklearn.metrics.pairwise import pairwise_distances def test_fix_connected_components(): diff --git a/sklearn/utils/tests/test_metaestimators.py b/sklearn/utils/tests/test_metaestimators.py index 33ee937f34371..8e6d4eec35973 100644 --- a/sklearn/utils/tests/test_metaestimators.py +++ b/sklearn/utils/tests/test_metaestimators.py @@ -1,7 +1,7 @@ -import pytest - import pickle +import pytest + from sklearn.utils.metaestimators import available_if diff --git a/sklearn/utils/tests/test_mocking.py b/sklearn/utils/tests/test_mocking.py index 718c62d5cc83b..c7c732e9bf8f1 100644 --- a/sklearn/utils/tests/test_mocking.py +++ b/sklearn/utils/tests/test_mocking.py @@ -1,19 +1,15 @@ import numpy as np import pytest +from numpy.testing import assert_allclose, assert_array_equal from scipy import sparse -from numpy.testing import assert_array_equal -from numpy.testing import assert_allclose - from sklearn.datasets import load_iris -from sklearn.utils import check_array -from sklearn.utils import _safe_indexing -from sklearn.utils._testing import _convert_container - +from sklearn.utils import _safe_indexing, check_array from sklearn.utils._mocking import ( - _MockEstimatorOnOffPrediction, CheckingClassifier, + _MockEstimatorOnOffPrediction, ) +from sklearn.utils._testing import _convert_container @pytest.fixture diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py index 731edbefc3925..36bb8c9f4cc9b 100644 --- a/sklearn/utils/tests/test_multiclass.py +++ b/sklearn/utils/tests/test_multiclass.py @@ -1,31 +1,35 @@ -import numpy as np -import scipy.sparse as sp from itertools import product -import pytest - -from scipy.sparse import issparse -from scipy.sparse import csc_matrix -from scipy.sparse import csr_matrix -from scipy.sparse import coo_matrix -from scipy.sparse import dok_matrix -from scipy.sparse import lil_matrix -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_array_almost_equal -from sklearn.utils._testing import assert_allclose -from sklearn.utils.estimator_checks import _NotAnArray - -from sklearn.utils.multiclass import unique_labels -from sklearn.utils.multiclass import is_multilabel -from sklearn.utils.multiclass import type_of_target -from sklearn.utils.multiclass import class_distribution -from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.multiclass import _ovr_decision_function +import numpy as np +import pytest +import scipy.sparse 
as sp +from scipy.sparse import ( + coo_matrix, + csc_matrix, + csr_matrix, + dok_matrix, + issparse, + lil_matrix, +) -from sklearn.utils.metaestimators import _safe_split +from sklearn import datasets from sklearn.model_selection import ShuffleSplit from sklearn.svm import SVC -from sklearn import datasets +from sklearn.utils._testing import ( + assert_allclose, + assert_array_almost_equal, + assert_array_equal, +) +from sklearn.utils.estimator_checks import _NotAnArray +from sklearn.utils.metaestimators import _safe_split +from sklearn.utils.multiclass import ( + _ovr_decision_function, + check_classification_targets, + class_distribution, + is_multilabel, + type_of_target, + unique_labels, +) sparse_multilable_explicit_zero = csc_matrix(np.array([[0, 1], [1, 0]])) sparse_multilable_explicit_zero[:, 0] = 0 diff --git a/sklearn/utils/tests/test_murmurhash.py b/sklearn/utils/tests/test_murmurhash.py index 4403c9a49275c..18730302124f9 100644 --- a/sklearn/utils/tests/test_murmurhash.py +++ b/sklearn/utils/tests/test_murmurhash.py @@ -3,9 +3,9 @@ # License: BSD 3 clause import numpy as np +from numpy.testing import assert_array_almost_equal, assert_array_equal + from sklearn.utils.murmurhash import murmurhash3_32 -from numpy.testing import assert_array_almost_equal -from numpy.testing import assert_array_equal def test_mmhash3_int(): diff --git a/sklearn/utils/tests/test_optimize.py b/sklearn/utils/tests/test_optimize.py index 82719635366b0..a8bcd1aebf793 100644 --- a/sklearn/utils/tests/test_optimize.py +++ b/sklearn/utils/tests/test_optimize.py @@ -1,9 +1,8 @@ import numpy as np - -from sklearn.utils.optimize import _newton_cg from scipy.optimize import fmin_ncg from sklearn.utils._testing import assert_array_almost_equal +from sklearn.utils.optimize import _newton_cg def test_newton_cg(): diff --git a/sklearn/utils/tests/test_parallel.py b/sklearn/utils/tests/test_parallel.py index 2f56c584300d1..3a359ef8690e5 100644 --- a/sklearn/utils/tests/test_parallel.py +++ b/sklearn/utils/tests/test_parallel.py @@ -12,8 +12,7 @@ from sklearn.model_selection import GridSearchCV from sklearn.pipeline import make_pipeline from sklearn.preprocessing import StandardScaler - -from sklearn.utils.parallel import delayed, Parallel +from sklearn.utils.parallel import Parallel, delayed def get_working_memory(): diff --git a/sklearn/utils/tests/test_param_validation.py b/sklearn/utils/tests/test_param_validation.py index 022f9f373a049..2af84707cd2ed 100644 --- a/sklearn/utils/tests/test_param_validation.py +++ b/sklearn/utils/tests/test_param_validation.py @@ -1,41 +1,46 @@ from numbers import Integral, Real import numpy as np -from scipy.sparse import csr_matrix import pytest +from scipy.sparse import csr_matrix from sklearn._config import config_context, get_config -from sklearn.base import BaseEstimator -from sklearn.base import _fit_context +from sklearn.base import BaseEstimator, _fit_context from sklearn.model_selection import LeaveOneOut from sklearn.utils import deprecated -from sklearn.utils._param_validation import Hidden -from sklearn.utils._param_validation import Interval -from sklearn.utils._param_validation import Options -from sklearn.utils._param_validation import StrOptions -from sklearn.utils._param_validation import _ArrayLikes -from sklearn.utils._param_validation import _Booleans -from sklearn.utils._param_validation import _Callables -from sklearn.utils._param_validation import _CVObjects -from sklearn.utils._param_validation import _InstancesOf -from sklearn.utils._param_validation 
import MissingValues -from sklearn.utils._param_validation import _PandasNAConstraint -from sklearn.utils._param_validation import _IterablesNotString -from sklearn.utils._param_validation import _NoneConstraint -from sklearn.utils._param_validation import _RandomStates -from sklearn.utils._param_validation import _SparseMatrices -from sklearn.utils._param_validation import _VerboseHelper -from sklearn.utils._param_validation import HasMethods -from sklearn.utils._param_validation import make_constraint -from sklearn.utils._param_validation import generate_invalid_param_val -from sklearn.utils._param_validation import generate_valid_param -from sklearn.utils._param_validation import validate_params -from sklearn.utils._param_validation import InvalidParameterError -from sklearn.utils._param_validation import RealNotInt +from sklearn.utils._param_validation import ( + HasMethods, + Hidden, + Interval, + InvalidParameterError, + MissingValues, + Options, + RealNotInt, + StrOptions, + _ArrayLikes, + _Booleans, + _Callables, + _CVObjects, + _InstancesOf, + _IterablesNotString, + _NanConstraint, + _NoneConstraint, + _PandasNAConstraint, + _RandomStates, + _SparseMatrices, + _VerboseHelper, + generate_invalid_param_val, + generate_valid_param, + make_constraint, + validate_params, +) # Some helpers for the tests -@validate_params({"a": [Real], "b": [Real], "c": [Real], "d": [Real]}) +@validate_params( + {"a": [Real], "b": [Real], "c": [Real], "d": [Real]}, + prefer_skip_nested_validation=True, +) def _func(a, b=0, *args, c, d=0, **kwargs): """A function to test the validation of functions.""" @@ -43,12 +48,12 @@ def _func(a, b=0, *args, c, d=0, **kwargs): class _Class: """A class to test the _InstancesOf constraint and the validation of methods.""" - @validate_params({"a": [Real]}) + @validate_params({"a": [Real]}, prefer_skip_nested_validation=True) def _method(self, a): """A validated method""" @deprecated() - @validate_params({"a": [Real]}) + @validate_params({"a": [Real]}, prefer_skip_nested_validation=True) def _deprecated_method(self, a): """A deprecated validated method""" @@ -383,6 +388,7 @@ def test_generate_valid_param(constraint): (Real, 0.5), ("boolean", False), ("verbose", 1), + ("nan", np.nan), (MissingValues(), -1), (MissingValues(), -1.0), (MissingValues(), None), @@ -416,6 +422,7 @@ def test_is_satisfied_by(constraint_declaration, value): (MissingValues(numeric_only=True), MissingValues), (HasMethods("fit"), HasMethods), ("cv_object", _CVObjects), + ("nan", _NanConstraint), ], ) def test_make_constraint(constraint_declaration, expected_constraint_class): @@ -469,7 +476,7 @@ def test_validate_params_missing_params(): constraints """ - @validate_params({"a": [int]}) + @validate_params({"a": [int]}, prefer_skip_nested_validation=True) def func(a, b): pass @@ -527,7 +534,9 @@ def test_stroptions_deprecated_subset(): def test_hidden_constraint(): """Check that internal constraints are not exposed in the error message.""" - @validate_params({"param": [Hidden(list), dict]}) + @validate_params( + {"param": [Hidden(list), dict]}, prefer_skip_nested_validation=True + ) def f(param): pass @@ -549,7 +558,10 @@ def f(param): def test_hidden_stroptions(): """Check that we can have 2 StrOptions constraints, one being hidden.""" - @validate_params({"param": [StrOptions({"auto"}), Hidden(StrOptions({"warn"}))]}) + @validate_params( + {"param": [StrOptions({"auto"}), Hidden(StrOptions({"warn"}))]}, + prefer_skip_nested_validation=True, + ) def f(param): pass @@ -581,7 +593,7 @@ def 
test_boolean_constraint_deprecated_int(): validation when using an int for a parameter accepting a boolean. """ - @validate_params({"param": ["boolean"]}) + @validate_params({"param": ["boolean"]}, prefer_skip_nested_validation=True) def f(param): pass @@ -599,7 +611,10 @@ def f(param): def test_no_validation(): """Check that validation can be skipped for a parameter.""" - @validate_params({"param1": [int, None], "param2": "no_validation"}) + @validate_params( + {"param1": [int, None], "param2": "no_validation"}, + prefer_skip_nested_validation=True, + ) def f(param1=None, param2=None): pass @@ -680,7 +695,7 @@ def test_real_not_int(): def test_skip_param_validation(): """Check that param validation can be skipped using config_context.""" - @validate_params({"a": [int]}) + @validate_params({"a": [int]}, prefer_skip_nested_validation=True) def f(a): pass @@ -696,7 +711,7 @@ def f(a): def test_skip_nested_validation(prefer_skip_nested_validation): """Check that nested validation can be skipped.""" - @validate_params({"a": [int]}) + @validate_params({"a": [int]}, prefer_skip_nested_validation=True) def f(a): pass diff --git a/sklearn/utils/tests/test_plotting.py b/sklearn/utils/tests/test_plotting.py index 00b1f7f74fcd0..b2448c2b044e1 100644 --- a/sklearn/utils/tests/test_plotting.py +++ b/sklearn/utils/tests/test_plotting.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from sklearn.utils._plotting import _validate_score_name, _interval_max_min_ratio +from sklearn.utils._plotting import _interval_max_min_ratio, _validate_score_name def metric(): diff --git a/sklearn/utils/tests/test_pprint.py b/sklearn/utils/tests/test_pprint.py index a4aaa8f21b6b7..ec48c4a012574 100644 --- a/sklearn/utils/tests/test_pprint.py +++ b/sklearn/utils/tests/test_pprint.py @@ -12,7 +12,7 @@ # Ignore flake8 (lots of line too long issues) -# flake8: noqa +# ruff: noqa # Constructors excerpted to test pprinting diff --git a/sklearn/utils/tests/test_random.py b/sklearn/utils/tests/test_random.py index 192d112337439..04a8ee371f358 100644 --- a/sklearn/utils/tests/test_random.py +++ b/sklearn/utils/tests/test_random.py @@ -1,11 +1,11 @@ import numpy as np import pytest import scipy.sparse as sp -from scipy.special import comb from numpy.testing import assert_array_almost_equal +from scipy.special import comb -from sklearn.utils.random import _random_choice_csc, sample_without_replacement from sklearn.utils._random import _our_rand_r_py +from sklearn.utils.random import _random_choice_csc, sample_without_replacement ############################################################################### diff --git a/sklearn/utils/tests/test_response.py b/sklearn/utils/tests/test_response.py index a0e7c30a81a77..e715e94cc60b1 100644 --- a/sklearn/utils/tests/test_response.py +++ b/sklearn/utils/tests/test_response.py @@ -1,18 +1,22 @@ import numpy as np import pytest -from sklearn.datasets import load_iris, make_classification, make_regression +from sklearn.datasets import ( + load_iris, + make_classification, + make_multilabel_classification, + make_regression, +) from sklearn.linear_model import ( LinearRegression, LogisticRegression, ) +from sklearn.multioutput import ClassifierChain from sklearn.preprocessing import scale from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor from sklearn.utils._mocking import _MockEstimatorOnOffPrediction -from sklearn.utils._testing import assert_allclose, assert_array_equal - from sklearn.utils._response import _get_response_values, _get_response_values_binary - +from 
sklearn.utils._testing import assert_allclose, assert_array_equal X, y = load_iris(return_X_y=True) # scale the data to avoid ConvergenceWarning with LogisticRegression @@ -232,3 +236,26 @@ def test_get_response_values_multiclass(estimator, response_method): assert predictions.shape == (X.shape[0], len(estimator.classes_)) if response_method == "predict_proba": assert np.logical_and(predictions >= 0, predictions <= 1).all() + + +@pytest.mark.parametrize( + "response_method", ["predict_proba", "decision_function", "predict"] +) +def test_get_response_values_multilabel_indicator(response_method): + X, Y = make_multilabel_classification(random_state=0) + estimator = ClassifierChain(LogisticRegression()).fit(X, Y) + + y_pred, pos_label = _get_response_values( + estimator, X, response_method=response_method + ) + assert pos_label is None + assert y_pred.shape == Y.shape + + if response_method == "predict_proba": + assert np.logical_and(y_pred >= 0, y_pred <= 1).all() + elif response_method == "decision_function": + # values returned by `decision_function` are not bounded in [0, 1] + assert (y_pred < 0).sum() > 0 + assert (y_pred > 1).sum() > 0 + else: # response_method == "predict" + assert np.logical_or(y_pred == 0, y_pred == 1).all() diff --git a/sklearn/utils/tests/test_seq_dataset.py b/sklearn/utils/tests/test_seq_dataset.py index 5c876fe62d74b..18f1be208b3f1 100644 --- a/sklearn/utils/tests/test_seq_dataset.py +++ b/sklearn/utils/tests/test_seq_dataset.py @@ -7,14 +7,14 @@ import pytest import scipy.sparse as sp from numpy.testing import assert_array_equal + +from sklearn.datasets import load_iris from sklearn.utils._seq_dataset import ( ArrayDataset32, ArrayDataset64, CSRDataset32, CSRDataset64, ) - -from sklearn.datasets import load_iris from sklearn.utils._testing import assert_allclose iris = load_iris() diff --git a/sklearn/utils/tests/test_set_output.py b/sklearn/utils/tests/test_set_output.py index 6c99e82c3020f..d1722a1553f9c 100644 --- a/sklearn/utils/tests/test_set_output.py +++ b/sklearn/utils/tests/test_set_output.py @@ -1,15 +1,17 @@ -import pytest from collections import namedtuple import numpy as np -from scipy.sparse import csr_matrix +import pytest from numpy.testing import assert_array_equal +from scipy.sparse import csr_matrix from sklearn._config import config_context, get_config -from sklearn.utils._set_output import _wrap_in_pandas_container -from sklearn.utils._set_output import _safe_set_output -from sklearn.utils._set_output import _SetOutputMixin -from sklearn.utils._set_output import _get_output_config +from sklearn.utils._set_output import ( + _get_output_config, + _safe_set_output, + _SetOutputMixin, + _wrap_in_pandas_container, +) def test__wrap_in_pandas_container_dense(): @@ -313,3 +315,32 @@ def test_set_output_named_tuple_out(): assert isinstance(X_trans, Output) assert_array_equal(X_trans.X, X) assert_array_equal(X_trans.Y, 2 * X) + + +class EstimatorWithListInput(_SetOutputMixin): + def fit(self, X, y=None): + assert isinstance(X, list) + self.n_features_in_ = len(X[0]) + return self + + def transform(self, X, y=None): + return X + + def get_feature_names_out(self, input_features=None): + return np.asarray([f"X{i}" for i in range(self.n_features_in_)], dtype=object) + + +def test_set_output_list_input(): + """Check set_output for list input. + + Non-regression test for #27037. 
+ """ + pd = pytest.importorskip("pandas") + + X = [[0, 1, 2, 3], [4, 5, 6, 7]] + est = EstimatorWithListInput() + est.set_output(transform="pandas") + + X_out = est.fit(X).transform(X) + assert isinstance(X_out, pd.DataFrame) + assert_array_equal(X_out.columns, ["X0", "X1", "X2", "X3"]) diff --git a/sklearn/utils/tests/test_shortest_path.py b/sklearn/utils/tests/test_shortest_path.py index 7994f1f48863a..c070ccd70b63d 100644 --- a/sklearn/utils/tests/test_shortest_path.py +++ b/sklearn/utils/tests/test_shortest_path.py @@ -2,6 +2,7 @@ import numpy as np from numpy.testing import assert_array_almost_equal + from sklearn.utils.graph import single_source_shortest_path_length diff --git a/sklearn/utils/tests/test_show_versions.py b/sklearn/utils/tests/test_show_versions.py index e6590bfde15f5..bd166dfd8e522 100644 --- a/sklearn/utils/tests/test_show_versions.py +++ b/sklearn/utils/tests/test_show_versions.py @@ -1,8 +1,6 @@ -from sklearn.utils.fixes import threadpool_info -from sklearn.utils._show_versions import _get_sys_info -from sklearn.utils._show_versions import _get_deps_info -from sklearn.utils._show_versions import show_versions +from sklearn.utils._show_versions import _get_deps_info, _get_sys_info, show_versions from sklearn.utils._testing import ignore_warnings +from sklearn.utils.fixes import threadpool_info def test_get_sys_info(): diff --git a/sklearn/utils/tests/test_sparsefuncs.py b/sklearn/utils/tests/test_sparsefuncs.py index 6a86be2f0445f..886367d480fc1 100644 --- a/sklearn/utils/tests/test_sparsefuncs.py +++ b/sklearn/utils/tests/test_sparsefuncs.py @@ -1,30 +1,29 @@ -import pytest import numpy as np +import pytest import scipy.sparse as sp - -from scipy import linalg -from numpy.testing import assert_array_almost_equal, assert_array_equal from numpy.random import RandomState +from numpy.testing import assert_array_almost_equal, assert_array_equal +from scipy import linalg from sklearn.datasets import make_classification +from sklearn.utils._testing import assert_allclose from sklearn.utils.sparsefuncs import ( - mean_variance_axis, + count_nonzero, + csc_median_axis_0, incr_mean_variance_axis, inplace_column_scale, inplace_row_scale, - inplace_swap_row, inplace_swap_column, + inplace_swap_row, + mean_variance_axis, min_max_axis, - count_nonzero, - csc_median_axis_0, ) from sklearn.utils.sparsefuncs_fast import ( assign_rows_csr, + csr_row_norms, inplace_csr_row_normalize_l1, inplace_csr_row_normalize_l2, - csr_row_norms, ) -from sklearn.utils._testing import assert_allclose def test_mean_variance_axis0(): @@ -456,9 +455,9 @@ def test_incr_mean_variance_axis_equivalence_mean_variance(X1, X2): X2, axis=axis, last_mean=updated_mean, last_var=updated_var, last_n=updated_n ) X = sp.vstack([X1, X2]) - assert_allclose(updated_mean, np.nanmean(X.A, axis=axis)) - assert_allclose(updated_var, np.nanvar(X.A, axis=axis)) - assert_allclose(updated_n, np.count_nonzero(~np.isnan(X.A), axis=0)) + assert_allclose(updated_mean, np.nanmean(X.toarray(), axis=axis)) + assert_allclose(updated_var, np.nanvar(X.toarray(), axis=axis)) + assert_allclose(updated_n, np.count_nonzero(~np.isnan(X.toarray()), axis=0)) def test_incr_mean_variance_no_new_n(): diff --git a/sklearn/utils/tests/test_testing.py b/sklearn/utils/tests/test_testing.py index 5875eb96bfd8e..a3e211ba6dd84 100644 --- a/sklearn/utils/tests/test_testing.py +++ b/sklearn/utils/tests/test_testing.py @@ -1,35 +1,32 @@ -import warnings -import unittest -import os import atexit +import os +import unittest +import warnings import numpy as 
np - -from scipy import sparse - import pytest +from scipy import sparse -from sklearn.utils.deprecation import deprecated -from sklearn.utils.metaestimators import available_if +from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.tree import DecisionTreeClassifier from sklearn.utils._testing import ( - assert_raises, + TempMemmap, + _convert_container, + _delete_folder, + assert_allclose, + assert_allclose_dense_sparse, assert_no_warnings, - set_random_state, assert_raise_message, - ignore_warnings, - check_docstring_parameters, - assert_allclose_dense_sparse, + assert_raises, assert_raises_regex, - TempMemmap, + check_docstring_parameters, create_memmap_backed_data, - _delete_folder, - _convert_container, + ignore_warnings, raises, - assert_allclose, + set_random_state, ) - -from sklearn.tree import DecisionTreeClassifier -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis +from sklearn.utils.deprecation import deprecated +from sklearn.utils.metaestimators import available_if def test_set_random_state(): diff --git a/sklearn/utils/tests/test_utils.py b/sklearn/utils/tests/test_utils.py index 2365a587f3b72..efb9c8666901c 100644 --- a/sklearn/utils/tests/test_utils.py +++ b/sklearn/utils/tests/test_utils.py @@ -1,37 +1,40 @@ -from copy import copy -from itertools import chain -import warnings import string import timeit +import warnings +from copy import copy +from itertools import chain -import pytest import numpy as np +import pytest import scipy.sparse as sp +from sklearn import config_context +from sklearn.utils import ( + _approximate_mode, + _determine_key_type, + _get_column_indices, + _message_with_time, + _print_elapsed_time, + _safe_assign, + _safe_indexing, + _to_object_array, + check_random_state, + column_or_1d, + deprecated, + gen_even_slices, + get_chunk_n_rows, + is_scalar_nan, + resample, + safe_mask, + shuffle, +) +from sklearn.utils._mocking import MockDataFrame from sklearn.utils._testing import ( - assert_array_equal, + _convert_container, assert_allclose_dense_sparse, + assert_array_equal, assert_no_warnings, - _convert_container, ) -from sklearn.utils import check_random_state -from sklearn.utils import _determine_key_type -from sklearn.utils import deprecated -from sklearn.utils import _get_column_indices -from sklearn.utils import resample -from sklearn.utils import safe_mask -from sklearn.utils import column_or_1d -from sklearn.utils import _safe_indexing -from sklearn.utils import _safe_assign -from sklearn.utils import shuffle -from sklearn.utils import gen_even_slices -from sklearn.utils import _message_with_time, _print_elapsed_time -from sklearn.utils import get_chunk_n_rows -from sklearn.utils import is_scalar_nan -from sklearn.utils import _to_object_array -from sklearn.utils import _approximate_mode -from sklearn.utils._mocking import MockDataFrame -from sklearn import config_context # toy array X_toy = np.arange(9).reshape((3, 3)) @@ -522,16 +525,16 @@ def test_shuffle_dont_convert_to_array(): a_s, b_s, c_s, d_s, e_s = shuffle(a, b, c, d, e, random_state=0) assert a_s == ["c", "b", "a"] - assert type(a_s) == list + assert type(a_s) == list # noqa: E721 assert_array_equal(b_s, ["c", "b", "a"]) assert b_s.dtype == object assert c_s == [3, 2, 1] - assert type(c_s) == list + assert type(c_s) == list # noqa: E721 assert_array_equal(d_s, np.array([["c", 2], ["b", 1], ["a", 0]], dtype=object)) - assert type(d_s) == MockDataFrame + assert type(d_s) == MockDataFrame # noqa: E721 assert_array_equal(e_s.toarray(), 
np.array([[4, 5], [2, 3], [0, 1]])) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 2d39279f81745..0a030e90839ed 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -1,74 +1,77 @@ """Tests for input validation functions""" import numbers -import warnings import re - -from tempfile import NamedTemporaryFile +import warnings from itertools import product from operator import itemgetter +from tempfile import NamedTemporaryFile -import pytest -from pytest import importorskip import numpy as np +import pytest import scipy.sparse as sp +from pytest import importorskip +import sklearn from sklearn._config import config_context -from sklearn.utils._testing import assert_no_warnings -from sklearn.utils._testing import ignore_warnings -from sklearn.utils._testing import SkipTest -from sklearn.utils._testing import assert_array_equal -from sklearn.utils._testing import assert_allclose_dense_sparse -from sklearn.utils._testing import assert_allclose -from sklearn.utils._testing import _convert_container -from sklearn.utils import as_float_array, check_array, check_symmetric -from sklearn.utils import check_X_y -from sklearn.utils import deprecated +from sklearn.base import BaseEstimator +from sklearn.datasets import make_blobs +from sklearn.ensemble import RandomForestRegressor +from sklearn.exceptions import NotFittedError, PositiveSpectrumWarning +from sklearn.linear_model import ARDRegression # TODO: add this estimator into the _mocking module in a further refactoring from sklearn.metrics.tests.test_score_objects import EstimatorWithFit +from sklearn.neighbors import KNeighborsClassifier +from sklearn.random_projection import _sparse_random_matrix +from sklearn.svm import SVR +from sklearn.utils import ( + _safe_indexing, + as_float_array, + check_array, + check_symmetric, + check_X_y, + deprecated, +) from sklearn.utils._mocking import ( MockDataFrame, _MockEstimatorOnOffPrediction, ) -from sklearn.utils.fixes import parse_version +from sklearn.utils._testing import ( + SkipTest, + TempMemmap, + _convert_container, + assert_allclose, + assert_allclose_dense_sparse, + assert_array_equal, + assert_no_warnings, + ignore_warnings, + skip_if_array_api_compat_not_configured, +) from sklearn.utils.estimator_checks import _NotAnArray -from sklearn.random_projection import _sparse_random_matrix -from sklearn.linear_model import ARDRegression -from sklearn.neighbors import KNeighborsClassifier -from sklearn.ensemble import RandomForestRegressor -from sklearn.svm import SVR -from sklearn.datasets import make_blobs -from sklearn.utils import _safe_indexing +from sklearn.utils.fixes import parse_version from sklearn.utils.validation import ( - has_fit_parameter, + FLOAT_DTYPES, + _allclose_dense_sparse, + _check_feature_names_in, + _check_fit_params, + _check_psd_eigenvalues, + _check_response_method, + _check_sample_weight, + _check_y, + _deprecate_positional_args, + _get_feature_names, _is_fitted, - check_is_fitted, - check_consistent_length, + _num_features, + _num_samples, assert_all_finite, + check_consistent_length, + check_is_fitted, check_memory, check_non_negative, - _num_samples, check_scalar, - _check_psd_eigenvalues, - _check_y, - _deprecate_positional_args, - _check_sample_weight, - _allclose_dense_sparse, - _num_features, - FLOAT_DTYPES, - _get_feature_names, - _check_feature_names_in, - _check_fit_params, - _check_response_method, + has_fit_parameter, ) -from sklearn.base import BaseEstimator 
-import sklearn - -from sklearn.exceptions import NotFittedError, PositiveSpectrumWarning - -from sklearn.utils._testing import TempMemmap -from sklearn.utils._testing import skip_if_array_api_compat_not_configured def test_as_float_array(): @@ -1582,7 +1585,7 @@ def test_check_pandas_sparse_invalid(ntype1, ntype2): @pytest.mark.parametrize( "ntype1, ntype2, expected_subtype", [ - ("longfloat", "longdouble", np.floating), + ("double", "longdouble", np.floating), ("float16", "half", np.floating), ("single", "float32", np.floating), ("double", "float64", np.floating), diff --git a/sklearn/utils/tests/test_weight_vector.py b/sklearn/utils/tests/test_weight_vector.py index 627d46d1fda06..0b19792475e06 100644 --- a/sklearn/utils/tests/test_weight_vector.py +++ b/sklearn/utils/tests/test_weight_vector.py @@ -1,5 +1,6 @@ import numpy as np import pytest + from sklearn.utils._weight_vector import ( WeightVector32, WeightVector64, diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 8ceef15986567..57c56ebd9307d 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -9,30 +9,23 @@ # Sylvain Marie # License: BSD 3 clause -from functools import reduce, wraps -import warnings import numbers import operator +import warnings +from contextlib import suppress +from functools import reduce, wraps +from inspect import Parameter, isclass, signature +import joblib import numpy as np import scipy.sparse as sp -from inspect import signature, isclass, Parameter - -# mypy error: Module 'numpy.core.numeric' has no attribute 'ComplexWarning' -from numpy.core.numeric import ComplexWarning # type: ignore -import joblib -from contextlib import suppress - -from .fixes import _object_dtype_isnan from .. import get_config as _get_config -from ..exceptions import PositiveSpectrumWarning -from ..exceptions import NotFittedError -from ..exceptions import DataConversionWarning -from ..utils._array_api import get_namespace -from ..utils._array_api import _asarray_with_order -from ..utils._array_api import _is_numpy_namespace -from ._isfinite import cy_isfinite, FiniteStatus +from ..exceptions import DataConversionWarning, NotFittedError, PositiveSpectrumWarning +from ..utils._array_api import _asarray_with_order, _is_numpy_namespace, get_namespace +from ..utils.fixes import ComplexWarning +from ._isfinite import FiniteStatus, cy_isfinite +from .fixes import _object_dtype_isnan FLOAT_DTYPES = (np.float64, np.float32, np.float16) @@ -607,12 +600,12 @@ def _check_estimator_name(estimator): def _pandas_dtype_needs_early_conversion(pd_dtype): """Return True if pandas extension pd_dtype need to be converted early.""" # Check these early for pandas versions without extension dtypes + from pandas import SparseDtype from pandas.api.types import ( is_bool_dtype, is_float_dtype, is_integer_dtype, ) - from pandas import SparseDtype if is_bool_dtype(pd_dtype): # bool and extension booleans need early conversion because __array__ @@ -1080,7 +1073,8 @@ def check_X_y( performed if the dtype of the input is not in the list. order : {'F', 'C'}, default=None - Whether an array will be forced to be fortran or c-style. + Whether an array will be forced to be fortran or c-style. If + `None`, then the input data's order is preserved when possible. copy : bool, default=False Whether a forced copy will be triggered. 
If copy=False, a copy might @@ -2247,7 +2241,7 @@ def _check_pos_label_consistency(pos_label, y_true): or np.array_equal(classes, [1]) ) ): - classes_repr = ", ".join(repr(c) for c in classes) + classes_repr = ", ".join([repr(c) for c in classes.tolist()]) raise ValueError( f"y_true takes value in {{{classes_repr}}} and pos_label is not " "specified: either make y_true take value in {0, 1} or "