diff --git a/doc/whats_new/v1.6.rst b/doc/whats_new/v1.6.rst
index e1e7a1f01f2f8..a05af28cb1df4 100644
--- a/doc/whats_new/v1.6.rst
+++ b/doc/whats_new/v1.6.rst
@@ -114,6 +114,15 @@ Changelog
     on the input data.
     :pr:`29124` by :user:`Yao Xiao <Charlie-XIAO>`.
+
+:mod:`sklearn.datasets`
+.......................
+
+- |Feature| :func:`datasets.fetch_file` allows downloading arbitrary data files
+  from the web. It handles local caching, integrity checks with SHA256 digests,
+  and automatic retries in case of HTTP errors. :pr:`29354` by :user:`Olivier
+  Grisel <ogrisel>`.
+
 :mod:`sklearn.discriminant_analysis`
 ....................................
diff --git a/examples/applications/plot_time_series_lagged_features.py b/examples/applications/plot_time_series_lagged_features.py
index 9159825cbbd43..2efc12acae276 100644
--- a/examples/applications/plot_time_series_lagged_features.py
+++ b/examples/applications/plot_time_series_lagged_features.py
@@ -1,425 +1,435 @@
-"""
-===========================================
-Lagged features for time series forecasting
-===========================================
-
-This example demonstrates how Polars-engineered lagged features can be used
-for time series forecasting with
-:class:`~sklearn.ensemble.HistGradientBoostingRegressor` on the Bike Sharing
-Demand dataset.
-
-See the example on
-:ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`
-for some data exploration on this dataset and a demo on periodic feature
-engineering.
-
-"""
-
-# %%
-# Analyzing the Bike Sharing Demand dataset
-# -----------------------------------------
-#
-# We start by loading the data from the OpenML repository
-# as a pandas dataframe. This will be replaced with Polars
-# once `fetch_openml` adds a native support for it.
-# We convert to Polars for feature engineering, as it automatically caches
-# common subexpressions which are reused in multiple expressions
-# (like `pl.col("count").shift(1)` below). See
-# https://docs.pola.rs/user-guide/lazy/optimizations/ for more information.
-
-import numpy as np
-import polars as pl
-
-from sklearn.datasets import fetch_openml
-
-pl.Config.set_fmt_str_lengths(20)
-
-bike_sharing = fetch_openml(
-    "Bike_Sharing_Demand", version=2, as_frame=True, parser="pandas"
-)
-df = bike_sharing.frame
-df = pl.DataFrame({col: df[col].to_numpy() for col in df.columns})
-
-# %%
-# Next, we take a look at the statistical summary of the dataset
-# so that we can better understand the data that we are working with.
-import polars.selectors as cs
-
-summary = df.select(cs.numeric()).describe()
-summary
-
-# %%
-# Let us look at the count of the seasons `"fall"`, `"spring"`, `"summer"`
-# and `"winter"` present in the dataset to confirm they are balanced.
-
-import matplotlib.pyplot as plt
-
-df["season"].value_counts()
-
-
-# %%
-# Generating Polars-engineered lagged features
-# --------------------------------------------
-# Let's consider the problem of predicting the demand at the
-# next hour given past demands. Since the demand is a continuous
-# variable, one could intuitively use any regression model. However, we do
-# not have the usual `(X_train, y_train)` dataset. Instead, we just have
-# the `y_train` demand data sequentially organized by time.
-lagged_df = df.select( - "count", - *[pl.col("count").shift(i).alias(f"lagged_count_{i}h") for i in [1, 2, 3]], - lagged_count_1d=pl.col("count").shift(24), - lagged_count_1d_1h=pl.col("count").shift(24 + 1), - lagged_count_7d=pl.col("count").shift(7 * 24), - lagged_count_7d_1h=pl.col("count").shift(7 * 24 + 1), - lagged_mean_24h=pl.col("count").shift(1).rolling_mean(24), - lagged_max_24h=pl.col("count").shift(1).rolling_max(24), - lagged_min_24h=pl.col("count").shift(1).rolling_min(24), - lagged_mean_7d=pl.col("count").shift(1).rolling_mean(7 * 24), - lagged_max_7d=pl.col("count").shift(1).rolling_max(7 * 24), - lagged_min_7d=pl.col("count").shift(1).rolling_min(7 * 24), -) -lagged_df.tail(10) - -# %% -# Watch out however, the first lines have undefined values because their own -# past is unknown. This depends on how much lag we used: -lagged_df.head(10) - -# %% -# We can now separate the lagged features in a matrix `X` and the target variable -# (the counts to predict) in an array of the same first dimension `y`. -lagged_df = lagged_df.drop_nulls() -X = lagged_df.drop("count") -y = lagged_df["count"] -print("X shape: {}\ny shape: {}".format(X.shape, y.shape)) - -# %% -# Naive evaluation of the next hour bike demand regression -# -------------------------------------------------------- -# Let's randomly split our tabularized dataset to train a gradient -# boosting regression tree (GBRT) model and evaluate it using Mean -# Absolute Percentage Error (MAPE). If our model is aimed at forecasting -# (i.e., predicting future data from past data), we should not use training -# data that are ulterior to the testing data. In time series machine learning -# the "i.i.d" (independent and identically distributed) assumption does not -# hold true as the data points are not independent and have a temporal -# relationship. -from sklearn.ensemble import HistGradientBoostingRegressor -from sklearn.model_selection import train_test_split - -X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=42 -) - -model = HistGradientBoostingRegressor().fit(X_train, y_train) - -# %% -# Taking a look at the performance of the model. -from sklearn.metrics import mean_absolute_percentage_error - -y_pred = model.predict(X_test) -mean_absolute_percentage_error(y_test, y_pred) - -# %% -# Proper next hour forecasting evaluation -# --------------------------------------- -# Let's use a proper evaluation splitting strategies that takes into account -# the temporal structure of the dataset to evaluate our model's ability to -# predict data points in the future (to avoid cheating by reading values from -# the lagged features in the training set). -from sklearn.model_selection import TimeSeriesSplit - -ts_cv = TimeSeriesSplit( - n_splits=3, # to keep the notebook fast enough on common laptops - gap=48, # 2 days data gap between train and test - max_train_size=10000, # keep train sets of comparable sizes - test_size=3000, # for 2 or 3 digits of precision in scores -) -all_splits = list(ts_cv.split(X, y)) - -# %% -# Training the model and evaluating its performance based on MAPE. -train_idx, test_idx = all_splits[0] -X_train, X_test = X[train_idx, :], X[test_idx, :] -y_train, y_test = y[train_idx], y[test_idx] - -model = HistGradientBoostingRegressor().fit(X_train, y_train) -y_pred = model.predict(X_test) -mean_absolute_percentage_error(y_test, y_pred) - -# %% -# The generalization error measured via a shuffled trained test split -# is too optimistic. 
The generalization via a time-based split is likely to -# be more representative of the true performance of the regression model. -# Let's assess this variability of our error evaluation with proper -# cross-validation: -from sklearn.model_selection import cross_val_score - -cv_mape_scores = -cross_val_score( - model, X, y, cv=ts_cv, scoring="neg_mean_absolute_percentage_error" -) -cv_mape_scores - -# %% -# The variability across splits is quite large! In a real life setting -# it would be advised to use more splits to better assess the variability. -# Let's report the mean CV scores and their standard deviation from now on. -print(f"CV MAPE: {cv_mape_scores.mean():.3f} ± {cv_mape_scores.std():.3f}") - -# %% -# We can compute several combinations of evaluation metrics and loss functions, -# which are reported a bit below. -from collections import defaultdict - -from sklearn.metrics import ( - make_scorer, - mean_absolute_error, - mean_pinball_loss, - root_mean_squared_error, -) -from sklearn.model_selection import cross_validate - - -def consolidate_scores(cv_results, scores, metric): - if metric == "MAPE": - scores[metric].append(f"{value.mean():.2f} ± {value.std():.2f}") - else: - scores[metric].append(f"{value.mean():.1f} ± {value.std():.1f}") - - return scores - - -scoring = { - "MAPE": make_scorer(mean_absolute_percentage_error), - "RMSE": make_scorer(root_mean_squared_error), - "MAE": make_scorer(mean_absolute_error), - "pinball_loss_05": make_scorer(mean_pinball_loss, alpha=0.05), - "pinball_loss_50": make_scorer(mean_pinball_loss, alpha=0.50), - "pinball_loss_95": make_scorer(mean_pinball_loss, alpha=0.95), -} -loss_functions = ["squared_error", "poisson", "absolute_error"] -scores = defaultdict(list) -for loss_func in loss_functions: - model = HistGradientBoostingRegressor(loss=loss_func) - cv_results = cross_validate( - model, - X, - y, - cv=ts_cv, - scoring=scoring, - n_jobs=2, - ) - time = cv_results["fit_time"] - scores["loss"].append(loss_func) - scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s") - - for key, value in cv_results.items(): - if key.startswith("test_"): - metric = key.split("test_")[1] - scores = consolidate_scores(cv_results, scores, metric) - - -# %% -# Modeling predictive uncertainty via quantile regression -# ------------------------------------------------------- -# Instead of modeling the expected value of the distribution of -# :math:`Y|X` like the least squares and Poisson losses do, one could try to -# estimate quantiles of the conditional distribution. -# -# :math:`Y|X=x_i` is expected to be a random variable for a given data point -# :math:`x_i` because we expect that the number of rentals cannot be 100% -# accurately predicted from the features. It can be influenced by other -# variables not properly captured by the existing lagged features. For -# instance whether or not it will rain in the next hour cannot be fully -# anticipated from the past hours bike rental data. This is what we -# call aleatoric uncertainty. -# -# Quantile regression makes it possible to give a finer description of that -# distribution without making strong assumptions on its shape. 
-quantile_list = [0.05, 0.5, 0.95] - -for quantile in quantile_list: - model = HistGradientBoostingRegressor(loss="quantile", quantile=quantile) - cv_results = cross_validate( - model, - X, - y, - cv=ts_cv, - scoring=scoring, - n_jobs=2, - ) - time = cv_results["fit_time"] - scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s") - - scores["loss"].append(f"quantile {int(quantile*100)}") - for key, value in cv_results.items(): - if key.startswith("test_"): - metric = key.split("test_")[1] - scores = consolidate_scores(cv_results, scores, metric) - -scores_df = pl.DataFrame(scores) -scores_df - - -# %% -# Let us take a look at the losses that minimise each metric. -def min_arg(col): - col_split = pl.col(col).str.split(" ") - return pl.arg_sort_by( - col_split.list.get(0).cast(pl.Float64), - col_split.list.get(2).cast(pl.Float64), - ).first() - - -scores_df.select( - pl.col("loss").get(min_arg(col_name)).alias(col_name) - for col_name in scores_df.columns - if col_name != "loss" -) - -# %% -# Even if the score distributions overlap due to the variance in the dataset, -# it is true that the average RMSE is lower when `loss="squared_error"`, whereas -# the average MAPE is lower when `loss="absolute_error"` as expected. That is -# also the case for the Mean Pinball Loss with the quantiles 5 and 95. The score -# corresponding to the 50 quantile loss is overlapping with the score obtained -# by minimizing other loss functions, which is also the case for the MAE. -# -# A qualitative look at the predictions -# ------------------------------------- -# We can now visualize the performance of the model with regards -# to the 5th percentile, median and the 95th percentile: -all_splits = list(ts_cv.split(X, y)) -train_idx, test_idx = all_splits[0] - -X_train, X_test = X[train_idx, :], X[test_idx, :] -y_train, y_test = y[train_idx], y[test_idx] - -max_iter = 50 -gbrt_mean_poisson = HistGradientBoostingRegressor(loss="poisson", max_iter=max_iter) -gbrt_mean_poisson.fit(X_train, y_train) -mean_predictions = gbrt_mean_poisson.predict(X_test) - -gbrt_median = HistGradientBoostingRegressor( - loss="quantile", quantile=0.5, max_iter=max_iter -) -gbrt_median.fit(X_train, y_train) -median_predictions = gbrt_median.predict(X_test) - -gbrt_percentile_5 = HistGradientBoostingRegressor( - loss="quantile", quantile=0.05, max_iter=max_iter -) -gbrt_percentile_5.fit(X_train, y_train) -percentile_5_predictions = gbrt_percentile_5.predict(X_test) - -gbrt_percentile_95 = HistGradientBoostingRegressor( - loss="quantile", quantile=0.95, max_iter=max_iter -) -gbrt_percentile_95.fit(X_train, y_train) -percentile_95_predictions = gbrt_percentile_95.predict(X_test) - -# %% -# We can now take a look at the predictions made by the regression models: -last_hours = slice(-96, None) -fig, ax = plt.subplots(figsize=(15, 7)) -plt.title("Predictions by regression models") -ax.plot( - y_test[last_hours], - "x-", - alpha=0.2, - label="Actual demand", - color="black", -) -ax.plot( - median_predictions[last_hours], - "^-", - label="GBRT median", -) -ax.plot( - mean_predictions[last_hours], - "x-", - label="GBRT mean (Poisson)", -) -ax.fill_between( - np.arange(96), - percentile_5_predictions[last_hours], - percentile_95_predictions[last_hours], - alpha=0.3, - label="GBRT 90% interval", -) -_ = ax.legend() - -# %% -# Here it's interesting to notice that the blue area between the 5% and 95% -# percentile estimators has a width that varies with the time of the day: -# -# - At night, the blue band is much narrower: the pair of 
models is quite -# certain that there will be a small number of bike rentals. And furthermore -# these seem correct in the sense that the actual demand stays in that blue -# band. -# - During the day, the blue band is much wider: the uncertainty grows, probably -# because of the variability of the weather that can have a very large impact, -# especially on week-ends. -# - We can also see that during week-days, the commute pattern is still visible in -# the 5% and 95% estimations. -# - Finally, it is expected that 10% of the time, the actual demand does not lie -# between the 5% and 95% percentile estimates. On this test span, the actual -# demand seems to be higher, especially during the rush hours. It might reveal that -# our 95% percentile estimator underestimates the demand peaks. This could be be -# quantitatively confirmed by computing empirical coverage numbers as done in -# the :ref:`calibration of confidence intervals `. -# -# Looking at the performance of non-linear regression models vs -# the best models: -from sklearn.metrics import PredictionErrorDisplay - -fig, axes = plt.subplots(ncols=3, figsize=(15, 6), sharey=True) -fig.suptitle("Non-linear regression models") -predictions = [ - median_predictions, - percentile_5_predictions, - percentile_95_predictions, -] -labels = [ - "Median", - "5th percentile", - "95th percentile", -] -for ax, pred, label in zip(axes, predictions, labels): - PredictionErrorDisplay.from_predictions( - y_true=y_test, - y_pred=pred, - kind="residual_vs_predicted", - scatter_kwargs={"alpha": 0.3}, - ax=ax, - ) - ax.set(xlabel="Predicted demand", ylabel="True demand") - ax.legend(["Best model", label]) - -plt.show() - -# %% -# Conclusion -# ---------- -# Through this example we explored time series forecasting using lagged -# features. We compared a naive regression (using the standardized -# :class:`~sklearn.model_selection.train_test_split`) with a proper time -# series evaluation strategy using -# :class:`~sklearn.model_selection.TimeSeriesSplit`. We observed that the -# model trained using :class:`~sklearn.model_selection.train_test_split`, -# having a default value of `shuffle` set to `True` produced an overly -# optimistic Mean Average Percentage Error (MAPE). The results -# produced from the time-based split better represent the performance -# of our time-series regression model. We also analyzed the predictive uncertainty -# of our model via Quantile Regression. Predictions based on the 5th and -# 95th percentile using `loss="quantile"` provide us with a quantitative estimate -# of the uncertainty of the forecasts made by our time series regression model. -# Uncertainty estimation can also be performed -# using `MAPIE `_, -# that provides an implementation based on recent work on conformal prediction -# methods and estimates both aleatoric and epistemic uncertainty at the same time. -# Furthermore, functionalities provided -# by `sktime `_ -# can be used to extend scikit-learn estimators by making use of recursive time -# series forecasting, that enables dynamic predictions of future values. +""" +=========================================== +Lagged features for time series forecasting +=========================================== + +This example demonstrates how Polars-engineered lagged features can be used +for time series forecasting with +:class:`~sklearn.ensemble.HistGradientBoostingRegressor` on the Bike Sharing +Demand dataset. 
+
+See the example on
+:ref:`sphx_glr_auto_examples_applications_plot_cyclical_feature_engineering.py`
+for some data exploration on this dataset and a demo on periodic feature
+engineering.
+
+"""
+
+# %%
+# Analyzing the Bike Sharing Demand dataset
+# -----------------------------------------
+#
+# We start by loading the data from the OpenML repository as a raw parquet file
+# to illustrate how to work with an arbitrary parquet file instead of hiding this
+# step in a convenience tool such as `sklearn.datasets.fetch_openml`.
+#
+# The URL of the parquet file can be found in the JSON description of the
+# Bike Sharing Demand dataset with id 44063 on openml.org
+# (https://openml.org/search?type=data&status=active&id=44063).
+#
+# The `sha256` hash of the file is also provided to ensure the integrity of the
+# downloaded file.
+import numpy as np
+import polars as pl
+
+from sklearn.datasets import fetch_file
+
+pl.Config.set_fmt_str_lengths(20)
+
+bike_sharing_data_file = fetch_file(
+    "https://openml1.win.tue.nl/datasets/0004/44063/dataset_44063.pq",
+    sha256="d120af76829af0d256338dc6dd4be5df4fd1f35bf3a283cab66a51c1c6abd06a",
+)
+bike_sharing_data_file
+
+# %%
+# We load the parquet file with Polars for feature engineering. Polars
+# automatically caches common subexpressions which are reused in multiple
+# expressions (like `pl.col("count").shift(1)` below). See
+# https://docs.pola.rs/user-guide/lazy/optimizations/ for more information.
+
+df = pl.read_parquet(bike_sharing_data_file)
+
+# %%
+# Next, we take a look at the statistical summary of the dataset
+# so that we can better understand the data that we are working with.
+import polars.selectors as cs
+
+summary = df.select(cs.numeric()).describe()
+summary
+
+# %%
+# Let us look at the count of the seasons `"fall"`, `"spring"`, `"summer"`
+# and `"winter"` present in the dataset to confirm they are balanced.
+
+import matplotlib.pyplot as plt
+
+df["season"].value_counts()
+
+
+# %%
+# Generating Polars-engineered lagged features
+# --------------------------------------------
+# Let's consider the problem of predicting the demand at the
+# next hour given past demands. Since the demand is a continuous
+# variable, one could intuitively use any regression model. However, we do
+# not have the usual `(X_train, y_train)` dataset. Instead, we just have
+# the `y_train` demand data sequentially organized by time.
+lagged_df = df.select(
+    "count",
+    *[pl.col("count").shift(i).alias(f"lagged_count_{i}h") for i in [1, 2, 3]],
+    lagged_count_1d=pl.col("count").shift(24),
+    lagged_count_1d_1h=pl.col("count").shift(24 + 1),
+    lagged_count_7d=pl.col("count").shift(7 * 24),
+    lagged_count_7d_1h=pl.col("count").shift(7 * 24 + 1),
+    lagged_mean_24h=pl.col("count").shift(1).rolling_mean(24),
+    lagged_max_24h=pl.col("count").shift(1).rolling_max(24),
+    lagged_min_24h=pl.col("count").shift(1).rolling_min(24),
+    lagged_mean_7d=pl.col("count").shift(1).rolling_mean(7 * 24),
+    lagged_max_7d=pl.col("count").shift(1).rolling_max(7 * 24),
+    lagged_min_7d=pl.col("count").shift(1).rolling_min(7 * 24),
+)
+lagged_df.tail(10)
+
+# %%
+# Watch out, however: the first lines have undefined values because their own
+# past is unknown. This depends on how much lag we used:
+lagged_df.head(10)
+
+# %%
+# We can now separate the lagged features in a matrix `X` and the target variable
+# (the counts to predict) in an array of the same first dimension `y`.
+lagged_df = lagged_df.drop_nulls()
+X = lagged_df.drop("count")
+y = lagged_df["count"]
+print("X shape: {}\ny shape: {}".format(X.shape, y.shape))
+
+# %%
+# Naive evaluation of the next hour bike demand regression
+# --------------------------------------------------------
+# Let's randomly split our tabularized dataset to train a gradient
+# boosting regression tree (GBRT) model and evaluate it using Mean
+# Absolute Percentage Error (MAPE). If our model is aimed at forecasting
+# (i.e., predicting future data from past data), we should not use training
+# data that come later in time than the testing data. In time series
+# machine learning the "i.i.d." (independent and identically distributed)
+# assumption does not hold true as the data points are not independent and
+# have a temporal relationship.
+from sklearn.ensemble import HistGradientBoostingRegressor
+from sklearn.model_selection import train_test_split
+
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, random_state=42
+)
+
+model = HistGradientBoostingRegressor().fit(X_train, y_train)
+
+# %%
+# Taking a look at the performance of the model.
+from sklearn.metrics import mean_absolute_percentage_error
+
+y_pred = model.predict(X_test)
+mean_absolute_percentage_error(y_test, y_pred)
+
+# %%
+# Proper next hour forecasting evaluation
+# ---------------------------------------
+# Let's use a proper evaluation splitting strategy that takes into account
+# the temporal structure of the dataset to evaluate our model's ability to
+# predict data points in the future (to avoid cheating by reading values from
+# the lagged features in the training set).
+from sklearn.model_selection import TimeSeriesSplit
+
+ts_cv = TimeSeriesSplit(
+    n_splits=3,  # to keep the notebook fast enough on common laptops
+    gap=48,  # 2 days data gap between train and test
+    max_train_size=10000,  # keep train sets of comparable sizes
+    test_size=3000,  # for 2 or 3 digits of precision in scores
+)
+all_splits = list(ts_cv.split(X, y))
+
+# %%
+# Training the model and evaluating its performance based on MAPE.
+train_idx, test_idx = all_splits[0]
+X_train, X_test = X[train_idx, :], X[test_idx, :]
+y_train, y_test = y[train_idx], y[test_idx]
+
+model = HistGradientBoostingRegressor().fit(X_train, y_train)
+y_pred = model.predict(X_test)
+mean_absolute_percentage_error(y_test, y_pred)
+
+# %%
+# The generalization error measured via a shuffled train-test split
+# is too optimistic. The generalization error measured via a time-based split
+# is likely to be more representative of the true performance of the
+# regression model.
+# Let's assess the variability of our error evaluation with proper
+# cross-validation:
+from sklearn.model_selection import cross_val_score
+
+cv_mape_scores = -cross_val_score(
+    model, X, y, cv=ts_cv, scoring="neg_mean_absolute_percentage_error"
+)
+cv_mape_scores
+
+# %%
+# The variability across splits is quite large! In a real-life setting
+# it would be advised to use more splits to better assess the variability.
+# Let's report the mean CV scores and their standard deviation from now on.
+print(f"CV MAPE: {cv_mape_scores.mean():.3f} ± {cv_mape_scores.std():.3f}")
+
+# %%
+# We can compute several combinations of evaluation metrics and loss functions,
+# which are reported below.
+from collections import defaultdict
+
+from sklearn.metrics import (
+    make_scorer,
+    mean_absolute_error,
+    mean_pinball_loss,
+    root_mean_squared_error,
+)
+from sklearn.model_selection import cross_validate
+
+
+def consolidate_scores(value, scores, metric):
+    if metric == "MAPE":
+        scores[metric].append(f"{value.mean():.2f} ± {value.std():.2f}")
+    else:
+        scores[metric].append(f"{value.mean():.1f} ± {value.std():.1f}")
+
+    return scores
+
+
+scoring = {
+    "MAPE": make_scorer(mean_absolute_percentage_error),
+    "RMSE": make_scorer(root_mean_squared_error),
+    "MAE": make_scorer(mean_absolute_error),
+    "pinball_loss_05": make_scorer(mean_pinball_loss, alpha=0.05),
+    "pinball_loss_50": make_scorer(mean_pinball_loss, alpha=0.50),
+    "pinball_loss_95": make_scorer(mean_pinball_loss, alpha=0.95),
+}
+loss_functions = ["squared_error", "poisson", "absolute_error"]
+scores = defaultdict(list)
+for loss_func in loss_functions:
+    model = HistGradientBoostingRegressor(loss=loss_func)
+    cv_results = cross_validate(
+        model,
+        X,
+        y,
+        cv=ts_cv,
+        scoring=scoring,
+        n_jobs=2,
+    )
+    time = cv_results["fit_time"]
+    scores["loss"].append(loss_func)
+    scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s")
+
+    for key, value in cv_results.items():
+        if key.startswith("test_"):
+            metric = key.split("test_")[1]
+            scores = consolidate_scores(value, scores, metric)
+
+
+# %%
+# Modeling predictive uncertainty via quantile regression
+# -------------------------------------------------------
+# Instead of modeling the expected value of the distribution of
+# :math:`Y|X` like the least squares and Poisson losses do, one could try to
+# estimate quantiles of the conditional distribution.
+#
+# :math:`Y|X=x_i` is expected to be a random variable for a given data point
+# :math:`x_i` because we expect that the number of rentals cannot be 100%
+# accurately predicted from the features. It can be influenced by other
+# variables not properly captured by the existing lagged features. For
+# instance, whether or not it will rain in the next hour cannot be fully
+# anticipated from the past hours' bike rental data. This is what we
+# call aleatoric uncertainty.
+#
+# Quantile regression makes it possible to give a finer description of that
+# distribution without making strong assumptions on its shape.
+quantile_list = [0.05, 0.5, 0.95]
+
+for quantile in quantile_list:
+    model = HistGradientBoostingRegressor(loss="quantile", quantile=quantile)
+    cv_results = cross_validate(
+        model,
+        X,
+        y,
+        cv=ts_cv,
+        scoring=scoring,
+        n_jobs=2,
+    )
+    time = cv_results["fit_time"]
+    scores["fit_time"].append(f"{time.mean():.2f} ± {time.std():.2f} s")
+
+    scores["loss"].append(f"quantile {int(quantile*100)}")
+    for key, value in cv_results.items():
+        if key.startswith("test_"):
+            metric = key.split("test_")[1]
+            scores = consolidate_scores(value, scores, metric)
+
+scores_df = pl.DataFrame(scores)
+scores_df
+
+
+# %%
+# Let us take a look at the losses that minimize each metric.
+def min_arg(col):
+    col_split = pl.col(col).str.split(" ")
+    return pl.arg_sort_by(
+        col_split.list.get(0).cast(pl.Float64),
+        col_split.list.get(2).cast(pl.Float64),
+    ).first()
+
+
+scores_df.select(
+    pl.col("loss").get(min_arg(col_name)).alias(col_name)
+    for col_name in scores_df.columns
+    if col_name != "loss"
+)
+
+# %%
+# Even if the score distributions overlap due to the variance in the dataset,
+# the average RMSE is indeed lower when `loss="squared_error"`, whereas
+# the average MAPE is lower when `loss="absolute_error"`, as expected. That is
+# also the case for the Mean Pinball Loss with the quantiles 5 and 95. The score
+# corresponding to the 50 quantile loss overlaps with the score obtained
+# by minimizing other loss functions, which is also the case for the MAE.
+#
+# A qualitative look at the predictions
+# -------------------------------------
+# We can now visualize the performance of the model with regard
+# to the 5th percentile, the median, and the 95th percentile:
+all_splits = list(ts_cv.split(X, y))
+train_idx, test_idx = all_splits[0]
+
+X_train, X_test = X[train_idx, :], X[test_idx, :]
+y_train, y_test = y[train_idx], y[test_idx]
+
+max_iter = 50
+gbrt_mean_poisson = HistGradientBoostingRegressor(loss="poisson", max_iter=max_iter)
+gbrt_mean_poisson.fit(X_train, y_train)
+mean_predictions = gbrt_mean_poisson.predict(X_test)
+
+gbrt_median = HistGradientBoostingRegressor(
+    loss="quantile", quantile=0.5, max_iter=max_iter
+)
+gbrt_median.fit(X_train, y_train)
+median_predictions = gbrt_median.predict(X_test)
+
+gbrt_percentile_5 = HistGradientBoostingRegressor(
+    loss="quantile", quantile=0.05, max_iter=max_iter
+)
+gbrt_percentile_5.fit(X_train, y_train)
+percentile_5_predictions = gbrt_percentile_5.predict(X_test)
+
+gbrt_percentile_95 = HistGradientBoostingRegressor(
+    loss="quantile", quantile=0.95, max_iter=max_iter
+)
+gbrt_percentile_95.fit(X_train, y_train)
+percentile_95_predictions = gbrt_percentile_95.predict(X_test)
+
+# %%
+# We can now take a look at the predictions made by the regression models:
+last_hours = slice(-96, None)
+fig, ax = plt.subplots(figsize=(15, 7))
+plt.title("Predictions by regression models")
+ax.plot(
+    y_test[last_hours],
+    "x-",
+    alpha=0.2,
+    label="Actual demand",
+    color="black",
+)
+ax.plot(
+    median_predictions[last_hours],
+    "^-",
+    label="GBRT median",
+)
+ax.plot(
+    mean_predictions[last_hours],
+    "x-",
+    label="GBRT mean (Poisson)",
+)
+ax.fill_between(
+    np.arange(96),
+    percentile_5_predictions[last_hours],
+    percentile_95_predictions[last_hours],
+    alpha=0.3,
+    label="GBRT 90% interval",
+)
+_ = ax.legend()
+
+# %%
+# Here it's interesting to notice that the blue area between the 5% and 95%
+# percentile estimators has a width that varies with the time of the day:
+#
+# - At night, the blue band is much narrower: the pair of models is quite
+#   certain that there will be a small number of bike rentals. Furthermore,
+#   these seem correct in the sense that the actual demand stays in that blue
+#   band.
+# - During the day, the blue band is much wider: the uncertainty grows, probably
+#   because of the variability of the weather that can have a very large impact,
+#   especially on weekends.
+# - We can also see that during weekdays, the commute pattern is still visible in
+#   the 5% and 95% estimations.
+# - Finally, it is expected that 10% of the time, the actual demand does not lie
+#   between the 5% and 95% percentile estimates.
+#   On this test span, the actual demand seems to be higher, especially
+#   during the rush hours. It might reveal that our 95% percentile estimator
+#   underestimates the demand peaks. This could be quantitatively confirmed
+#   by computing empirical coverage numbers as done in the
+#   :ref:`calibration of confidence intervals `.
+#
+# Looking at the performance of non-linear regression models vs
+# the best models:
+from sklearn.metrics import PredictionErrorDisplay
+
+fig, axes = plt.subplots(ncols=3, figsize=(15, 6), sharey=True)
+fig.suptitle("Non-linear regression models")
+predictions = [
+    median_predictions,
+    percentile_5_predictions,
+    percentile_95_predictions,
+]
+labels = [
+    "Median",
+    "5th percentile",
+    "95th percentile",
+]
+for ax, pred, label in zip(axes, predictions, labels):
+    PredictionErrorDisplay.from_predictions(
+        y_true=y_test,
+        y_pred=pred,
+        kind="residual_vs_predicted",
+        scatter_kwargs={"alpha": 0.3},
+        ax=ax,
+    )
+    ax.set(xlabel="Predicted demand", ylabel="True demand")
+    ax.legend(["Best model", label])
+
+plt.show()
+
+# %%
+# Conclusion
+# ----------
+# Through this example we explored time series forecasting using lagged
+# features. We compared a naive regression (using the standard
+# :class:`~sklearn.model_selection.train_test_split`) with a proper time
+# series evaluation strategy using
+# :class:`~sklearn.model_selection.TimeSeriesSplit`. We observed that the
+# model trained using :class:`~sklearn.model_selection.train_test_split`,
+# whose default value of `shuffle` is `True`, produced an overly
+# optimistic Mean Absolute Percentage Error (MAPE). The results
+# produced from the time-based split better represent the performance
+# of our time-series regression model. We also analyzed the predictive uncertainty
+# of our model via Quantile Regression. Predictions based on the 5th and
+# 95th percentile using `loss="quantile"` provide us with a quantitative estimate
+# of the uncertainty of the forecasts made by our time series regression model.
+# Uncertainty estimation can also be performed
+# using `MAPIE `_,
+# which provides an implementation based on recent work on conformal prediction
+# methods and estimates both aleatoric and epistemic uncertainty at the same time.
+# Furthermore, functionalities provided
+# by `sktime `_
+# can be used to extend scikit-learn estimators by making use of recursive time
+# series forecasting, which enables dynamic predictions of future values.
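A minimal, self-contained sketch of the download-and-cache pattern that the rewritten example relies on (assuming this branch is installed; the URL and SHA256 digest are taken verbatim from the example, while the `parquet_path` variable name is only illustrative):

    import polars as pl

    from sklearn.datasets import fetch_file

    # The first call downloads the file into the scikit-learn data home, in a
    # subfolder derived from the URL host name and path. Subsequent calls
    # verify the SHA256 digest of the cached copy and skip the download.
    parquet_path = fetch_file(
        "https://openml1.win.tue.nl/datasets/0004/44063/dataset_44063.pq",
        sha256="d120af76829af0d256338dc6dd4be5df4fd1f35bf3a283cab66a51c1c6abd06a",
    )
    df = pl.read_parquet(parquet_path)
    print(df.shape)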
diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py index 58cddb099faff..f41b8fa7e73fb 100644 --- a/sklearn/datasets/__init__.py +++ b/sklearn/datasets/__init__.py @@ -4,6 +4,7 @@ from ._base import ( clear_data_home, + fetch_file, get_data_home, load_breast_cancer, load_diabetes, @@ -57,6 +58,7 @@ "dump_svmlight_file", "fetch_20newsgroups", "fetch_20newsgroups_vectorized", + "fetch_file", "fetch_lfw_pairs", "fetch_lfw_people", "fetch_olivetti_faces", diff --git a/sklearn/datasets/_base.py b/sklearn/datasets/_base.py index 7dd2f181dee12..62055d296402b 100644 --- a/sklearn/datasets/_base.py +++ b/sklearn/datasets/_base.py @@ -8,8 +8,10 @@ import gzip import hashlib import os +import re import shutil import time +import unicodedata import warnings from collections import namedtuple from importlib import resources @@ -17,7 +19,9 @@ from os import environ, listdir, makedirs from os.path import expanduser, isdir, join, splitext from pathlib import Path +from tempfile import NamedTemporaryFile from urllib.error import URLError +from urllib.parse import urlparse from urllib.request import urlretrieve import numpy as np @@ -1427,20 +1431,26 @@ def _sha256(path): def _fetch_remote(remote, dirname=None, n_retries=3, delay=1): - """Helper function to download a remote dataset into path + """Helper function to download a remote dataset. Fetch a dataset pointed by remote's url, save into path using remote's - filename and ensure its integrity based on the SHA256 Checksum of the + filename and ensure its integrity based on the SHA256 checksum of the downloaded file. + .. versionchanged:: 1.6 + + If the file already exists locally and the SHA256 checksums match, the + path to the local file is returned without re-downloading. + Parameters ---------- remote : RemoteFileMetadata Named tuple containing remote dataset meta information: url, filename - and checksum + and checksum. - dirname : str - Directory to save the file to. + dirname : str or Path, default=None + Directory to save the file to. If None, the current working directory + is used. n_retries : int, default=3 Number of retries when HTTP errors are encountered. @@ -1454,28 +1464,173 @@ def _fetch_remote(remote, dirname=None, n_retries=3, delay=1): Returns ------- - file_path: str + file_path: Path Full path of the created file. """ + if dirname is None: + folder_path = Path(".") + else: + folder_path = Path(dirname) + + file_path = folder_path / remote.filename + + if file_path.exists(): + if remote.checksum is None: + return file_path + + checksum = _sha256(file_path) + if checksum == remote.checksum: + return file_path + else: + warnings.warn( + f"SHA256 checksum of existing local file {file_path.name} " + f"({checksum}) differs from expected ({remote.checksum}): " + f"re-downloading from {remote.url} ." + ) + + # We create a temporary file dedicated to this particular download to avoid + # conflicts with parallel downloads. If the download is successful, the + # temporary file is atomically renamed to the final file path (with + # `shutil.move`). We therefore pass `delete=False` to `NamedTemporaryFile`. + # Otherwise, garbage collecting temp_file would raise an error when + # attempting to delete a file that was already renamed. If the download + # fails or the result does not match the expected SHA256 digest, the + # temporary file is removed manually in the except block. 
+    temp_file = NamedTemporaryFile(
+        prefix=remote.filename + ".part_", dir=folder_path, delete=False
+    )
+    # Note that Python 3.12's `delete_on_close=True` is ignored as we set
+    # `delete=False` explicitly. So after this line the empty temporary file still
+    # exists on disk, uniquely reserved for this specific call of `_fetch_remote`,
+    # which protects against corruption by concurrent calls.
+    temp_file.close()
+    try:
+        temp_file_path = Path(temp_file.name)
+        while True:
+            try:
+                urlretrieve(remote.url, temp_file_path)
+                break
+            except (URLError, TimeoutError):
+                if n_retries == 0:
+                    # If no more retries are left, re-raise the caught exception.
+                    raise
+                warnings.warn(f"Retry downloading from url: {remote.url}")
+                n_retries -= 1
+                time.sleep(delay)
+
+        checksum = _sha256(temp_file_path)
+        if remote.checksum is not None and remote.checksum != checksum:
+            raise OSError(
+                f"The SHA256 checksum of {remote.filename} ({checksum}) "
+                f"differs from expected ({remote.checksum})."
+            )
+    except (Exception, KeyboardInterrupt):
+        os.unlink(temp_file.name)
+        raise
+
+    # The following renaming is atomic whenever temp_file_path and
+    # file_path are on the same filesystem. This should be the case most of
+    # the time, but we still use shutil.move instead of os.rename in case
+    # they are not.
+    shutil.move(temp_file_path, file_path)
-    file_path = remote.filename if dirname is None else join(dirname, remote.filename)
-    while True:
-        try:
-            urlretrieve(remote.url, file_path)
-            break
-        except (URLError, TimeoutError):
-            if n_retries == 0:
-                # If no more retries are left, re-raise the caught exception.
-                raise
-            warnings.warn(f"Retry downloading from url: {remote.url}")
-            n_retries -= 1
-            time.sleep(delay)
-
-    checksum = _sha256(file_path)
-    if remote.checksum != checksum:
-        raise OSError(
-            "{} has an SHA256 checksum ({}) "
-            "differing from expected ({}), "
-            "file may be corrupted.".format(file_path, checksum, remote.checksum)
-        )
     return file_path
+
+
+def _filter_filename(value, filter_dots=True):
+    """Derive a name that is safe to use as filename from the given string.
+
+    Adapted from the `slugify` function of django:
+    https://github.com/django/django/blob/master/django/utils/text.py
+
+    Convert spaces or repeated dashes to single dashes. Replace characters that
+    aren't alphanumerics, underscores, hyphens or dots (dots are kept only when
+    `filter_dots=False`) by underscores. Convert to lowercase. Also strip
+    leading and trailing whitespace, dashes, underscores, and dots.
+    """
+    value = unicodedata.normalize("NFKD", value).lower()
+    if filter_dots:
+        value = re.sub(r"[^\w\s-]+", "_", value)
+    else:
+        value = re.sub(r"[^.\w\s-]+", "_", value)
+    value = re.sub(r"[\s-]+", "-", value)
+    return value.strip("-_.")
+
+
+def _derive_folder_and_filename_from_url(url):
+    """Derive a cache folder and file name from the host and path of a URL."""
+    parsed_url = urlparse(url)
+    if not parsed_url.hostname:
+        raise ValueError(f"Invalid URL: {url}")
+    folder_components = [_filter_filename(parsed_url.hostname, filter_dots=False)]
+    path = parsed_url.path
+
+    if "/" in path:
+        base_folder, raw_filename = path.rsplit("/", 1)
+
+        base_folder = _filter_filename(base_folder)
+        if base_folder:
+            folder_components.append(base_folder)
+    else:
+        raw_filename = path
+
+    filename = _filter_filename(raw_filename, filter_dots=False)
+    if not filename:
+        filename = "downloaded_file"
+
+    return "/".join(folder_components), filename
+
+
+def fetch_file(
+    url, folder=None, local_filename=None, sha256=None, n_retries=3, delay=1
+):
+    """Fetch a file from the web if not already present in the local folder.
+
+    If the file already exists locally (and the SHA256 checksums match when
+    provided), the path to the local file is returned without re-downloading.
+
+    .. versionadded:: 1.6
+
+    Parameters
+    ----------
+    url : str
+        URL of the file to download.
+
+    folder : str or Path, default=None
+        Directory to save the file to. If None, the file is downloaded to a
+        folder with a name derived from the URL host name and path, under the
+        scikit-learn data home folder.
+
+    local_filename : str, default=None
+        Name of the file to save. If None, the filename is inferred from the
+        URL.
+
+    sha256 : str, default=None
+        SHA256 checksum of the file. If None, no checksum is verified.
+
+    n_retries : int, default=3
+        Number of retries when HTTP errors are encountered.
+
+    delay : int, default=1
+        Number of seconds between retries.
+
+    Returns
+    -------
+    file_path : Path
+        Full path of the downloaded file.
+    """
+    folder_from_url, filename_from_url = _derive_folder_and_filename_from_url(url)
+
+    if local_filename is None:
+        local_filename = filename_from_url
+
+    if folder is None:
+        folder = Path(get_data_home()) / folder_from_url
+        makedirs(folder, exist_ok=True)
+
+    remote_metadata = RemoteFileMetadata(
+        filename=local_filename, url=url, checksum=sha256
+    )
+    return _fetch_remote(
+        remote_metadata, dirname=folder, n_retries=n_retries, delay=delay
+    )
diff --git a/sklearn/datasets/tests/test_base.py b/sklearn/datasets/tests/test_base.py
index b79f8c47c55c5..8b5231f68abdd 100644
--- a/sklearn/datasets/tests/test_base.py
+++ b/sklearn/datasets/tests/test_base.py
@@ -1,5 +1,7 @@
+import hashlib
 import io
 import os
+import re
 import shutil
 import tempfile
 import warnings
@@ -9,12 +11,14 @@
 from pickle import dumps, loads
 from unittest.mock import Mock
 from urllib.error import HTTPError
+from urllib.parse import urlparse
 
 import numpy as np
 import pytest
 
 from sklearn.datasets import (
     clear_data_home,
+    fetch_file,
     get_data_home,
     load_breast_cancer,
     load_diabetes,
@@ -28,6 +32,7 @@
 )
 from sklearn.datasets._base import (
     RemoteFileMetadata,
+    _derive_folder_and_filename_from_url,
     _fetch_remote,
     load_csv_data,
     load_gzip_compressed_csv_data,
@@ -391,3 +396,263 @@ def test_fetch_remote_raise_warnings_with_invalid_url(monkeypatch):
     for r in record:
         assert str(r.message) == f"Retry downloading from url: {url}"
     assert len(record) == 3
+
+
+def test_derive_folder_and_filename_from_url():
+    folder, filename = _derive_folder_and_filename_from_url(
+        "https://example.com/file.tar.gz"
+    )
+    assert folder == "example.com"
+    assert filename == "file.tar.gz"
+
+    folder, filename = _derive_folder_and_filename_from_url(
+        "https://example.com/نمونه نماینده.data"
+    )
+    assert folder == "example.com"
+    assert filename == "نمونه-نماینده.data"
+
+    folder, filename = _derive_folder_and_filename_from_url(
+        "https://example.com/path/to-/.file.tar.gz"
+    )
+    assert folder == "example.com/path_to"
+    assert filename == "file.tar.gz"
+
+    folder, filename = _derive_folder_and_filename_from_url("https://example.com/")
+    assert folder == "example.com"
+    assert filename == "downloaded_file"
+
+    folder, filename = _derive_folder_and_filename_from_url("https://example.com")
+    assert folder == "example.com"
+    assert filename == "downloaded_file"
+
+    folder, filename = _derive_folder_and_filename_from_url(
+        "https://example.com/path/@to/data.json?param=value"
+    )
+    assert folder == "example.com/path_to"
+    assert filename == "data.json"
+
+    folder, filename = _derive_folder_and_filename_from_url(
"https://example.com/path/@@to._/-_.data.json.#anchor" + ) + assert folder == "example.com/path_to" + assert filename == "data.json" + + folder, filename = _derive_folder_and_filename_from_url( + "https://example.com//some_file.txt" + ) + assert folder == "example.com" + assert filename == "some_file.txt" + + folder, filename = _derive_folder_and_filename_from_url( + "http://example/../some_file.txt" + ) + assert folder == "example" + assert filename == "some_file.txt" + + folder, filename = _derive_folder_and_filename_from_url( + "https://example.com/!.'.,/some_file.txt" + ) + assert folder == "example.com" + assert filename == "some_file.txt" + + folder, filename = _derive_folder_and_filename_from_url( + "https://example.com/a/!.'.,/b/some_file.txt" + ) + assert folder == "example.com/a_b" + assert filename == "some_file.txt" + + folder, filename = _derive_folder_and_filename_from_url("https://example.com/!.'.,") + assert folder == "example.com" + assert filename == "downloaded_file" + + with pytest.raises(ValueError, match="Invalid URL"): + _derive_folder_and_filename_from_url("https:/../") + + +def _mock_urlretrieve(server_side): + def _urlretrieve_mock(url, local_path): + server_root = Path(server_side) + file_path = urlparse(url).path.strip("/") + if not (server_root / file_path).exists(): + raise HTTPError(url, 404, "Not Found", None, None) + shutil.copy(server_root / file_path, local_path) + + return Mock(side_effect=_urlretrieve_mock) + + +def test_fetch_file_using_data_home(monkeypatch, tmpdir): + tmpdir = Path(tmpdir) + server_side = tmpdir / "server_side" + server_side.mkdir() + data_file = server_side / "data.jsonl" + server_data = '{"a": 1, "b": 2}\n' + data_file.write_text(server_data, encoding="utf-8") + + server_subfolder = server_side / "subfolder" + server_subfolder.mkdir() + other_data_file = server_subfolder / "other_file.txt" + other_data_file.write_text("Some important text data.", encoding="utf-8") + + data_home = tmpdir / "data_home" + data_home.mkdir() + + urlretrieve_mock = _mock_urlretrieve(server_side) + monkeypatch.setattr("sklearn.datasets._base.urlretrieve", urlretrieve_mock) + + monkeypatch.setattr( + "sklearn.datasets._base.get_data_home", Mock(return_value=data_home) + ) + fetched_file_path = fetch_file( + "https://example.com/data.jsonl", + ) + assert fetched_file_path == data_home / "example.com" / "data.jsonl" + assert fetched_file_path.read_text(encoding="utf-8") == server_data + + fetched_file_path = fetch_file( + "https://example.com/subfolder/other_file.txt", + ) + assert ( + fetched_file_path == data_home / "example.com" / "subfolder" / "other_file.txt" + ) + assert fetched_file_path.read_text(encoding="utf-8") == other_data_file.read_text( + "utf-8" + ) + + expected_warning_msg = re.escape( + "Retry downloading from url: https://example.com/subfolder/invalid.txt" + ) + with pytest.raises(HTTPError): + with pytest.warns(match=expected_warning_msg): + fetch_file( + "https://example.com/subfolder/invalid.txt", + delay=0, + ) + + local_subfolder = data_home / "example.com" / "subfolder" + assert sorted(local_subfolder.iterdir()) == [local_subfolder / "other_file.txt"] + + +def test_fetch_file_without_sha256(monkeypatch, tmpdir): + server_side = tmpdir.mkdir("server_side") + data_file = Path(server_side / "data.jsonl") + server_data = '{"a": 1, "b": 2}\n' + data_file.write_text(server_data, encoding="utf-8") + + client_side = tmpdir.mkdir("client_side") + + urlretrieve_mock = _mock_urlretrieve(server_side) + 
monkeypatch.setattr("sklearn.datasets._base.urlretrieve", urlretrieve_mock)
+
+    # The first call should trigger a download:
+    fetched_file_path = fetch_file(
+        "https://example.com/data.jsonl",
+        folder=client_side,
+    )
+    assert fetched_file_path == client_side / "data.jsonl"
+    assert fetched_file_path.read_text(encoding="utf-8") == server_data
+    assert urlretrieve_mock.call_count == 1
+
+    # Fetching the same file again into the same folder should do nothing:
+    fetched_file_path = fetch_file(
+        "https://example.com/data.jsonl",
+        folder=client_side,
+    )
+    assert fetched_file_path == client_side / "data.jsonl"
+    assert fetched_file_path.read_text(encoding="utf-8") == server_data
+    assert urlretrieve_mock.call_count == 1
+
+    # Deleting and calling again should re-download
+    fetched_file_path.unlink()
+    fetched_file_path = fetch_file(
+        "https://example.com/data.jsonl",
+        folder=client_side,
+    )
+    assert fetched_file_path == client_side / "data.jsonl"
+    assert fetched_file_path.read_text(encoding="utf-8") == server_data
+    assert urlretrieve_mock.call_count == 2
+
+
+def test_fetch_file_with_sha256(monkeypatch, tmpdir):
+    server_side = tmpdir.mkdir("server_side")
+    data_file = Path(server_side / "data.jsonl")
+    server_data = '{"a": 1, "b": 2}\n'
+    data_file.write_text(server_data, encoding="utf-8")
+    expected_sha256 = hashlib.sha256(data_file.read_bytes()).hexdigest()
+
+    client_side = tmpdir.mkdir("client_side")
+
+    urlretrieve_mock = _mock_urlretrieve(server_side)
+    monkeypatch.setattr("sklearn.datasets._base.urlretrieve", urlretrieve_mock)
+
+    # The first call should trigger a download.
+    fetched_file_path = fetch_file(
+        "https://example.com/data.jsonl", folder=client_side, sha256=expected_sha256
+    )
+    assert fetched_file_path == client_side / "data.jsonl"
+    assert fetched_file_path.read_text(encoding="utf-8") == server_data
+    assert urlretrieve_mock.call_count == 1
+
+    # Fetching the same file again into the same folder should do nothing when
+    # the sha256 matches:
+    fetched_file_path = fetch_file(
+        "https://example.com/data.jsonl", folder=client_side, sha256=expected_sha256
+    )
+    assert fetched_file_path == client_side / "data.jsonl"
+    assert fetched_file_path.read_text(encoding="utf-8") == server_data
+    assert urlretrieve_mock.call_count == 1
+
+    # Corrupting the local data should yield a warning and trigger a new download:
+    fetched_file_path.write_text("corrupted contents", encoding="utf-8")
+    expected_msg = (
+        r"SHA256 checksum of existing local file data.jsonl "
+        rf"\(.*\) differs from expected \({expected_sha256}\): "
+        r"re-downloading from https://example.com/data.jsonl \."
+    )
+    with pytest.warns(match=expected_msg):
+        fetched_file_path = fetch_file(
+            "https://example.com/data.jsonl", folder=client_side, sha256=expected_sha256
+        )
+    assert fetched_file_path == client_side / "data.jsonl"
+    assert fetched_file_path.read_text(encoding="utf-8") == server_data
+    assert urlretrieve_mock.call_count == 2
+
+    # Calling again should do nothing:
+    fetched_file_path = fetch_file(
+        "https://example.com/data.jsonl", folder=client_side, sha256=expected_sha256
+    )
+    assert fetched_file_path == client_side / "data.jsonl"
+    assert fetched_file_path.read_text(encoding="utf-8") == server_data
+    assert urlretrieve_mock.call_count == 2
+
+    # Deleting the local file and calling again should re-download without warning:
+    fetched_file_path.unlink()
+    fetched_file_path = fetch_file(
+        "https://example.com/data.jsonl", folder=client_side, sha256=expected_sha256
+    )
+    assert fetched_file_path == client_side / "data.jsonl"
+    assert fetched_file_path.read_text(encoding="utf-8") == server_data
+    assert urlretrieve_mock.call_count == 3
+
+    # Calling without a sha256 should also work without re-downloading:
+    fetched_file_path = fetch_file(
+        "https://example.com/data.jsonl",
+        folder=client_side,
+    )
+    assert fetched_file_path == client_side / "data.jsonl"
+    assert fetched_file_path.read_text(encoding="utf-8") == server_data
+    assert urlretrieve_mock.call_count == 3
+
+    # Calling with a wrong sha256 should raise an informative exception:
+    non_matching_sha256 = "deadbabecafebeef"
+    expected_warning_msg = "differs from expected"
+    expected_error_msg = re.escape(
+        f"The SHA256 checksum of data.jsonl ({expected_sha256}) differs from "
+        f"expected ({non_matching_sha256})."
+    )
+    with pytest.raises(OSError, match=expected_error_msg):
+        with pytest.warns(match=expected_warning_msg):
+            fetch_file(
+                "https://example.com/data.jsonl",
+                folder=client_side,
+                sha256=non_matching_sha256,
+            )
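The tests above also pin down how the cache layout is derived from a URL. A short sketch of that mapping, using the private `_derive_folder_and_filename_from_url` helper added in this PR (illustration only, not a public API; the URL is the one used in the example):

    from sklearn.datasets._base import _derive_folder_and_filename_from_url

    # The host name becomes the top-level cache folder (dots preserved), the
    # sanitized path becomes a subfolder (slashes and other special characters
    # mapped to underscores), and the last path segment becomes the file name.
    folder, filename = _derive_folder_and_filename_from_url(
        "https://openml1.win.tue.nl/datasets/0004/44063/dataset_44063.pq"
    )
    assert folder == "openml1.win.tue.nl/datasets_0004_44063"
    assert filename == "dataset_44063.pq"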