@@ -34,7 +34,7 @@ def target_generator(X, add_noise=False):
# %%
# Let's have a look at the target generator where we will not add any noise to
# observe the signal that we would like to predict.
- X = np.linspace(0, 5, num=30).reshape(-1, 1)
+ X = np.linspace(0, 5, num=80).reshape(-1, 1)
y = target_generator(X, add_noise=False)

# %%
@@ -89,7 +89,7 @@ def target_generator(X, add_noise=False):
from sklearn.gaussian_process.kernels import RBF, WhiteKernel

kernel = 1.0 * RBF(length_scale=1e1, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel(
-     noise_level=1, noise_level_bounds=(1e-5, 1e1)
+     noise_level=1, noise_level_bounds=(1e-10, 1e1)
)
gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0)
gpr.fit(X_train, y_train)
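
# %%
# A minimal sketch of how the `y_mean` and `y_std` used in the next plotting
# cell are typically obtained (the same `predict` call appears in a later hunk
# of this diff): `gpr.predict(X, return_std=True)` returns the posterior
# predictive mean and standard deviation at the query points.
y_mean, y_std = gpr.predict(X, return_std=True)

# The fitted kernel and its log-marginal likelihood, also reported in the plot
# titles below, show which hyperparameters the optimizer converged to.
print(gpr.kernel_)
print(gpr.log_marginal_likelihood(gpr.kernel_.theta))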
@@ -98,7 +98,7 @@ def target_generator(X, add_noise=False):
# %%
plt.plot(X, y, label="Expected signal")
plt.scatter(x=X_train[:, 0], y=y_train, color="black", alpha=0.4, label="Observations")
- plt.errorbar(X, y_mean, y_std)
+ plt.errorbar(X, y_mean, y_std, label="Posterior mean ± std")
plt.legend()
plt.xlabel("X")
plt.ylabel("y")
@@ -110,15 +110,18 @@ def target_generator(X, add_noise=False):
    fontsize=8,
)
# %%
- # We see that the optimum kernel found still have a high noise level and
- # an even larger length scale. Furthermore, we observe that the
- # model does not provide faithful predictions.
+ # We see that the optimum kernel found still has a high noise level and an even
+ # larger length scale. The length scale reaches the maximum bound that we
+ # allowed for this parameter, and we got a warning as a result.
#
- # Now, we will initialize the
- # :class:`~sklearn.gaussian_process.kernels.RBF` with a
- # larger `length_scale` and the
- # :class:`~sklearn.gaussian_process.kernels.WhiteKernel`
- # with a smaller noise level lower bound.
+ # More importantly, we observe that the model does not provide useful
+ # predictions: the mean prediction is almost constant and does not follow the
+ # expected noise-free signal.
+ #
+ # Now, we will initialize the :class:`~sklearn.gaussian_process.kernels.RBF`
+ # with a smaller `length_scale` initial value and the
+ # :class:`~sklearn.gaussian_process.kernels.WhiteKernel` with a smaller initial
+ # noise level while keeping the parameter bounds unchanged.
kernel = 1.0 * RBF(length_scale=1e-1, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel(
    noise_level=1e-2, noise_level_bounds=(1e-10, 1e1)
)
@@ -129,7 +132,7 @@ def target_generator(X, add_noise=False):
# %%
plt.plot(X, y, label="Expected signal")
plt.scatter(x=X_train[:, 0], y=y_train, color="black", alpha=0.4, label="Observations")
- plt.errorbar(X, y_mean, y_std)
+ plt.errorbar(X, y_mean, y_std, label="Posterior mean ± std")
plt.legend()
plt.xlabel("X")
plt.ylabel("y")
@@ -154,21 +157,19 @@ def target_generator(X, add_noise=False):
# for different hyperparameters to get a sense of the local minima.
from matplotlib.colors import LogNorm

- length_scale = np.logspace(-2, 4, num=50)
- noise_level = np.logspace(-2, 1, num=50)
+ length_scale = np.logspace(-2, 4, num=80)
+ noise_level = np.logspace(-2, 1, num=80)
length_scale_grid, noise_level_grid = np.meshgrid(length_scale, noise_level)

log_marginal_likelihood = [
    gpr.log_marginal_likelihood(theta=np.log([0.36, scale, noise]))
    for scale, noise in zip(length_scale_grid.ravel(), noise_level_grid.ravel())
]
- log_marginal_likelihood = np.reshape(
-     log_marginal_likelihood, newshape=noise_level_grid.shape
- )
+ log_marginal_likelihood = np.reshape(log_marginal_likelihood, noise_level_grid.shape)
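
# %%
# A small illustrative check (not needed for the contour plot below) to make
# the `theta` convention above explicit: `log_marginal_likelihood(theta=...)`
# expects the log-transformed kernel hyperparameters in the same order as
# `gpr.kernel_.theta`, i.e. the constant (amplitude) factor, the RBF length
# scale and the white-noise level. The first entry (0.36) therefore keeps the
# amplitude fixed while the grid varies the two remaining hyperparameters.
for hyperparam, value in zip(gpr.kernel_.hyperparameters, np.exp(gpr.kernel_.theta)):
    print(hyperparam.name, value)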

# %%
vmin, vmax = (-log_marginal_likelihood).min(), 50
- level = np.around(np.logspace(np.log10(vmin), np.log10(vmax), num=50), decimals=1)
+ level = np.around(np.logspace(np.log10(vmin), np.log10(vmax), num=20), decimals=1)
plt.contour(
    length_scale_grid,
    noise_level_grid,
@@ -185,8 +186,43 @@ def target_generator(X, add_noise=False):
plt.show()

# %%
- # We see that there are two local minima that correspond to the combination
- # of hyperparameters previously found. Depending on the initial values for the
- # hyperparameters, the gradient-based optimization might converge whether or
- # not to the best model. It is thus important to repeat the optimization
- # several times for different initializations.
+ #
+ # We see that there are two local minima that correspond to the combination of
+ # hyperparameters previously found. Depending on the initial values for the
+ # hyperparameters, the gradient-based optimization might or might not
+ # converge to the best model. It is thus important to repeat the optimization
+ # several times for different initializations. This can be done by setting the
+ # `n_restarts_optimizer` parameter of the
+ # :class:`~sklearn.gaussian_process.GaussianProcessRegressor` class.
+ #
+ # Let's try again to fit our model with the bad initial values but this time
+ # with 10 random restarts.
+
+ kernel = 1.0 * RBF(length_scale=1e1, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel(
+     noise_level=1, noise_level_bounds=(1e-10, 1e1)
+ )
+ gpr = GaussianProcessRegressor(
+     kernel=kernel, alpha=0.0, n_restarts_optimizer=10, random_state=0
+ )
+ gpr.fit(X_train, y_train)
+ y_mean, y_std = gpr.predict(X, return_std=True)
+
+ # %%
+ plt.plot(X, y, label="Expected signal")
+ plt.scatter(x=X_train[:, 0], y=y_train, color="black", alpha=0.4, label="Observations")
+ plt.errorbar(X, y_mean, y_std, label="Posterior mean ± std")
+ plt.legend()
+ plt.xlabel("X")
+ plt.ylabel("y")
+ _ = plt.title(
+     (
+         f"Initial: {kernel}\nOptimum: {gpr.kernel_}\nLog-Marginal-Likelihood: "
+         f"{gpr.log_marginal_likelihood(gpr.kernel_.theta)}"
+     ),
+     fontsize=8,
+ )
+
+ # %%
+ #
+ # As we hoped, random restarts allow the optimization to find the best set
+ # of hyperparameters despite the bad initial values.
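
# %%
# As an illustrative check of this claim, the kernel fitted with random
# restarts and its log-marginal likelihood can be printed and compared with the
# values reported in the previous plot titles; the restarted fit should end up
# in the better of the two local optima.
print(gpr.kernel_)
# `log_marginal_likelihood_value_` stores the log-marginal likelihood of the
# fitted `gpr.kernel_.theta`.
print(gpr.log_marginal_likelihood_value_)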