@@ -33,7 +33,7 @@ def target_generator(X, add_noise=False):
 # %%
 # Let's have a look to the target generator where we will not add any noise to
 # observe the signal that we would like to predict.
-X = np.linspace(0, 5, num=30).reshape(-1, 1)
+X = np.linspace(0, 5, num=80).reshape(-1, 1)
 y = target_generator(X, add_noise=False)

 # %%
@@ -88,7 +88,7 @@ def target_generator(X, add_noise=False):
 from sklearn.gaussian_process.kernels import RBF, WhiteKernel

 kernel = 1.0 * RBF(length_scale=1e1, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel(
-    noise_level=1, noise_level_bounds=(1e-5, 1e1)
+    noise_level=1, noise_level_bounds=(1e-10, 1e1)
 )
 gpr = GaussianProcessRegressor(kernel=kernel, alpha=0.0)
 gpr.fit(X_train, y_train)
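Once fitted, the tuned hyperparameters can be read back from the estimator. A minimal inspection sketch, assuming the `gpr` instance fitted in the hunk above:

# Kernel with the hyperparameters found by maximizing the log-marginal-likelihood.
print(gpr.kernel_)
# Objective value reached for those hyperparameters.
print(gpr.log_marginal_likelihood(gpr.kernel_.theta))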
@@ -97,7 +97,7 @@ def target_generator(X, add_noise=False):
 # %%
 plt.plot(X, y, label="Expected signal")
 plt.scatter(x=X_train[:, 0], y=y_train, color="black", alpha=0.4, label="Observations")
-plt.errorbar(X, y_mean, y_std)
+plt.errorbar(X, y_mean, y_std, label="Posterior mean ± std")
 plt.legend()
 plt.xlabel("X")
 plt.ylabel("y")
@@ -109,15 +109,18 @@ def target_generator(X, add_noise=False):
     fontsize=8,
 )
 # %%
-# We see that the optimum kernel found still have a high noise level and
-# an even larger length scale. Furthermore, we observe that the
-# model does not provide faithful predictions.
+# We see that the optimum kernel found still has a high noise level and an even
+# larger length scale. The length scale reaches the maximum bound that we
+# allowed for this parameter and we got a warning as a result.
 #
-# Now, we will initialize the
-# :class:`~sklearn.gaussian_process.kernels.RBF` with a
-# larger `length_scale` and the
-# :class:`~sklearn.gaussian_process.kernels.WhiteKernel`
-# with a smaller noise level lower bound.
+# More importantly, we observe that the model does not provide useful
+# predictions: the mean prediction seems to be constant; it does not follow the
+# expected noise-free signal.
+#
+# Now, we will initialize the :class:`~sklearn.gaussian_process.kernels.RBF`
+# with a larger `length_scale` initial value and the
+# :class:`~sklearn.gaussian_process.kernels.WhiteKernel` with a smaller initial
+# noise level while keeping the parameter bounds unchanged.
 kernel = 1.0 * RBF(length_scale=1e-1, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel(
     noise_level=1e-2, noise_level_bounds=(1e-10, 1e1)
 )
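The warning mentioned in the comment above can also be captured programmatically. A minimal sketch, assuming scikit-learn emits a `ConvergenceWarning` when an optimized hyperparameter ends up close to its bound, and reusing the `X_train` and `y_train` arrays defined earlier in the example:

import warnings

from sklearn.exceptions import ConvergenceWarning

# Refit the poorly initialized model and record any bound-related warnings
# instead of letting them go to stderr.
bad_kernel = 1.0 * RBF(length_scale=1e1, length_scale_bounds=(1e-2, 1e3))
bad_kernel += WhiteKernel(noise_level=1, noise_level_bounds=(1e-10, 1e1))
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always", ConvergenceWarning)
    GaussianProcessRegressor(kernel=bad_kernel, alpha=0.0).fit(X_train, y_train)
for warning in caught:
    print(warning.message)

Whether the warning actually fires depends on the data and the optimization path; the point is only that it can be recorded and inspected rather than read off the console.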
@@ -128,7 +131,7 @@ def target_generator(X, add_noise=False):
 # %%
 plt.plot(X, y, label="Expected signal")
 plt.scatter(x=X_train[:, 0], y=y_train, color="black", alpha=0.4, label="Observations")
-plt.errorbar(X, y_mean, y_std)
+plt.errorbar(X, y_mean, y_std, label="Posterior mean ± std")
 plt.legend()
 plt.xlabel("X")
 plt.ylabel("y")
@@ -153,21 +156,19 @@ def target_generator(X, add_noise=False):
 # for different hyperparameters to get a sense of the local minima.
 from matplotlib.colors import LogNorm

-length_scale = np.logspace(-2, 4, num=50)
-noise_level = np.logspace(-2, 1, num=50)
+length_scale = np.logspace(-2, 4, num=80)
+noise_level = np.logspace(-2, 1, num=80)
 length_scale_grid, noise_level_grid = np.meshgrid(length_scale, noise_level)

 log_marginal_likelihood = [
     gpr.log_marginal_likelihood(theta=np.log([0.36, scale, noise]))
     for scale, noise in zip(length_scale_grid.ravel(), noise_level_grid.ravel())
 ]
-log_marginal_likelihood = np.reshape(
-    log_marginal_likelihood, newshape=noise_level_grid.shape
-)
+log_marginal_likelihood = np.reshape(log_marginal_likelihood, noise_level_grid.shape)

 # %%
 vmin, vmax = (-log_marginal_likelihood).min(), 50
-level = np.around(np.logspace(np.log10(vmin), np.log10(vmax), num=50), decimals=1)
+level = np.around(np.logspace(np.log10(vmin), np.log10(vmax), num=20), decimals=1)
 plt.contour(
     length_scale_grid,
     noise_level_grid,
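As a small follow-up sketch, the grid point with the highest log-marginal-likelihood can be located directly, which is one way to check which of the two basins in the contour plot is the deeper one. This reuses the `log_marginal_likelihood`, `length_scale_grid`, and `noise_level_grid` arrays computed in the hunk above:

# Index of the grid point with the largest log-marginal-likelihood.
lml = np.asarray(log_marginal_likelihood)
best = np.unravel_index(np.argmax(lml), lml.shape)
print(
    f"length_scale={length_scale_grid[best]:.3g}, "
    f"noise_level={noise_level_grid[best]:.3g}"
)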
@@ -184,8 +185,43 @@ def target_generator(X, add_noise=False):
 plt.show()

 # %%
-# We see that there are two local minima that correspond to the combination
-# of hyperparameters previously found. Depending on the initial values for the
-# hyperparameters, the gradient-based optimization might converge whether or
-# not to the best model. It is thus important to repeat the optimization
-# several times for different initializations.
+#
+# We see that there are two local minima that correspond to the combination of
+# hyperparameters previously found. Depending on the initial values for the
+# hyperparameters, the gradient-based optimization might or might not
+# converge to the best model. It is thus important to repeat the optimization
+# several times for different initializations. This can be done by setting the
+# `n_restarts_optimizer` parameter of the
+# :class:`~sklearn.gaussian_process.GaussianProcessRegressor` class.
+#
+# Let's try again to fit our model with the bad initial values but this time
+# with 10 random restarts.
+
+kernel = 1.0 * RBF(length_scale=1e1, length_scale_bounds=(1e-2, 1e3)) + WhiteKernel(
+    noise_level=1, noise_level_bounds=(1e-10, 1e1)
+)
+gpr = GaussianProcessRegressor(
+    kernel=kernel, alpha=0.0, n_restarts_optimizer=10, random_state=0
+)
+gpr.fit(X_train, y_train)
+y_mean, y_std = gpr.predict(X, return_std=True)
+
+# %%
+plt.plot(X, y, label="Expected signal")
+plt.scatter(x=X_train[:, 0], y=y_train, color="black", alpha=0.4, label="Observations")
+plt.errorbar(X, y_mean, y_std, label="Posterior mean ± std")
+plt.legend()
+plt.xlabel("X")
+plt.ylabel("y")
+_ = plt.title(
+    (
+        f"Initial: {kernel}\nOptimum: {gpr.kernel_}\nLog-Marginal-Likelihood: "
+        f"{gpr.log_marginal_likelihood(gpr.kernel_.theta)}"
+    ),
+    fontsize=8,
+)
+
+# %%
+#
+# As we hoped, random restarts allow the optimization to find the best set
+# of hyperparameters despite the bad initial values.
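A possible follow-up check, reusing the `kernel`, `X_train`, `y_train`, and restarted `gpr` from the hunk above, is to refit once from the same bad initialization without restarts and compare the objective values reached; `gpr_no_restart` is a name introduced here for illustration only:

# Same bad initial values, but no random restarts of the optimizer.
gpr_no_restart = GaussianProcessRegressor(kernel=kernel, alpha=0.0, random_state=0)
gpr_no_restart.fit(X_train, y_train)
print(
    "LML without restarts:",
    gpr_no_restart.log_marginal_likelihood(gpr_no_restart.kernel_.theta),
)
print("LML with 10 restarts:", gpr.log_marginal_likelihood(gpr.kernel_.theta))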