10 """Gaussian Process Regression (GPR)."""
11
12 __docformat__ = 'restructuredtext'
13
14
15 import numpy as N
16
17 from mvpa.base import externals
18
19 from mvpa.misc.state import StateVariable
20 from mvpa.clfs.base import Classifier
21 from mvpa.misc.param import Parameter
22 from mvpa.clfs.kernel import KernelSquaredExponential, KernelLinear
23 from mvpa.measures.base import Sensitivity
24 from mvpa.misc.exceptions import InvalidHyperparameterError
25 from mvpa.datasets import Dataset
26
27 externals.exists("scipy", raiseException=True)
28 from scipy.linalg import cho_solve as SLcho_solve
29 from scipy.linalg import cholesky as SLcholesky
30 import scipy.linalg as SL
31
32 if __debug__:
33 from mvpa.base import debug
34
35
36 Nlog = N.log
37 Ndot = N.dot
38 Ndiag = N.diag
39 NLAcholesky = N.linalg.cholesky
40 NLAsolve = N.linalg.solve
41 NLAError = N.linalg.linalg.LinAlgError
42 SLAError = SL.basic.LinAlgError
43
44
45 _halflog2pi = 0.5 * Nlog(2 * N.pi)
46
47
class GPR(Classifier):
    """Gaussian Process Regression (GPR).

    GPR performs non-parametric regression by modelling the data as a
    sample from a Gaussian process whose covariance between samples is
    given by the chosen kernel.
    """

    predicted_variances = StateVariable(enabled=False,
        doc="Variance per each predicted value")

    log_marginal_likelihood = StateVariable(enabled=False,
        doc="Log Marginal Likelihood")

    log_marginal_likelihood_gradient = StateVariable(enabled=False,
        doc="Log Marginal Likelihood Gradient")

    _clf_internals = [ 'gpr', 'regression', 'retrainable' ]


    sigma_noise = Parameter(0.001, allowedtype='float', min=1e-10,
        doc="The standard deviation of the Gaussian noise.")

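    # A minimal usage sketch (hypothetical data; assumes the standard
    # PyMVPA `Dataset` interface):
    #
    #   kse = KernelSquaredExponential(length_scale=0.5)
    #   clf = GPR(kse, sigma_noise=0.01, regression=True)
    #   clf.states.enable('predicted_variances')
    #   clf.train(dataset)                      # dataset is a `Dataset`
    #   predictions = clf.predict(dataset.samples)
    #   variances = clf.predicted_variances
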
    def __init__(self, kernel=None, **kwargs):
        """Initialize a GPR regression analysis.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        # set the kernel, defaulting to a squared exponential one
        if kernel is None:
            kernel = KernelSquaredExponential()
        self.__kernel = kernel

        # tag the classifier as (non-)linear; the model selection based
        # sensitivity ('model_select' flavor) requires 'openopt'
        if isinstance(kernel, KernelLinear):
            self._clf_internals += ['linear']
        else:
            self._clf_internals += ['non-linear']
            if externals.exists('openopt'):
                self._clf_internals += ['has_sensitivity']

        self._init_internals()
        pass


    def _init_internals(self):
        """Reset some internal variables to None.

        To be used in constructor and untrain()
        """
        self._train_fv = None
        self._labels = None
        self._km_train_train = None
        self._train_labels = None
        self._alpha = None
        self._L = None
        self._LL = None
        self.__kernel.reset()
        pass


    def __repr__(self):
        """String summary of the object
        """
        return super(GPR, self).__repr__(
            prefixes=['kernel=%s' % self.__kernel])


    def compute_log_marginal_likelihood(self):
        """Compute log marginal likelihood using self.train_fv and self.labels.
        """
        if __debug__:
            debug("GPR", "Computing log_marginal_likelihood")
        self.log_marginal_likelihood = \
            -0.5*Ndot(self._train_labels, self._alpha) - \
            Nlog(self._L.diagonal()).sum() - \
            self._km_train_train.shape[0] * _halflog2pi
        return self.log_marginal_likelihood
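    # The expression above is the Cholesky-based form of the GP log
    # marginal likelihood (Rasmussen & Williams, "Gaussian Processes
    # for Machine Learning", eq. 2.30):
    #
    #   log p(y|X) = -1/2 y^T C^-1 y - 1/2 log|C| - n/2 log(2*pi)
    #
    # with C = K + sigma_noise^2*I = L L^T and alpha = C^-1 y, so that
    # log|C| = 2*sum(log(diag(L))), hence only the diagonal of L is needed.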


    def compute_gradient_log_marginal_likelihood(self):
        """Compute gradient of the log marginal likelihood. This
        version uses a more compact formula provided by Williams and
        Rasmussen's book.
        """
        # invert C via its Cholesky factorization
        Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0]))

        alphalphaT = N.dot(self._alpha[:,None], self._alpha[None,:])
        tmp = alphalphaT - Kinv

        # gradient with respect to the kernel hyperparameters
        grad_LML_hypers = self.__kernel.compute_lml_gradient(
            tmp, self._train_fv)
        # dC/dsigma_noise = 2*sigma_noise*I
        grad_K_sigma_n = 2.0*self.sigma_noise*N.eye(tmp.shape[0])
        grad_LML_sigma_n = 0.5 * (tmp * (grad_K_sigma_n).T).sum()
        lml_gradient = N.hstack([grad_LML_sigma_n, grad_LML_hypers])
        self.log_marginal_likelihood_gradient = lml_gradient
        return lml_gradient
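    # The gradient follows Rasmussen & Williams, eq. 5.9:
    #
    #   d/dtheta log p(y|X) = 1/2 tr( (alpha alpha^T - C^-1) dC/dtheta )
    #
    # where the trace is evaluated elementwise as tr(A B) = (A * B.T).sum(),
    # avoiding an explicit matrix product.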


    def compute_gradient_log_marginal_likelihood_logscale(self):
        """Compute gradient of the log marginal likelihood when
        hyperparameters are in logscale. This version uses a more
        compact formula provided by Williams and Rasmussen's book.
        """
        Kinv = SLcho_solve(self._LL, N.eye(self._L.shape[0]))
        alphalphaT = N.dot(self._alpha[:,None], self._alpha[None,:])
        tmp = alphalphaT - Kinv
        grad_LML_log_hypers = \
            self.__kernel.compute_lml_gradient_logscale(tmp, self._train_fv)
        grad_K_log_sigma_n = 2.0 * self.sigma_noise ** 2 * N.eye(Kinv.shape[0])
        grad_LML_log_sigma_n = 0.5 * (tmp * (grad_K_log_sigma_n).T).sum()
        lml_gradient = N.hstack([grad_LML_log_sigma_n, grad_LML_log_hypers])
        self.log_marginal_likelihood_gradient = lml_gradient
        return lml_gradient
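    # Logscale reparametrization by the chain rule: with
    # theta = log(sigma_noise), dC/dtheta = dC/dsigma_noise * sigma_noise
    # = 2*sigma_noise^2*I, which is the factor used above.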


    def getSensitivityAnalyzer(self, flavor='auto', **kwargs):
        """Returns a sensitivity analyzer for GPR.

        :Parameters:
          flavor : basestring
            What sensitivity to provide. Valid values are
            'linear', 'model_select', 'auto'.
            In case of 'auto' selects 'linear' for linear kernel
            and 'model_select' for the rest. 'linear' corresponds to
            `GPRLinearWeights` and 'model_select' to `GPRWeights`.
        """
        # map 'auto' onto the flavor appropriate for the kernel in use
        if flavor == 'auto':
            if isinstance(self.__kernel, KernelLinear):
                flavor = 'linear'
            else:
                flavor = 'model_select'
        if __debug__:
            debug("GPR", "Returning '%s' sensitivity analyzer" % flavor)

        if flavor == 'linear':
            return GPRLinearWeights(self, **kwargs)
        elif flavor == 'model_select':
            if not ('has_sensitivity' in self._clf_internals):
                raise ValueError(
                    "model_select flavor is not available, probably "
                    "because the 'openopt' module is not available")
            return GPRWeights(self, **kwargs)
        else:
            raise ValueError("Flavor %s is not recognized" % flavor)


    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """
        retrainable = self.params.retrainable
        if retrainable:
            newkernel = False
            newL = False
            _changedData = self._changedData

        self._train_fv = train_fv = data.samples
        self._train_labels = train_labels = data.labels

        if not retrainable or _changedData['traindata'] \
               or _changedData.get('kernel_params', False):
            if __debug__:
                debug("GPR", "Computing train train kernel matrix")
            self._km_train_train = km_train_train = self.__kernel.compute(train_fv)
            newkernel = True
            if retrainable:
                # reset so it gets recomputed on the next predict
                self._km_train_test = None
        else:
            if __debug__:
                debug("GPR", "Not recomputing kernel since retrainable and "
                      "nothing has changed")
            km_train_train = self._km_train_train

        if not retrainable or newkernel or _changedData['params']:
            if __debug__:
                debug("GPR", "Computing L. sigma_noise=%g" % self.sigma_noise)
            # covariance of the noisy observations:
            # C = K + sigma_noise^2 * I
            self._C = km_train_train + \
                  self.sigma_noise**2 * N.identity(km_train_train.shape[0], 'd')
            # The Cholesky decomposition may fail for numerical reasons
            # when the eigenvalues of self._C decay too rapidly; in that
            # case retry after adding a tiny jitter to the diagonal.
            try:
                self._L = SLcholesky(self._C, lower=True)
                self._LL = (self._L, True)
            except SLAError:
                epsilon = 1.0e-20 * N.eye(self._C.shape[0])
                self._L = SLcholesky(self._C + epsilon, lower=True)
                self._LL = (self._L, True)
                pass
            newL = True
        else:
            if __debug__:
                debug("GPR", "Not computing L since kernel, data and params "
                      "stayed the same")
            L = self._L

        if __debug__:
            debug("GPR", "Computing alpha")
        # alpha = C^-1 y, obtained by solving the two triangular systems
        # given by the Cholesky factor instead of inverting C explicitly
        self._alpha = SLcho_solve(self._LL, train_labels)

        # compute only if the state variable was enabled
        if self.states.isEnabled('log_marginal_likelihood'):
            self.compute_log_marginal_likelihood()
            pass

        if retrainable:
            self.states.retrained = not newkernel or not newL

        if __debug__:
            debug("GPR", "Done training")

        pass


    def _predict(self, data):
        """
        Predict the output for the provided data.
        """
        retrainable = self.params.retrainable

        if not retrainable or self._changedData['testdata'] \
               or self._km_train_test is None:
            if __debug__:
                debug('GPR', "Computing train test kernel matrix")
            km_train_test = self.__kernel.compute(self._train_fv, data)
            if retrainable:
                self._km_train_test = km_train_test
                self.states.repredicted = False
        else:
            if __debug__:
                debug('GPR', "Not recomputing train test kernel matrix")
            km_train_test = self._km_train_test
            self.states.repredicted = True

        # predictive mean: k_*^T alpha
        predictions = Ndot(km_train_test.transpose(), self._alpha)

        if self.states.isEnabled('predicted_variances'):
            # do computation only if the state variable was enabled
            if not retrainable or self._km_test_test is None \
                   or self._changedData['testdata']:
                if __debug__:
                    debug('GPR', "Computing test test kernel matrix")
                km_test_test = self.__kernel.compute(data)
                if retrainable:
                    self._km_test_test = km_test_test
            else:
                if __debug__:
                    debug('GPR', "Not recomputing test test kernel matrix")
                km_test_test = self._km_test_test

            if __debug__:
                debug("GPR", "Computing predicted variances")
            L = self._L
            # v = L^-1 k_*: lu_solve with an identity permutation and
            # trans=1 solves the triangular system without requiring a
            # dedicated triangular solver
            piv = N.arange(L.shape[0])
            v = SL.lu_solve((L.T, piv), km_train_test, trans=1)
            self.predicted_variances = Ndiag(km_test_test) - (v ** 2).sum(0) \
                                       + self.sigma_noise ** 2
            pass

        if __debug__:
            debug("GPR", "Done predicting")
        return predictions
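    # Predictive equations used above (Rasmussen & Williams,
    # eqs. 2.25-2.26), with C = K + sigma_noise^2*I = L L^T:
    #
    #   mean     = k_*^T C^-1 y = k_*^T alpha
    #   variance = k_** - k_*^T C^-1 k_* + sigma_noise^2
    #
    # where k_*^T C^-1 k_* is evaluated as v^T v = (v**2).sum(0) with
    # v = L^-1 k_*.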


    def _setRetrainable(self, value, force=False):
        """Internal function : need to reset _km_test_test
        """
        super(GPR, self)._setRetrainable(value, force)
        if force or (value and value != self.params.retrainable):
            self._km_test_test = None


    def untrain(self):
        super(GPR, self).untrain()
        self._init_internals()
        pass


    def set_hyperparameters(self, hyperparameter):
        """
        Set hyperparameters' values.

        Note that 'hyperparameter' is a sequence, so the order of its
        values is important. The first value must be sigma_noise; the
        remaining values are the kernel's hyperparameters, in the exact
        order the kernel expects them.
        """
        if hyperparameter[0] < GPR.sigma_noise.min:
            raise InvalidHyperparameterError()
        self.sigma_noise = hyperparameter[0]
        if hyperparameter.size > 1:
            self.__kernel.set_hyperparameters(hyperparameter[1:])
            pass
        return
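    # Example of the expected ordering (hypothetical values, assuming a
    # KernelSquaredExponential with one length scale per feature):
    #
    #   hyp = N.array([0.01,       # sigma_noise
    #                  1.0,        # sigma_f
    #                  0.5, 0.5])  # length scales
    #   clf.set_hyperparameters(hyp)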

    kernel = property(fget=lambda self: self.__kernel)
    pass


class GPRLinearWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights GPR trained
    on a given `Dataset`.

    In case of KernelLinear compute explicitly the coefficients
    of the linear regression, together with their variances (if
    requested).

    Note that the intercept is not computed.
    """

    variances = StateVariable(enabled=False,
        doc="Variances of the weights (for KernelLinear)")

    _LEGAL_CLFS = [ GPR ]

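    # With a linear kernel, K = X Sigma_p X^T, the GP posterior over the
    # weight vector w is Gaussian (Rasmussen & Williams, sec. 2.1) with
    #   mean       Sigma_p X^T alpha
    #   covariance Sigma_p - Sigma_p X^T C^-1 X Sigma_p
    # and _call below evaluates both.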
    def _call(self, dataset):
        """Extract weights from GPR
        """
        clf = self.clf
        kernel = clf.kernel
        train_fv = clf._train_fv

        weights = Ndot(kernel.Sigma_p,
                       Ndot(train_fv.T, clf._alpha))

        if self.states.isEnabled('variances'):
            # XXX naive implementation; could reuse the Cholesky factor
            # of clf._C instead of inverting it explicitly
            self.variances = Ndiag(
                kernel.Sigma_p -
                Ndot(kernel.Sigma_p,
                     Ndot(train_fv.T,
                          Ndot(SL.inv(clf._C),
                               Ndot(train_fv, kernel.Sigma_p)))))
        return weights


if externals.exists('openopt'):

    from mvpa.clfs.model_selector import ModelSelector

    class GPRWeights(Sensitivity):
        """`SensitivityAnalyzer` that reports GPR weights obtained by
        model selection (hyperparameter optimization) on a given
        `Dataset`.
        """

        _LEGAL_CLFS = [ GPR ]

        def _call(self, dataset):
            """Extract weights from GPR
            """
            clf = self.clf
            # normalize labels to zero mean and unit variance to ease
            # hyperparameter optimization
            clf._train_labels = (clf._train_labels - clf._train_labels.mean()) \
                                / clf._train_labels.std()

            dataset = Dataset(samples=clf._train_fv, labels=clf._train_labels)
            clf.states.enable("log_marginal_likelihood")
            ms = ModelSelector(clf, dataset)

            # initial guess: almost noiseless, unit signal variance and
            # very large (nearly flat) length scales
            sigma_noise_initial = 1.0e-5
            sigma_f_initial = 1.0
            length_scale_initial = N.ones(dataset.nfeatures)*1.0e4

            hyp_initial_guess = N.hstack([sigma_noise_initial,
                                          sigma_f_initial,
                                          length_scale_initial])
            # do not fix any hyperparameter: optimize them all
            fixedHypers = None
            problem = ms.max_log_marginal_likelihood(
                hyp_initial_guess=hyp_initial_guess,
                optimization_algorithm="scipy_lbfgsb",
                ftol=1.0e-3, fixedHypers=fixedHypers,
                use_gradient=True, logscale=True)
            if __debug__ and 'GPR_WEIGHTS' in debug.active:
                problem.iprint = 1
            lml = ms.solve()
            weights = 1.0/ms.hyperparameters_best[2:]
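            # The inverse length scales act as ARD-style relevance
            # weights: the shorter the learned length scale of a feature,
            # the faster the function varies along it, and hence the more
            # relevant the feature is for the regression.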
            if __debug__:
                debug("GPR",
                      "%s, train: shape %s, labels %s, min:max %g:%g, "
                      "sigma_noise %g, sigma_f %g" %
                      (clf, clf._train_fv.shape, N.unique(clf._train_labels),
                       clf._train_fv.min(), clf._train_fv.max(),
                       ms.hyperparameters_best[0], ms.hyperparameters_best[1]))

            return weights



if __name__ == "__main__":
    import pylab
    pylab.close("all")
    pylab.ion()

    from mvpa.misc.data_generators import sinModulated

    def compute_prediction(sigma_noise_best, sigma_f, length_scale_best,
                           regression, dataset, data_test, label_test, F,
                           logml=True):
546 """XXX Function which seems to be valid only for __main__...
547
548 TODO: remove reimporting of pylab etc. See pylint output for more
549 information
550 """
551
552 data_train = dataset.samples
553 label_train = dataset.labels
554 import pylab
555 kse = KernelSquaredExponential(length_scale=length_scale_best,
556 sigma_f=sigma_f)
557 g = GPR(kse, sigma_noise=sigma_noise_best, regression=regression)
558 print g
559 if regression:
560 g.states.enable("predicted_variances")
561 pass
562
563 if logml:
564 g.states.enable("log_marginal_likelihood")
565 pass
566
567 g.train(dataset)
568 prediction = g.predict(data_test)
569
570
571
572 accuracy = None
573 if regression:
574 accuracy = N.sqrt(((prediction-label_test)**2).sum()/prediction.size)
575 print "RMSE:", accuracy
576 else:
577 accuracy = (prediction.astype('l')==label_test.astype('l')).sum() \
578 / float(prediction.size)
579 print "accuracy:", accuracy
580 pass
581
582 if F == 1:
583 pylab.figure()
584 pylab.plot(data_train, label_train, "ro", label="train")
585 pylab.plot(data_test, prediction, "b-", label="prediction")
586 pylab.plot(data_test, label_test, "g+", label="test")
587 if regression:
588 pylab.plot(data_test, prediction-N.sqrt(g.predicted_variances),
589 "b--", label=None)
590 pylab.plot(data_test, prediction+N.sqrt(g.predicted_variances),
591 "b--", label=None)
592 pylab.text(0.5, -0.8, "RMSE="+"%f" %(accuracy))
593 else:
594 pylab.text(0.5, -0.8, "accuracy="+str(accuracy))
595 pass
596 pylab.legend()
597 pass
598
599 print "LML:", g.log_marginal_likelihood
600

    train_size = 40
    test_size = 100
    F = 1

    dataset = sinModulated(train_size, F)
    # test set from the same generator
    dataset_test = sinModulated(test_size, F, flat=True)

    regression = True
    logml = True

    if logml:
        print "Looking for better hyperparameters: grid search"

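        # Exhaustive 20x20 grid search over (sigma_noise, length_scale),
        # keeping the pair that maximizes the log marginal likelihood of
        # the training data; sigma_f is held fixed at 1.0.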
        sigma_noise_steps = N.linspace(0.1, 0.5, num=20)
        length_scale_steps = N.linspace(0.05, 0.6, num=20)
        lml = N.zeros((len(sigma_noise_steps), len(length_scale_steps)))
        lml_best = -N.inf
        length_scale_best = 0.0
        sigma_noise_best = 0.0
        for i, x in enumerate(sigma_noise_steps):
            for j, y in enumerate(length_scale_steps):
                kse = KernelSquaredExponential(length_scale=y)
                g = GPR(kse, sigma_noise=x, regression=regression)
                g.states.enable("log_marginal_likelihood")
                g.train(dataset)
                lml[i, j] = g.log_marginal_likelihood
                if lml[i, j] > lml_best:
                    lml_best = lml[i, j]
                    length_scale_best = y
                    sigma_noise_best = x
                    pass
                pass
            pass

        # contour plot of the log marginal likelihood surface
        pylab.figure()
        X = N.repeat(sigma_noise_steps[:, N.newaxis], length_scale_steps.size,
                     axis=1)
        Y = N.repeat(length_scale_steps[N.newaxis, :], sigma_noise_steps.size,
                     axis=0)
        step = (lml.max()-lml.min())/30
        pylab.contour(X, Y, lml, N.arange(lml.min(), lml.max()+step, step),
                      colors='k')
        pylab.plot([sigma_noise_best], [length_scale_best], "k+",
                   markeredgewidth=2, markersize=8)
        pylab.xlabel("noise standard deviation")
        pylab.ylabel("characteristic length_scale")
        pylab.title("log marginal likelihood")
        pylab.axis("tight")
        print "lml_best", lml_best
        print "sigma_noise_best", sigma_noise_best
        print "length_scale_best", length_scale_best
        print "expected number of upcrossings on the unit interval:", \
              1.0/(2*N.pi*length_scale_best)
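        # For a squared exponential kernel the expected number of zero
        # upcrossings of a sample path on the unit interval is
        # 1/(2*pi*length_scale) (Rasmussen & Williams, ch. 4).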
        pass


    compute_prediction(sigma_noise_best, 1.0, length_scale_best, regression,
                       dataset, dataset_test.samples, dataset_test.labels,
                       F, logml)
    pylab.show()