#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   Copyright (c) 2008 Emanuele Olivetti <emanuele@relativita.com>
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Model selection."""

__docformat__ = 'restructuredtext'


import numpy as N
from mvpa.base import externals
from mvpa.misc.exceptions import InvalidHyperparameterError

externals.exists("scipy", raiseException=True)
import scipy.linalg as SL

# no sense to import this module if openopt is not available
if externals.exists("openopt", raiseException=True):
    from scikits.openopt import NLP

if __debug__:
    from mvpa.base import debug

def _openopt_debug():
    # shut up or make verbose OpenOpt
    # (-1 = no logs, 0 = small log, 1 = verbose)
    if __debug__:
        da = debug.active
        if 'OPENOPT' in da:
            return 1
        elif 'MOD_SEL' in da:
            return 0
    return -1

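# Note on debug targets (an assumption based on standard PyMVPA conventions):
# running with MVPA_DEBUG=MOD_SEL in the environment activates the 'MOD_SEL'
# target above, mapping to OpenOpt's small log (iprint=0), while
# MVPA_DEBUG=OPENOPT selects verbose output (iprint=1).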

class ModelSelector(object):
    """Model selection facility.

    Select a model among multiple models (i.e., a parametric model,
    parametrized by a set of hyperparameters).
    """

    def __init__(self, parametric_model, dataset):
        """
        :Parameters:

          parametric_model : Classifier
            the actual parametric model to be optimized.

          dataset : Dataset
            dataset on which the model is trained and the
            log_marginal_likelihood is computed.
        """
        self.parametric_model = parametric_model
        self.dataset = dataset
        self.hyperparameters_best = None
        self.log_marginal_likelihood_best = None
        self.problem = None
        pass
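
    # Typical usage (a sketch mirroring the __main__ demo at the bottom of
    # this file; gpr.GPR and kernel.KernelSquaredExponential come from the
    # companion gpr/kernel modules, and `dataset` is a PyMVPA Dataset):
    #
    #   g = gpr.GPR(kernel.KernelSquaredExponential(), regression=True)
    #   ms = ModelSelector(g, dataset)
    #   ms.max_log_marginal_likelihood(hyp_initial_guess=[1.0, 1.0, 1.0])
    #   lml = ms.solve()
    #   sigma_noise, sigma_f, length_scale = ms.hyperparameters_best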

    def max_log_marginal_likelihood(self, hyp_initial_guess, maxiter=1,
                optimization_algorithm="scipy_cg", ftol=1.0e-3, fixedHypers=None,
                use_gradient=False, logscale=False):
        """
        Set up the optimization problem in order to maximize
        the log_marginal_likelihood.

        :Parameters:

          hyp_initial_guess : numpy.ndarray
            set of hyperparameters' initial values where to start
            optimization.

          maxiter : int
            maximum number of iterations allowed to the optimizer.
            (Defaults to 1)

          optimization_algorithm : string
            actual name of the optimization algorithm. See
            http://scipy.org/scipy/scikits/wiki/NLP
            for a comprehensive/updated list of available NLP solvers.
            (Defaults to 'scipy_cg')

          ftol : float
            threshold for the stopping criterion of the solver,
            which is mapped to OpenOpt NLP.ftol.
            (Defaults to 1.0e-3)

          fixedHypers : numpy.ndarray (boolean array)
            boolean vector of the same size as hyp_initial_guess;
            'True' means that the corresponding hyperparameter must
            be kept fixed (so not optimized).
            (Defaults to None, which means that all hyperparameters
            are optimized)

          use_gradient : bool
            whether to provide the solver with the analytical gradient
            of the log_marginal_likelihood.
            (Defaults to False)

          logscale : bool
            whether to optimize the hyperparameters on a logarithmic
            scale, to enhance numerical stability.
            (Defaults to False)

        NOTE: the maximization of log_marginal_likelihood is a non-linear
        optimization problem (NLP). This fact is confirmed by Dmitrey,
        author of OpenOpt.
        """
        self.problem = None
        self.use_gradient = use_gradient
        self.logscale = logscale # use log-scale on hyperparameters to enhance numerical stability
        self.optimization_algorithm = optimization_algorithm
        self.hyp_initial_guess = N.array(hyp_initial_guess)
        self.hyp_initial_guess_log = N.log(self.hyp_initial_guess)
        if fixedHypers is None:
            fixedHypers = N.zeros(self.hyp_initial_guess.shape[0], dtype=bool)
            pass
        self.freeHypers = N.logical_not(fixedHypers)
        if self.logscale:
            self.hyp_running_guess = self.hyp_initial_guess_log.copy()
        else:
            self.hyp_running_guess = self.hyp_initial_guess.copy()
            pass
        self.f_last_x = None

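        # How the subset optimization works: the solver only sees the free
        # hyperparameters; f() and df() below re-expand them into the full
        # vector via self.hyp_running_guess[self.freeHypers] = x. E.g.
        # (hypothetical values, for illustration) with
        # hyp_initial_guess=[sigma_noise, sigma_f, length_scale] and
        # fixedHypers=[True, False, False], the solver optimizes the
        # 2-vector x = [sigma_f, length_scale] while sigma_noise keeps its
        # initial value.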
        def f(x):
            """
            Wrapper to the log_marginal_likelihood to be
            maximized.
            """
            # XXX EO: since some OpenOpt NLP solvers do not
            # implement lower bounds, the hyperparameters' bounds are
            # implemented inside PyMVPA (see dmitrey's post on
            # [SciPy-user] 20080628).
            #
            # XXX EO: OpenOpt does not implement optimization of a
            # subset of the hyperparameters, so it is implemented here.
            #
            # XXX EO: OpenOpt does not implement logarithmic scale of
            # the hyperparameters (to enhance numerical stability), so
            # it is implemented here.
            self.f_last_x = x.copy()
            self.hyp_running_guess[self.freeHypers] = x
            # REMOVE print "guess:",self.hyp_running_guess,x
            try:
                if self.logscale:
                    self.parametric_model.set_hyperparameters(N.exp(self.hyp_running_guess))
                else:
                    self.parametric_model.set_hyperparameters(self.hyp_running_guess)
                    pass
            except InvalidHyperparameterError:
                if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
                return -N.inf
            try:
                self.parametric_model.train(self.dataset)
            except (N.linalg.linalg.LinAlgError, SL.basic.LinAlgError, ValueError):
                # Note that ValueError could be raised when Cholesky gets Inf or NaN.
                if __debug__: debug("MOD_SEL", "WARNING: Cholesky failed! Invalid hyperparameters!")
                return -N.inf
            log_marginal_likelihood = self.parametric_model.compute_log_marginal_likelihood()
            # REMOVE print log_marginal_likelihood
            return log_marginal_likelihood

        def df(x):
            """
            Proxy to the log_marginal_likelihood first
            derivative. Necessary for OpenOpt when using derivatives.
            """
            self.hyp_running_guess[self.freeHypers] = x
            # REMOVE print "df guess:",self.hyp_running_guess,x
            # XXX EO: Most of the following lines could be skipped if
            # df() is computed just after f() with the same
            # hyperparameters. The partial results obtained during f()
            # are what is needed for df(). For now, in order to avoid
            # bugs difficult to trace, we keep this redundancy. A
            # deep check on how OpenOpt works, or using memoization,
            # should solve this issue.
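            # A minimal memoization sketch (hypothetical, not active code):
            # if f() also cached its return value, say in self.f_last_lml,
            # df() could reuse it instead of re-training the model:
            #
            #   if self.f_last_x is not None and N.all(x == self.f_last_x):
            #       log_marginal_likelihood = self.f_last_lml  # cached by f()
            #   else:
            #       log_marginal_likelihood = f(x)  # recompute and re-cache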
            try:
                if self.logscale:
                    self.parametric_model.set_hyperparameters(N.exp(self.hyp_running_guess))
                else:
                    self.parametric_model.set_hyperparameters(self.hyp_running_guess)
                    pass
            except InvalidHyperparameterError:
                if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
                return -N.inf
            # Check if it is possible to avoid useless computations
            # already done in f(). According to tests and information
            # collected from OpenOpt people, it is sufficiently
            # unexpected that the following test succeeds:
            if N.any(x != self.f_last_x):
                if __debug__: debug("MOD_SEL", "UNEXPECTED: recomputing train+log_marginal_likelihood.")
                try:
                    self.parametric_model.train(self.dataset)
                except (N.linalg.linalg.LinAlgError, SL.basic.LinAlgError, ValueError):
                    if __debug__: debug("MOD_SEL", "WARNING: Cholesky failed! Invalid hyperparameters!")
                    # XXX EO: which value of the gradient should be returned
                    # to OpenOpt when hyperparameters are wrong?
                    return N.zeros(x.size)
                log_marginal_likelihood = self.parametric_model.compute_log_marginal_likelihood() # recompute what's needed (to be safe) REMOVE IN FUTURE!
                pass
            if self.logscale:
                # chain rule for the log-scale parametrization:
                # d lml / d log(h) = h * d lml / d h
                gradient_log_marginal_likelihood = self.parametric_model.compute_gradient_log_marginal_likelihood_logscale()
            else:
                gradient_log_marginal_likelihood = self.parametric_model.compute_gradient_log_marginal_likelihood()
                pass
            # REMOVE print "grad:",gradient_log_marginal_likelihood
            return gradient_log_marginal_likelihood[self.freeHypers]


        if self.logscale:
            # vector of hyperparameters' values where to start the search
            x0 = self.hyp_initial_guess_log[self.freeHypers]
        else:
            x0 = self.hyp_initial_guess[self.freeHypers]
            pass
        self.contol = 1.0e-20 # constraint tolerance level
        # XXX EO: is it necessary to use contol when self.logscale is
        # True and there is no lb? Ask dmitrey.
        if self.use_gradient:
            # actual instance of the OpenOpt non-linear problem
            self.problem = NLP(f, x0, df=df, contol=self.contol, goal='maximum')
        else:
            self.problem = NLP(f, x0, contol=self.contol, goal='maximum')
            pass
        self.problem.name = "Max LogMargLikelihood"
        if not self.logscale:
            # set lower bound for hyperparameters: avoid negative
            # hyperparameters. Note: problem.n is the size of the
            # hyperparameters' vector
            self.problem.lb = N.zeros(self.problem.n) + self.contol
            pass
        # max number of iterations for the optimizer.
        self.problem.maxiter = maxiter
        # check whether the derivative of log_marginal_likelihood converged
        # to zero before ending optimization
        self.problem.checkdf = True
        # set increment of log_marginal_likelihood under which the optimizer stops
        self.problem.ftol = ftol
        self.problem.iprint = _openopt_debug()
        return self.problem


    def solve(self):
        """Solve the maximization problem, check the outcome, and collect
        the results.
        """
        # XXX: this method could be made more abstract in the future, in
        # the sense that it could work not only for
        # log_marginal_likelihood but for other measures as well
        # (e.g., cross-validated error).

        if N.all(self.freeHypers == False): # no optimization needed
            self.hyperparameters_best = self.hyp_initial_guess.copy()
            try:
                self.parametric_model.set_hyperparameters(self.hyperparameters_best)
            except InvalidHyperparameterError:
                if __debug__: debug("MOD_SEL", "WARNING: invalid hyperparameters!")
                self.log_marginal_likelihood_best = -N.inf
                return self.log_marginal_likelihood_best
            self.parametric_model.train(self.dataset)
            self.log_marginal_likelihood_best = self.parametric_model.compute_log_marginal_likelihood()
            return self.log_marginal_likelihood_best

        result = self.problem.solve(self.optimization_algorithm) # perform optimization!
        if result.stopcase == -1:
            # XXX: should we use debug() for the following messages?
            # If so, how can we track the missing convergence to a
            # solution?
            print "Unable to find a maximum to log_marginal_likelihood"
        elif result.stopcase == 0:
            print "Limits exceeded"
        elif result.stopcase == 1:
            self.hyperparameters_best = self.hyp_initial_guess.copy()
            if self.logscale:
                # best hyperparameters found # NOTE: is it better to return a copy?
                self.hyperparameters_best[self.freeHypers] = N.exp(result.xf)
            else:
                self.hyperparameters_best[self.freeHypers] = result.xf
                pass
            self.log_marginal_likelihood_best = result.ff # actual best value of log_marginal_likelihood
            pass
        self.stopcase = result.stopcase
        return self.log_marginal_likelihood_best

    pass


if __name__ == "__main__":

    import gpr
    import kernel
    from mvpa.misc import data_generators
    from mvpa.base import externals
    N.random.seed(1)

    if externals.exists("pylab", force=True):
        import pylab
        pylab.close("all")
        pylab.ion()

    from mvpa.datasets import Dataset

    print "GPR:",

    train_size = 40
    test_size = 100
    F = 1

    dataset = data_generators.sinModulated(train_size, F)
    # print label_train

    dataset_test = data_generators.sinModulated(test_size, F, flat=True)
    data_test = dataset_test.samples
    label_test = dataset_test.labels
    # print label_test

    regression = True
    logml = True

    k = kernel.KernelSquaredExponential()
    g = gpr.GPR(k, regression=regression)
    g.states.enable("log_marginal_likelihood")
    # g.train_fv = dataset.samples
    # g.train_labels = dataset.labels

    print "GPR hyperparameters' search through maximization of marginal likelihood on train data."
    print
    ms = ModelSelector(g, dataset)

    sigma_noise_initial = 1.0e0 # 0.154142346606
    sigma_f_initial = 1.0e0 # 0.687554871058
    length_scale_initial = 1.0e0 # 0.263620251025

    problem = ms.max_log_marginal_likelihood(
        hyp_initial_guess=[sigma_noise_initial, sigma_f_initial, length_scale_initial],
        optimization_algorithm="ralg", ftol=1.0e-8,
        fixedHypers=N.array([0, 0, 0], dtype=bool))
    # problem = ms.max_log_marginal_likelihood(hyp_initial_guess=[1.0,1.0], optimization_algorithm="ralg", ftol=1.0e-3)

    lml = ms.solve()
    # print ms.hyperparameters_best
    sigma_noise_best, sigma_f_best, length_scale_best = ms.hyperparameters_best
    print
    print "Best sigma_noise:", sigma_noise_best
    print "Best sigma_f:", sigma_f_best
    print "Best length_scale:", length_scale_best
    print "Best log_marginal_likelihood:", lml

    # Best sigma_noise: 0.154142346606
    # Best sigma_f: 0.687554871058
    # Best length_scale: 0.263620251025
    # Best log_marginal_likelihood: -3.54790161194

    gpr.compute_prediction(sigma_noise_best, sigma_f_best, length_scale_best,
                           regression, dataset, data_test, label_test, F)

    print
    print "GPR ARD on dataset from Williams and Rasmussen 1996:"
    dataset = data_generators.wr1996()
    # dataset.samples = N.hstack([dataset.samples]*10) # enlarge dataset's dimensionality, for testing high dimensions

    # Uncomment the kernel you like:

    # Squared Exponential kernel:
    k = kernel.KernelSquaredExponential()
    sigma_noise_initial = 1.0e-0
    sigma_f_initial = 1.0e-0
    length_scale_initial = N.ones(dataset.samples.shape[1]) * 1.0e-0
    hyp_initial_guess = N.hstack([sigma_noise_initial, sigma_f_initial, length_scale_initial])
    # fixedHypers = N.array([0,0,0,0,0,0,0,0],dtype=bool)
    fixedHypers = N.array([0] * hyp_initial_guess.size, dtype=bool)

    # Linear kernel:
    # k = kernel.KernelLinear()
    # sigma_noise_initial = 1.0e-0
    # sigma_0_initial = 1.0e-0
    # Sigma_p_initial = N.ones(dataset.samples.shape[1])*1.0e-3
    # hyp_initial_guess = N.hstack([sigma_noise_initial,sigma_0_initial,Sigma_p_initial])
    # # fixedHypers = N.array([0,0,0,0,0,0,0,0],dtype=bool)
    # fixedHypers = N.array([0]*hyp_initial_guess.size,dtype=bool)

    # Constant kernel:
    # k = kernel.KernelConstant()
    # sigma_noise_initial = 1.0e-0
    # sigma_0_initial = 1.0e-0
    # hyp_initial_guess = N.array([sigma_noise_initial, sigma_0_initial])
    # # fixedHypers = N.array([0,0],dtype=bool)
    # fixedHypers = N.array([0]*hyp_initial_guess.size,dtype=bool)

    # Exponential kernel:
    # k = kernel.KernelExponential()
    # sigma_noise_initial = 1.0e0
    # sigma_f_initial = 1.0e0
    # length_scale_initial = N.ones(dataset.samples.shape[1])*1.0e0
    # # length_scale_initial = 1.0
    # hyp_initial_guess = N.hstack([sigma_noise_initial,sigma_f_initial,length_scale_initial])
    # print "hyp_initial_guess:",hyp_initial_guess
    # # fixedHypers = N.array([0,0,0,0,0,0,0,0],dtype=bool)
    # fixedHypers = N.array([0]*(hyp_initial_guess.size),dtype=bool)
    # # expected results (computed with use_gradient=False):
    # # objFunValue: 161.97952 (feasible, max constraint = 0)
    # # [ 6.72069299e-04 3.16515151e-01 3.11122154e+00 1.54833211e+00
    # #   1.94703461e+00 3.11122835e+00 4.37916189e+01 2.65398676e+01]

    # Matern_3_2 kernel:
    # k = kernel.KernelMatern_3_2()
    # sigma_noise_initial = 1.0e0
    # sigma_f_initial = 1.0e0
    # length_scale_initial = N.ones(dataset.samples.shape[1])*1.0e0
    # # length_scale_initial = 1.0
    # hyp_initial_guess = N.hstack([sigma_noise_initial,sigma_f_initial,length_scale_initial])
    # print "hyp_initial_guess:",hyp_initial_guess
    # # fixedHypers = N.array([0,0,0,0,0,0,0,0],dtype=bool)
    # fixedHypers = N.array([0]*(hyp_initial_guess.size),dtype=bool)

    # Rational Quadratic kernel:
    # k = kernel.KernelRationalQuadratic(alpha=0.5)
    # sigma_noise_initial = 1.0e0
    # sigma_f_initial = 1.0e0
    # length_scale_initial = N.ones(dataset.samples.shape[1])*1.0e0
    # # length_scale_initial = 1.0
    # hyp_initial_guess = N.hstack([sigma_noise_initial,sigma_f_initial,length_scale_initial])
    # print "hyp_initial_guess:",hyp_initial_guess
    # # fixedHypers = N.array([0,0,0,0,0,0,0,0],dtype=bool)
    # fixedHypers = N.array([0]*(hyp_initial_guess.size),dtype=bool)


    g = gpr.GPR(k, regression=regression)
    g.states.enable("log_marginal_likelihood")
    ms = ModelSelector(g, dataset)

    # Note that some kernels do not have gradients yet!
    problem = ms.max_log_marginal_likelihood(
        hyp_initial_guess=hyp_initial_guess, optimization_algorithm="ralg",
        ftol=1.0e-5, fixedHypers=fixedHypers, use_gradient=True, logscale=True)
    lml = ms.solve()
    print ms.hyperparameters_best
418