
Source Code for Module mvpa.clfs.smlr

#emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*-
#ex: set sts=4 ts=4 sw=4 et:
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the PyMVPA package for the
#   copyright and license terms.
#
### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Sparse Multinomial Logistic Regression classifier."""

__docformat__ = 'restructuredtext'

import numpy as N

from mvpa.base import warning, externals
from mvpa.clfs.base import Classifier
from mvpa.measures.base import Sensitivity
from mvpa.misc.exceptions import ConvergenceError
from mvpa.misc.param import Parameter
from mvpa.misc.state import StateVariable
from mvpa.misc.transformers import SecondAxisMaxOfAbs


_DEFAULT_IMPLEMENTATION = "Python"
if externals.exists('ctypes'):
    # Uber-fast C-version of the stepwise regression
    from mvpa.clfs.libsmlrc import stepwise_regression as _cStepwiseRegression
    _DEFAULT_IMPLEMENTATION = "C"
else:
    _cStepwiseRegression = None
    warning("SMLR implementation without ctypes is overwhelmingly slow."
            " You are strongly advised to install python-ctypes")

if __debug__:
    from mvpa.base import debug

def _label2oneofm(labels, ulabels):
    """Convert labels to one-of-M form.

    TODO: Might be useful elsewhere so could migrate into misc/
    """

    # allocate for the new one-of-M labels
    new_labels = N.zeros((len(labels), len(ulabels)))

    # loop and convert to one-of-M
    for i, c in enumerate(ulabels):
        new_labels[labels == c, i] = 1

    return new_labels

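# An illustrative sketch (added commentary, not part of the original module)
# of what _label2oneofm produces -- each unique label becomes an indicator
# column, e.g.
#
#   >>> _label2oneofm(N.array([1, 2, 1]), N.array([1, 2]))
#   array([[ 1.,  0.],
#          [ 0.,  1.],
#          [ 1.,  0.]])
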
class SMLR(Classifier):
    """Sparse Multinomial Logistic Regression `Classifier`.

    This is an implementation of the SMLR algorithm published in
    :ref:`Krishnapuram et al., 2005 <KCF+05>` (2005, IEEE Transactions
    on Pattern Analysis and Machine Intelligence). Be sure to cite
    that article if you use this classifier for your work.
    """

    _clf_internals = [ 'smlr', 'linear', 'has_sensitivity', 'binary',
                       'multiclass', 'does_feature_selection' ]
    # XXX: later 'kernel-based'?

    lm = Parameter(.1, min=1e-10, allowedtype='float',
             doc="""The penalty term lambda.  Larger values will give rise to
             more sparsification.""")

    convergence_tol = Parameter(1e-3, min=1e-10, max=1.0, allowedtype='float',
             doc="""When the weight change for each cycle drops below this
             value the regression is considered converged.  Smaller values
             lead to tighter convergence.""")

    resamp_decay = Parameter(0.5, allowedtype='float', min=0.0, max=1.0,
             doc="""Decay rate in the probability of resampling a zero weight.
             A value of 1.0 drops the resampling probability to min_resamp
             immediately, while 0.0 keeps it at 1.0 forever.""")

    min_resamp = Parameter(0.001, allowedtype='float', min=1e-10, max=1.0,
             doc="Minimum resampling probability for zeroed weights")

    maxiter = Parameter(10000, allowedtype='int', min=1,
             doc="""Maximum number of iterations before stopping if not
             converged.""")

    has_bias = Parameter(True, allowedtype='bool',
             doc="""Whether to add a bias term to allow fits to data not
             passing through zero""")

    fit_all_weights = Parameter(True, allowedtype='bool',
             doc="""Whether to fit weights for all classes or only for the
             number of classes minus one.  Both should give nearly identical
             results, but setting fit_all_weights to True takes a little
             longer and yields weights that are fully analyzable for each
             class.  Also note that the convergence rate may differ, but the
             convergence point is the same.""")

    implementation = Parameter(_DEFAULT_IMPLEMENTATION,
             allowedtype='basestring',
             choices=["C", "Python"],
             doc="""Use C or Python as the implementation of
             stepwise_regression.  The C version brings a significant speedup
             and is thus the default.""")

    seed = Parameter(None, allowedtype='None or int',
             doc="""Seed used to initialize the random generator; can be used
             to replicate a run""")

    def __init__(self, **kwargs):
        """Initialize an SMLR classifier.
        """

        """
        TODO:
         # Add in likelihood calculation
         # Add kernels, not just direct methods.
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        if _cStepwiseRegression is None and self.implementation == 'C':
            warning('SMLR: C implementation is not available.'
                    ' Using pure Python one')
            self.implementation = 'Python'

        # pylint friendly initializations
        self.__ulabels = None
        """Unique labels from the training set."""
        self.__weights_all = None
        """Contains all weights including bias values"""
        self.__weights = None
        """Just the weights, without the biases"""
        self.__biases = None
        """The biases, will remain None if has_bias is False"""

    def _pythonStepwiseRegression(self, w, X, XY, Xw, E,
                                  auto_corr,
                                  lambda_over_2_auto_corr,
                                  S,
                                  M,
                                  maxiter,
                                  convergence_tol,
                                  resamp_decay,
                                  min_resamp,
                                  verbose,
                                  seed=None):
        """The (much slower) Python version of the stepwise
        regression.  I'm keeping this around for now so that we can
        compare results."""

        # get the data information into easy vars
        ns, nd = X.shape

        # initialize the iterative optimization
        converged = False
        incr = N.finfo(N.float).max
        non_zero, basis, m, wasted_basis, cycles = 0, 0, 0, 0, 0
        sum2_w_diff, sum2_w_old, w_diff = 0.0, 0.0, 0.0
        p_resamp = N.ones(w.shape, dtype=N.float)

        if seed is not None:
            # set the random seed
            N.random.seed(seed)

            if __debug__:
                debug("SMLR_", "random seed=%s" % seed)

        # perform the optimization
        while not converged and cycles < maxiter:
            # get the starting weight
            w_old = w[basis, m]

            # see if we're gonna update
            if (w_old != 0) or N.random.rand() < p_resamp[basis, m]:
                # let's do it
                # get the probability
                P = E[:, m]/S

                # set the gradient
                grad = XY[basis, m] - N.dot(X[:, basis], P)

                # calculate the new weight with the Laplacian prior
                w_new = w_old + grad/auto_corr[basis]

                # keep weights within bounds
                if w_new > lambda_over_2_auto_corr[basis]:
                    w_new -= lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the prob of resampling
                        p_resamp[basis, m] = 1.0
                elif w_new < -lambda_over_2_auto_corr[basis]:
                    w_new += lambda_over_2_auto_corr[basis]
                    changed = True
                    # unmark from being zero if necessary
                    if w_old == 0:
                        non_zero += 1
                        # reset the prob of resampling
                        p_resamp[basis, m] = 1.0
                else:
                    # gonna zero it out
                    w_new = 0.0

                    # decrease the p_resamp
                    p_resamp[basis, m] -= (p_resamp[basis, m] -
                                           min_resamp) * resamp_decay

                    # set number of non-zero
                    if w_old == 0:
                        changed = False
                        wasted_basis += 1
                    else:
                        changed = True
                        non_zero -= 1

                # process any changes
                if changed:
                    #print "w[%d, %d] = %g" % (basis, m, w_new)
                    # update the expected values
                    w_diff = w_new - w_old
                    Xw[:, m] = Xw[:, m] + X[:, basis]*w_diff
                    E_new_m = N.exp(Xw[:, m])
                    S += E_new_m - E[:, m]
                    E[:, m] = E_new_m

                    # update the weight
                    w[basis, m] = w_new

                    # keep track of the sum of squared diffs
                    sum2_w_diff += w_diff*w_diff

                # add to the old no matter what
                sum2_w_old += w_old*w_old

            # update the class and basis
            m = N.mod(m+1, w.shape[1])
            if m == 0:
                # we completed a cycle of labels
                basis = N.mod(basis+1, nd)
                if basis == 0:
                    # we completed a cycle of features
                    cycles += 1

                    # assess convergence
                    incr = N.sqrt(sum2_w_diff) / \
                           (N.sqrt(sum2_w_old) + N.finfo(N.float).eps)
                    converged = incr < convergence_tol

                    if __debug__:
                        debug("SMLR_",
                              "cycle=%d ; incr=%g ; non_zero=%d ; " %
                              (cycles, incr, non_zero) +
                              "wasted_basis=%d ; " %
                              (wasted_basis) +
                              "sum2_w_old=%g ; sum2_w_diff=%g" %
                              (sum2_w_old, sum2_w_diff))

                    # reset the sum diffs and wasted_basis
                    sum2_w_diff = 0.0
                    sum2_w_old = 0.0
                    wasted_basis = 0

        if not converged:
            raise ConvergenceError, \
                  "More than %d Iterations without convergence" % \
                  (maxiter)

        # calculate the log likelihoods and posteriors for the training data
        #log_likelihood = x

        return cycles

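    # Added commentary (not original code): the per-weight update above is a
    # coordinate-wise gradient step followed by soft-thresholding.  With
    # a = auto_corr[basis] and t = lambda_over_2_auto_corr[basis] it amounts
    # to
    #
    #   w_new = w_old + grad / a
    #   w_new = sign(w_new) * max(|w_new| - t, 0)
    #
    # which is the step that shrinks weights exactly to zero and produces
    # the sparsity SMLR is named for.
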
    def _train(self, dataset):
        """Train the classifier using `dataset` (`Dataset`).
        """
        # Process the labels to turn into 1 of N encoding
        labels = _label2oneofm(dataset.labels, dataset.uniquelabels)
        self.__ulabels = dataset.uniquelabels.copy()

        Y = labels
        M = len(self.__ulabels)

        # get the dataset information into easy vars
        X = dataset.samples

        # see if we are adding a bias term
        if self.params.has_bias:
            if __debug__:
                debug("SMLR_", "hstacking 1s for bias")

            # append the bias term to the features
            X = N.hstack((X, N.ones((X.shape[0], 1), dtype=X.dtype)))

        if self.params.implementation.upper() == 'C':
            _stepwise_regression = _cStepwiseRegression
            #
            # TODO: avoid copying to non-contig arrays, use strides in ctypes?
            if not (X.flags['C_CONTIGUOUS'] and X.flags['ALIGNED']):
                if __debug__:
                    debug("SMLR_",
                          "Copying data to get it C_CONTIGUOUS/ALIGNED")
                X = N.array(X, copy=True, dtype=N.double, order='C')

            # currently must be double for the C code
            if X.dtype != N.double:
                if __debug__:
                    debug("SMLR_", "Converting data to double")
                # must cast to double
                X = X.astype(N.double)

        elif self.params.implementation.upper() == 'PYTHON':
            _stepwise_regression = self._pythonStepwiseRegression
        else:
            raise ValueError, \
                  "Unknown implementation %s of stepwise_regression" % \
                  self.params.implementation

        # set the feature dimensions
        ns, nd = X.shape

        # decide the size of weights based on num classes estimated
        if self.params.fit_all_weights:
            c_to_fit = M
        else:
            c_to_fit = M-1

        # Precompute what we can
        auto_corr = ((M-1.)/(2.*M))*(N.sum(X*X, 0))
        XY = N.dot(X.T, Y[:, :c_to_fit])
        lambda_over_2_auto_corr = (self.params.lm/2.)/auto_corr

        # set starting values
        w = N.zeros((nd, c_to_fit), dtype=N.double)
        Xw = N.zeros((ns, c_to_fit), dtype=N.double)
        E = N.ones((ns, c_to_fit), dtype=N.double)
        S = M*N.ones(ns, dtype=N.double)

        # set verbosity
        if __debug__:
            verbosity = int("SMLR_" in debug.active)
            debug('SMLR_',
                  'Calling stepwise_regression. Seed %s' % self.params.seed)
        else:
            verbosity = 0

        # call the chosen version of stepwise_regression
        cycles = _stepwise_regression(w,
                                      X,
                                      XY,
                                      Xw,
                                      E,
                                      auto_corr,
                                      lambda_over_2_auto_corr,
                                      S,
                                      M,
                                      self.params.maxiter,
                                      self.params.convergence_tol,
                                      self.params.resamp_decay,
                                      self.params.min_resamp,
                                      verbosity,
                                      self.params.seed)

        if cycles >= self.params.maxiter:
            # did not converge
            raise ConvergenceError, \
                  "More than %d Iterations without convergence" % \
                  (self.params.maxiter)

        # save the weights
        self.__weights_all = w
        self.__weights = w[:dataset.nfeatures, :]

        if self.states.isEnabled('feature_ids'):
            self.feature_ids = N.where(N.max(N.abs(w[:dataset.nfeatures, :]),
                                             axis=1) > 0)[0]

        # and a bias
        if self.params.has_bias:
            self.__biases = w[-1, :]

        if __debug__:
            debug('SMLR', "train finished in %d cycles on data.shape=%s " %
                  (cycles, X.shape) +
                  "min:max(data)=%f:%f, got min:max(w)=%f:%f" %
                  (N.min(X), N.max(X), N.min(w), N.max(w)))

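    # Added commentary (an interpretation of the precomputation above, not
    # original code): auto_corr holds the per-feature diagonal of
    # ((M-1)/(2M)) * X'X, the curvature bound the stepwise update divides the
    # gradient by, and lambda_over_2_auto_corr is the matching soft-threshold;
    # both are computed once here so either implementation can reuse them on
    # every cycle instead of recomputing per weight.
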
    def _getFeatureIds(self):
        """Return ids of the used features
        """
        return N.where(N.max(N.abs(self.__weights), axis=1) > 0)[0]

    def _predict(self, data):
        """Predict the output for the provided data.
        """
        # see if we are adding a bias term
        if self.params.has_bias:
            # append the bias term to the features
            data = N.hstack((data,
                             N.ones((data.shape[0], 1), dtype=data.dtype)))

        # append the zeros column to the weights if necessary
        if self.params.fit_all_weights:
            w = self.__weights_all
        else:
            w = N.hstack((self.__weights_all,
                          N.zeros((self.__weights_all.shape[0], 1))))

        # determine the probability values for making the prediction
        dot_prod = N.dot(data, w)
        E = N.exp(dot_prod)
        S = N.sum(E, 1)

        if __debug__:
            debug('SMLR', "predict on data.shape=%s min:max(data)=%f:%f " %
                  (`data.shape`, N.min(data), N.max(data)) +
                  "min:max(w)=%f:%f min:max(dot_prod)=%f:%f min:max(E)=%f:%f" %
                  (N.min(w), N.max(w), N.min(dot_prod), N.max(dot_prod),
                   N.min(E), N.max(E)))

        values = E / S[:, N.newaxis].repeat(E.shape[1], axis=1)
        self.values = values

        # generate predictions
        predictions = N.asarray([self.__ulabels[N.argmax(vals)]
                                 for vals in values])
        # no need to assign state variable here -- would be done
        # in Classifier._postpredict anyway
        #self.predictions = predictions

        return predictions

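    # Added commentary (a restatement of the math in _predict above, not
    # original code): the resulting values are a softmax over the linear
    # projections,
    #
    #   values[i, m] = exp(data[i] . w[:, m]) / sum_k exp(data[i] . w[:, k])
    #
    # and the predicted label is the class with the largest such posterior.
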
    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for SMLR."""
        kwargs.setdefault('combiner', SecondAxisMaxOfAbs)
        return SMLRWeights(self, **kwargs)


    biases = property(lambda self: self.__biases)
    weights = property(lambda self: self.__weights)


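# An illustrative usage sketch (added; `dataset` is assumed to be a PyMVPA
# `Dataset` with .samples and .labels, and is not defined in this module):
#
#   clf = SMLR(lm=.1)
#   clf.train(dataset)
#   predictions = clf.predict(dataset.samples)
#   # rows of clf.weights that are not all zero are the selected features
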
class SMLRWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights SMLR trained
    on a given `Dataset`.

    By default SMLR provides multiple weights per feature (one per label in
    the training dataset), and all weights are combined into a single
    sensitivity value.  Please see the `FeaturewiseDatasetMeasure` constructor
    arguments for how to customize this behavior.
    """

    biases = StateVariable(enabled=True,
                           doc="A 1-d ndarray of biases")

    _LEGAL_CLFS = [ SMLR ]

    def _call(self, dataset=None):
        """Extract weights from SMLR classifier.

        SMLR always has weights available, so nothing has to be computed here.
        """
        clf = self.clf
        weights = clf.weights
        # XXX: MH: The following warning is inappropriate. In almost all cases
        # SMLR will return more than one weight per feature. Even in the case
        # of a binary problem it will fit both weights by default. So unless
        # you specify fit_all_weights=False manually this warning is always
        # there. Too much annoyance IMHO. I moved this information into the
        # docstring, as there is no technical problem here, as
        # FeaturewiseDatasetMeasure by default applies a combiner -- just that
        # people should know...
        # PLEASE ACKNOWLEDGE AND REMOVE
        #if weights.shape[1] != 1:
        #    warning("You are estimating sensitivity for SMLR %s with"
        #            " multiple sensitivities available %s. Make sure that it"
        #            " is what you intended to do" % (self, weights.shape) )

        if clf.has_bias:
            self.biases = clf.biases

        if __debug__:
            debug('SMLR',
                  "Extracting weights for %d-class SMLR" %
                  (weights.shape[1] + 1) +
                  " Result: min=%f max=%f" %
                  (N.min(weights), N.max(weights)))

        return weights
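
# An illustrative sketch of the sensitivity interface (added; reuses the
# hypothetical `clf` and `dataset` from the sketch above):
#
#   sana = clf.getSensitivityAnalyzer()
#   sens = sana(dataset)   # weights combined per feature via SecondAxisMaxOfAbs
#   biases = sana.biases   # populated when the classifier was fit with a bias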