Package mvpa :: Package clfs :: Module lars
[hide private]
[frames] | [no frames]

Source Code for Module mvpa.clfs.lars

  1  #emacs: -*- mode: python-mode; py-indent-offset: 4; indent-tabs-mode: nil -*- 
  2  #ex: set sts=4 ts=4 sw=4 et: 
  3  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  4  # 
  5  #   See COPYING file distributed along with the PyMVPA package for the 
  6  #   copyright and license terms. 
  7  # 
  8  ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ## 
  9  """Least angle regression (LARS) classifier.""" 
 10   
 11  __docformat__ = 'restructuredtext' 
 12   
 13  # system imports 
 14  import numpy as N 
 15   
import mvpa.base.externals as externals
# Fail early, at import time, if the R bridge (rpy) or the R 'lars'
# contributed package is missing -- this classifier cannot work without them.
externals.exists('rpy', raiseException=True)
externals.exists('lars', raiseException=True)

# do conditional to be able to build module reference
if externals.exists('rpy') and externals.exists('lars'):
    import rpy
    # load the R 'lars' package into the embedded R session
    rpy.r.library('lars')
 24   
 25   
 26  # local imports 
 27  from mvpa.clfs.base import Classifier 
 28  from mvpa.measures.base import Sensitivity 
 29   
 30  if __debug__: 
 31      from mvpa.base import debug 
 32   
# Model-selection variants accepted by R's lars() as its 'type' argument;
# validated against in LARS.__init__.
known_models = ('lasso', 'stepwise', 'lar', 'forward.stagewise')
class LARS(Classifier):
    """Least angle regression (LARS) `Classifier`.

    LARS is the model selection algorithm from:

    Bradley Efron, Trevor Hastie, Iain Johnstone and Robert
    Tibshirani, Least Angle Regression Annals of Statistics (with
    discussion) (2004) 32(2), 407-499. A new method for variable
    subset selection, with the lasso and 'epsilon' forward stagewise
    methods as special cases.

    Similar to SMLR, it performs a feature selection while performing
    classification, but instead of starting with all features, it
    starts with none and adds them in, which is similar to boosting.

    This classifier behaves more like a ridge regression in that it
    returns prediction values and it treats the training labels as
    continuous.

    In the true nature of the PyMVPA framework, this algorithm is
    actually implemented in R by Trevor Hastie and wrapped via RPy.
    To make use of LARS, you must have R and RPy installed as well as
    the LARS contributed package. You can install the R and RPy with
    the following command on Debian-based machines::

      sudo aptitude install python-rpy python-rpy-doc r-base-dev

    You can then install the LARS package by running R as root and
    calling::

      install.packages()
    """

    # XXX from yoh: it is linear, isn't it?
    _clf_internals = ['lars', 'regression', 'linear', 'has_sensitivity',
                      'does_feature_selection',
                      ]

    def __init__(self, model_type="lasso", trace=False, normalize=True,
                 intercept=True, max_steps=None, use_Gram=False, **kwargs):
        """
        Initialize LARS.

        See the help in R for further details on the following parameters:

        :Parameters:
          model_type : string
            Type of LARS to run. Can be one of ('lasso', 'lar',
            'forward.stagewise', 'stepwise').
          trace : boolean
            Whether to print progress in R as it works.
          normalize : boolean
            Whether to normalize the L2 Norm.
          intercept : boolean
            Whether to add a non-penalized intercept to the model.
          max_steps : None or int
            If not None, specify the total number of iterations to run. Each
            iteration adds a feature, but leaving it None will add until
            convergence.
          use_Gram : boolean
            Whether to compute the Gram matrix (this should be false if you
            have more features than samples.)
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        if model_type not in known_models:
            # fixed message: original concatenation produced "Knownare"
            raise ValueError('Unknown model %s for LARS is specified. '
                             'Known are %s'
                             % (model_type, repr(known_models)))

        # set up the params
        self.__type = model_type
        self.__normalize = normalize
        self.__intercept = intercept
        self.__trace = trace
        self.__max_steps = max_steps
        self.__use_Gram = use_Gram

        # pylint friendly initializations
        # beta weights for each feature (set during _train)
        self.__weights = None
        # R model object returned by lars(); reused for predictions
        self.__trained_model = None

        # It does not make sense to calculate a confusion matrix for a
        # regression
        self.states.enable('training_confusion', False)


    def __repr__(self):
        """String summary of the object
        """
        return """LARS(type=%s, normalize=%s, intercept=%s, trace=%s, max_steps=%s, use_Gram=%s, enable_states=%s)""" % \
               (self.__type,
                self.__normalize,
                self.__intercept,
                self.__trace,
                self.__max_steps,
                self.__use_Gram,
                str(self.states.enabled))


    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """
        # only pass max_steps to R when explicitly requested, so that
        # lars() otherwise runs with its own default stopping criterion
        extra_args = {}
        if self.__max_steps is not None:
            extra_args['max_steps'] = self.__max_steps

        self.__trained_model = rpy.r.lars(data.samples,
                                          data.labels[:, N.newaxis],
                                          type=self.__type,
                                          normalize=self.__normalize,
                                          intercept=self.__intercept,
                                          trace=self.__trace,
                                          use_Gram=self.__use_Gram,
                                          **extra_args)

        # find the step with the lowest Cp (risk); it is often the last
        # step if you set max_steps.  RPy returns 'Cp' as a dict keyed by
        # the step number as a string, so convert it to an array first.
        Cp_vals = N.asarray([self.__trained_model['Cp'][str(x)]
                             for x in range(len(self.__trained_model['Cp']))])
        self.__lowest_Cp_step = Cp_vals.argmin()

        # set the weights to the lowest Cp step
        self.__weights = self.__trained_model['beta'][self.__lowest_Cp_step, :]


    def _predict(self, data):
        """
        Predict the output for the provided data.

        Uses the step with the lowest Cp found during training.
        """
        res = rpy.r.predict_lars(self.__trained_model,
                                 data,
                                 mode='step',
                                 s=self.__lowest_Cp_step)

        fit = N.asarray(res['fit'])
        if len(fit.shape) == 0:
            # if we just got 1 sample with a scalar
            fit = fit.reshape((1,))
        return fit


    def _getFeatureIds(self):
        """Return ids of the used features (those with non-zero weights)
        """
        return N.where(N.abs(self.__weights) > 0)[0]


    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for LARS."""
        return LARSWeights(self, **kwargs)

    # read-only access to the trained beta weights
    weights = property(lambda self: self.__weights)
class LARSWeights(Sensitivity):
    """`SensitivityAnalyzer` that reports the weights LARS trained
    on a given `Dataset`.
    """

    _LEGAL_CLFS = [LARS]

    def _call(self, dataset=None):
        """Extract weights from LARS classifier.

        LARS always has weights available, so nothing has to be computed here.
        """
        w = self.clf.weights

        if __debug__:
            # adjacent literals concatenate before %-formatting, yielding
            # the full "Extracting weights ... Result: ..." message
            debug('LARS',
                  "Extracting weights for LARS - "
                  "Result: min=%f max=%f" % (N.min(w), N.max(w)))

        return w