"""Least angle regression (LARS) classifier."""

__docformat__ = 'restructuredtext'


import numpy as N

import mvpa.base.externals as externals

# Fail early with an informative exception if the R bridge (rpy) or the
# R 'lars' contributed package is missing -- this module is unusable
# without them.
externals.exists('rpy', raiseException=True)
externals.exists('lars', raiseException=True)


if externals.exists('rpy') and externals.exists('lars'):
    import rpy
    # load the R 'lars' library into the embedded R session
    rpy.r.library('lars')


from mvpa.clfs.base import Classifier
from mvpa.measures.base import Sensitivity

if __debug__:
    from mvpa.base import debug

# Model types understood by R's lars() implementation
known_models = ('lasso', 'stepwise', 'lar', 'forward.stagewise')
34
class LARS(Classifier):
    """Least angle regression (LARS) `Classifier`.

    LARS is the model selection algorithm from:

    Bradley Efron, Trevor Hastie, Iain Johnstone and Robert
    Tibshirani, Least Angle Regression Annals of Statistics (with
    discussion) (2004) 32(2), 407-499. A new method for variable
    subset selection, with the lasso and 'epsilon' forward stagewise
    methods as special cases.

    Similar to SMLR, it performs a feature selection while performing
    classification, but instead of starting with all features, it
    starts with none and adds them in, which is similar to boosting.

    This classifier behaves more like a ridge regression in that it
    returns prediction values and it treats the training labels as
    continuous.

    In the true nature of the PyMVPA framework, this algorithm is
    actually implemented in R by Trevor Hastie and wrapped via RPy.
    To make use of LARS, you must have R and RPy installed as well as
    the LARS contributed package. You can install the R and RPy with
    the following command on Debian-based machines:

    sudo aptitude install python-rpy python-rpy-doc r-base-dev

    You can then install the LARS package by running R as root and
    calling:

    install.packages()

    """

    # tags characterizing this classifier for the framework
    _clf_internals = [ 'lars', 'regression', 'linear', 'has_sensitivity',
                       'does_feature_selection',
                       ]

    def __init__(self, model_type="lasso", trace=False, normalize=True,
                 intercept=True, max_steps=None, use_Gram=False, **kwargs):
        """
        Initialize LARS.

        See the help in R for further details on the following parameters:

        :Parameters:
          model_type : string
            Type of LARS to run. Can be one of ('lasso', 'lar',
            'forward.stagewise', 'stepwise').
          trace : boolean
            Whether to print progress in R as it works.
          normalize : boolean
            Whether to normalize the L2 Norm.
          intercept : boolean
            Whether to add a non-penalized intercept to the model.
          max_steps : None or int
            If not None, specify the total number of iterations to run. Each
            iteration adds a feature, but leaving it none will add until
            convergence.
          use_Gram : boolean
            Whether to compute the Gram matrix (this should be false if you
            have more features than samples.)
        """
        # init base class first
        Classifier.__init__(self, **kwargs)

        if not model_type in known_models:
            # NB: a space between 'Known' and 'are' was missing before
            raise ValueError('Unknown model %s for LARS is specified. '
                             'Known are %s' % (model_type,
                                               repr(known_models)))

        # set up the parameters
        self.__type = model_type
        self.__normalize = normalize
        self.__intercept = intercept
        self.__trace = trace
        self.__max_steps = max_steps
        self.__use_Gram = use_Gram

        # pylint-friendly initializations
        self.__weights = None
        # beta weights for each feature (set by _train)
        self.__trained_model = None
        # the R model object after training that will be used for
        # predictions (set by _train)
        self.__lowest_Cp_step = None
        # index of the iteration with the lowest Cp (risk); determines
        # which set of betas is used for prediction

        # It does not make sense to calculate a confusion matrix for a
        # regression-like classifier
        self.states.enable('training_confusion', False)

    def __repr__(self):
        """String summary of the object
        """
        return """LARS(type=%s, normalize=%s, intercept=%s, trace=%s, max_steps=%s, use_Gram=%s, enable_states=%s)""" % \
               (self.__type,
                self.__normalize,
                self.__intercept,
                self.__trace,
                self.__max_steps,
                self.__use_Gram,
                str(self.states.enabled))

    def _train(self, data):
        """Train the classifier using `data` (`Dataset`).
        """
        # only forward max_steps to R if it was explicitly provided;
        # otherwise lars() runs until convergence
        extra_args = {}
        if self.__max_steps is not None:
            extra_args['max_steps'] = self.__max_steps

        # labels are passed as a column vector; rpy maps use_Gram to
        # the R argument use.Gram
        self.__trained_model = rpy.r.lars(data.samples,
                                          data.labels[:, N.newaxis],
                                          type=self.__type,
                                          normalize=self.__normalize,
                                          intercept=self.__intercept,
                                          trace=self.__trace,
                                          use_Gram=self.__use_Gram,
                                          **extra_args)

        # rpy returns Cp as a dict keyed by the step number as a string;
        # pick the step with the lowest Cp (risk)
        Cp_vals = N.asarray([self.__trained_model['Cp'][str(x)]
                             for x in range(len(self.__trained_model['Cp']))])
        self.__lowest_Cp_step = Cp_vals.argmin()

        # set the weights to the betas of the lowest-Cp step
        self.__weights = self.__trained_model['beta'][self.__lowest_Cp_step, :]

    def _predict(self, data):
        """
        Predict the output for the provided data.
        """
        # predict using the step that had the lowest Cp during training
        res = rpy.r.predict_lars(self.__trained_model,
                                 data,
                                 mode='step',
                                 s=self.__lowest_Cp_step)

        fit = N.asarray(res['fit'])
        if len(fit.shape) == 0:
            # if we just got 1 sample, make sure to return a 1-d array
            fit = fit.reshape( (1,) )
        return fit

    def _getFeatureIds(self):
        """Return ids of the used features
        """
        # features with a non-zero beta were selected by the model
        return N.where(N.abs(self.__weights) > 0)[0]

    def getSensitivityAnalyzer(self, **kwargs):
        """Returns a sensitivity analyzer for LARS."""
        return LARSWeights(self, **kwargs)

    # read-only access to the trained beta weights
    weights = property(lambda self: self.__weights)
205
206
207
209 """`SensitivityAnalyzer` that reports the weights LARS trained
210 on a given `Dataset`.
211 """
212
213 _LEGAL_CLFS = [ LARS ]
214
215 - def _call(self, dataset=None):
216 """Extract weights from LARS classifier.
217
218 LARS always has weights available, so nothing has to be computed here.
219 """
220 clf = self.clf
221 weights = clf.weights
222
223 if __debug__:
224 debug('LARS',
225 "Extracting weights for LARS - "+
226 "Result: min=%f max=%f" %\
227 (N.min(weights), N.max(weights)))
228
229 return weights
230