This repository was archived by the owner on Dec 6, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 60
/
Copy pathplot_stability_scores.py
55 lines (39 loc) · 1.79 KB
/
plot_stability_scores.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
"""
===========================
Plotting stability scores
===========================
An example plot of the stability scores for each variable after fitting :class:`stability_selection.stability_selection.StabilitySelection` on synthetic classification data.
"""
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
from stability_selection import StabilitySelection, plot_stability_path
def _generate_dummy_classification_data(p=1000, n=1000, k=5, random_state=123321):
    """Generate a synthetic binary classification problem with a sparse signal.

    Parameters
    ----------
    p : int
        Number of features (columns of ``X``).
    n : int
        Number of samples (rows of ``X``).
    k : int
        Number of features that receive a non-zero coefficient. Must not
        exceed ``p``, since the indices are drawn without replacement.
    random_state : int, RandomState instance or None
        Seed or generator, forwarded to ``sklearn.utils.check_random_state``.

    Returns
    -------
    X : ndarray of shape (n, p)
        Design matrix with i.i.d. standard normal entries.
    y : ndarray of shape (n,)
        Integer 0/1 labels; 1 where the logistic probability exceeds 0.5.
    important_betas : ndarray of shape (k,)
        Sorted, distinct column indices of the features with non-zero
        coefficients.

    Raises
    ------
    ValueError
        If ``k > p`` (raised by ``choice`` when sampling without replacement).
    """
    rng = check_random_state(random_state)

    X = rng.normal(loc=0.0, scale=1.0, size=(n, p))

    # Sample WITHOUT replacement: the default (replace=True) may draw the same
    # index more than once, silently leaving fewer than k informative features.
    important_betas = np.sort(
        rng.choice(a=np.arange(p), size=k, replace=False)
    )

    # Only the chosen features get a (positive, uniform(0, 1)) coefficient.
    betas = np.zeros(p)
    betas[important_betas] = rng.uniform(size=k)

    # Labels are a deterministic threshold of the logistic probability,
    # not Bernoulli draws from it.
    probs = 1 / (1 + np.exp(-1 * np.matmul(X, betas)))
    y = (probs > 0.5).astype(int)

    return X, y, important_betas
if __name__ == '__main__':
    # Example script: fit stability selection on synthetic data, plot the
    # stability path and print the selected variables with their scores.
    n, p, k = 500, 1000, 5

    # Pass p explicitly so the generated data always matches the value
    # declared above instead of relying on the generator's default.
    X, y, important_betas = _generate_dummy_classification_data(p=p, n=n, k=k)

    base_estimator = Pipeline([
        ('scaler', StandardScaler()),
        # The L1 penalty needs a solver that supports it. Recent scikit-learn
        # versions default to 'lbfgs', which rejects penalty='l1', so the
        # solver is named explicitly ('liblinear' was the historical default).
        ('model', LogisticRegression(penalty='l1', solver='liblinear'))
    ])

    # 'model__C' addresses the C parameter of the pipeline's 'model' step;
    # it is the value swept over lambda_grid.
    selector = StabilitySelection(
        base_estimator=base_estimator,
        lambda_name='model__C',
        lambda_grid=np.logspace(-5, -1, 50),
    )
    selector.fit(X, y)

    fig, ax = plot_stability_path(selector)
    fig.show()

    selected_variables = selector.get_support(indices=True)
    # Maximum score per row of stability_scores_.
    # NOTE(review): presumably rows are variables and columns are grid
    # values -- confirm against StabilitySelection.
    selected_scores = selector.stability_scores_.max(axis=1)

    print('Selected variables are:')
    print('-----------------------')

    for idx, (variable, score) in enumerate(
            zip(selected_variables, selected_scores[selected_variables]),
            start=1):
        print('Variable %d: [%d], score %.3f' % (idx, variable, score))