Skip to content

Commit 1dfa4a9

Browse files
committed
add missing test file
1 parent 7bf55d7 commit 1dfa4a9

File tree

1 file changed

+95
-0
lines changed

1 file changed

+95
-0
lines changed
+95
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
from gdsctools import *
2+
import pandas as pd
3+
import pylab
4+
import json
5+
6+
def test_regression_report():
    """Run a small Lasso regression and build the HTML regression report.

    For the first four drug identifiers exposed by ``GDSCLasso.drugIds`` the
    test runs a 10-fold cross validation, saves the weight / importance /
    boxplot / randomness figures as PNG files, and writes the per-drug
    weights (CSV) and summary statistics (JSON).  The per-drug files are then
    merged into ``results.csv`` and ``weights.csv`` before
    ``RegressionReport`` is asked to create the main and per-drug HTML pages.

    All output goes to the ``data/`` and ``images/`` directories relative to
    the current working directory; both are created when missing.
    """
    import os

    IC = gdsctools_data("IC50_v5.csv.gz")
    GF = gdsctools_data("genomic_features_v5.csv.gz")

    PREFIX = "gdsctools_regression_"
    IMAGE_DIR = "images"
    DATA_DIR = "data"
    DATA_PREFIX = "data/" + PREFIX
    IMAGE_PREFIX = "images/" + PREFIX

    # Every writer below (savefig, to_csv, open) fails if its target
    # directory does not exist, so create them up front.
    for directory in (DATA_DIR, IMAGE_DIR):
        if not os.path.isdir(directory):
            os.makedirs(directory)

    gd = regression.GDSCLasso(IC, GF)
    DRUGIDS = gd.drugIds[0:4]

    config = {"boxplot_n": 5, "randomness": 5}

    def _pngname(tag, drugid):
        """Return the PNG path for the figure *tag* of drug *drugid*."""
        return IMAGE_PREFIX + "%s_%s.png" % (tag, drugid)

    def _save_figure(tag, drugid, has_content=True):
        """Save the current figure (when it has content), then close it."""
        if has_content:
            pylab.savefig(_pngname(tag, drugid))
        # Close unconditionally so that an empty plot cannot leak its figure
        # into the next plotting step.
        pylab.close()

    # Get best model
    inputs = []
    for drugid in DRUGIDS:
        res = gd.runCV(drugid, verbose=False, kfolds=10)
        bestmodel = gd.get_model(alpha=res.alpha)

        # Plot weights
        weights = gd.plot_weight(drugid, bestmodel)
        _save_figure("weights", drugid, has_content=len(weights) > 0)

        # NOTE(review): the column name "weigths" is misspelled; it is kept
        # as-is because code reading the CSV may depend on it -- confirm
        # before renaming.
        weights = pd.DataFrame({
            "weigths": res.coefficients,
            "features": gd.feature_names})
        output = DATA_PREFIX + "weights_{}.csv".format(drugid)
        weights.to_csv(output, index=False)

        # Plot importance
        weights = gd.plot_importance(drugid, bestmodel)
        _save_figure("importance", drugid, has_content=len(weights) > 0)

        # Boxplots
        boxres = gd.boxplot(drugid, model=bestmodel, n=5,
                            bx_vert=False)
        _save_figure("boxplot", drugid, has_content=len(boxres['data']) > 0)

        # Bayes factor
        ran = gd.check_randomness(drugid, 10, 10)
        _save_figure("randomness", drugid)

        results = {"drugid": int(drugid),
                   "Rp": res.Rp,
                   "alpha": res.alpha,
                   "ln_alpha": res.ln_alpha,
                   "ttest": ran['ttest_pval'],
                   "bayes": ran['bayes_factor']}

        output = DATA_PREFIX + "results_{}.json".format(drugid)
        # Context manager guarantees the handle is closed even when
        # json.dump raises on a non-serialisable value.
        with open(output, "w") as fh:
            json.dump(results, fh)
        inputs.append(output)

    # gather all results: read the JSON files back from disk so the merged
    # table is built from exactly what was written.
    data = []
    for this in inputs:
        with open(this, "r") as fh:
            data.append(json.load(fh))
    df = pd.DataFrame(data)
    df.set_index("drugid", inplace=True)
    df.to_csv(DATA_PREFIX + "results.csv")

    # Merge the per-drug weight tables side by side, one column per drug.
    inputs = [DATA_PREFIX + "weights_{}.csv".format(drugid)
              for drugid in DRUGIDS]
    df = pd.concat(
        [pd.read_csv(this).set_index("features") for this in inputs],
        axis=1)
    df.columns = DRUGIDS
    df.to_csv(DATA_PREFIX + "weights.csv")

    from gdsctools import regression_report
    report = regression_report.RegressionReport(
        "lasso", image_dir=IMAGE_DIR, config=config)
    report.create_html_main()
    report.create_html_drug()
93+
94+
95+

0 commit comments

Comments
 (0)