|
| 1 | +from gdsctools import * |
| 2 | +import pandas as pd |
| 3 | +import pylab |
| 4 | +import json |
| 5 | + |
def test_regression_report():
    """Run a Lasso regression on the first four drugs and build the HTML report.

    For each selected drug the test cross-validates a Lasso model, saves the
    weight / importance / boxplot / randomness figures as PNG files, and dumps
    the per-drug weights (CSV) and summary statistics (JSON).  The per-drug
    files are then merged into ``results.csv`` and ``weights.csv`` and a
    ``RegressionReport`` is asked to create its main and per-drug HTML pages.

    All artefacts are written relative to the current working directory,
    under ``data/`` and ``images/``.
    """
    import os  # local import: only this test needs it

    IC = gdsctools_data("IC50_v5.csv.gz")
    GF = gdsctools_data("genomic_features_v5.csv.gz")

    PREFIX = "gdsctools_regression_"
    IMAGE_DIR = "images"
    DATA_DIR = "data"
    DATA_PREFIX = DATA_DIR + "/" + PREFIX
    IMAGE_PREFIX = IMAGE_DIR + "/" + PREFIX

    # savefig/to_csv/open do not create parent directories, so make sure
    # both output locations exist before anything is written.
    for directory in (DATA_DIR, IMAGE_DIR):
        if not os.path.isdir(directory):
            os.makedirs(directory)

    gd = regression.GDSCLasso(IC, GF)
    DRUGIDS = gd.drugIds[0:4]

    config = {"boxplot_n": 5, "randomness": 5}

    def _pngname(tag, drugid):
        # Image path for a given figure kind and drug.
        return IMAGE_PREFIX + "%s_%s.png" % (tag, drugid)

    def _weights_csv(drugid):
        # Per-drug weights file; used both when writing and when merging.
        return DATA_PREFIX + "weights_{}.csv".format(drugid)

    # Fit the best model for each drug and write its figures and data files.
    result_files = []
    for drugid in DRUGIDS:
        res = gd.runCV(drugid, verbose=False, kfolds=10)
        bestmodel = gd.get_model(alpha=res.alpha)

        # Plot weights (only saved when something was returned).
        plotted = gd.plot_weight(drugid, bestmodel)
        if len(plotted):
            pylab.savefig(_pngname("weights", drugid))
        # Close unconditionally: a no-op without an open figure, and it keeps
        # a figure from one step from leaking into the next image.
        pylab.close()

        # NOTE: the "weigths" spelling is kept as-is because it is the column
        # name written to disk.
        weights = pd.DataFrame({
            "weigths": res.coefficients,
            "features": gd.feature_names})
        weights.to_csv(_weights_csv(drugid), index=False)

        # Plot importance
        plotted = gd.plot_importance(drugid, bestmodel)
        if len(plotted):
            pylab.savefig(_pngname("importance", drugid))
        pylab.close()

        # Boxplots; the count comes from the same config handed to the report.
        boxres = gd.boxplot(drugid, model=bestmodel, n=config["boxplot_n"],
                            bx_vert=False)
        if len(boxres['data']):
            pylab.savefig(_pngname("boxplot", drugid))
        pylab.close()

        # Bayes factor
        # NOTE(review): config["randomness"] is 5 while 10, 10 is passed
        # here -- confirm which value the report is expected to describe.
        ran = gd.check_randomness(drugid, 10, 10)
        pylab.savefig(_pngname("randomness", drugid))
        pylab.close()

        results = {"drugid": int(drugid),
                   "Rp": res.Rp,
                   "alpha": res.alpha,
                   "ln_alpha": res.ln_alpha,
                   "ttest": ran['ttest_pval'],
                   "bayes": ran['bayes_factor']}

        output = DATA_PREFIX + "results_{}.json".format(drugid)
        # Context manager so the handle is closed even if json.dump raises.
        with open(output, "w") as fh:
            json.dump(results, fh)
        result_files.append(output)

    # Gather all per-drug results (read back from disk) into one table.
    data = []
    for filename in result_files:
        with open(filename, "r") as fh:
            data.append(json.load(fh))
    df = pd.DataFrame(data)
    df.set_index("drugid", inplace=True)
    df.to_csv(DATA_PREFIX + "results.csv")

    # Merge the per-drug weights side by side, one column per drug.
    df = pd.concat(
        [pd.read_csv(_weights_csv(drugid)).set_index("features")
         for drugid in DRUGIDS],
        axis=1)
    df.columns = DRUGIDS
    df.to_csv(DATA_PREFIX + "weights.csv")

    from gdsctools import regression_report
    report = regression_report.RegressionReport(
        "lasso", image_dir=IMAGE_DIR, config=config)
    report.create_html_main()
    report.create_html_drug()
| 93 | + |
| 94 | + |
| 95 | + |
0 commit comments