@@ -28,7 +28,7 @@ class IC50Cluster(IC50):
28
28
account for this feature, the IC50Cluster will rename the columns and
29
29
transform the data as follows.
30
30
31
- Consider the case of the DRUG 1211. It appears 3 times in the original
31
+ Consider the case of the DRUG 1211. It appears 3 times in the original
32
32
data::
33
33
34
34
Drug_1211_0.15625_IC50
@@ -67,8 +67,8 @@ class IC50Cluster(IC50):
67
67
several concentrations is large, then they are studied independently.
68
68
Otherwise they are merged.
69
69
70
- In the final dataframe, the columns names are transformed into unique
71
- identifiers like in the IC50 class by removing the ``Drug_`` prefix and
70
+ In the final dataframe, the column names are transformed into unique
71
+ identifiers like in the IC50 class by removing the ``Drug_`` prefix and
72
72
``_conc_IC50`` suffix.
73
73
74
74
The :attr:`mapping` contains the mapping between new and old identifiers.
@@ -81,7 +81,7 @@ def __init__(self, ic50, ratio_threshold=10, verbose=True, cluster=True):
81
81
:param ic50:
82
82
:param int ratio_threshold:
83
83
:param bool verbose:
84
- :param bool cluster: may be useful to not cluster the data for
84
+ :param bool cluster: may be useful to not cluster the data for
85
85
testing or debugging
86
86
87
87
"""
@@ -224,54 +224,60 @@ def mkdir(self, name):
224
224
225
225
226
226
class GDSC (GDSCBase ):
227
- """Wrapper of the :class:`~gdcstools.anova.ANOVA` class and reports to
228
- analyse all TCGA Tissues and companies automatically.
227
+ """Wrapper of the :class:`~gdsctools.anova.ANOVA` class and reports to
228
+ analyse all TCGA Tissues and companies automatically while creating summary
229
+ HTML pages.
229
230
230
- First, one need to provide the unique IC50 files. Second, the DRugDecode
231
- file (see :class:``) must be provided to convert identifiers into
232
- drug names within the reports. Third, genomic feature files must be
233
- provided for each tissue.
231
+ First, one needs to provide a unique IC50 file. Second, the DrugDecode
232
+ file (see :class:`~gdsctools.readers.DrugDecode`) must be provided
233
+ with the DRUG identifiers and their corresponding names. Third,
234
+ a set of genomic feature files must be provided for each :term:`TCGA`
235
+ tissue.
234
236
235
- First, create all main analysis that include all drugs::
236
237
238
+ You then create a GDSC instance::
239
+
240
+ from gdsctools import GDSC
237
241
gg = GDSC('IC50_v18.csv', 'DRUG_DECODE.txt',
238
242
genomic_feature_pattern='GF*csv')
239
243
240
- Then run the analysis. This will launch an ANOVA analysis for each
241
- tissue as well as a dedicated HTML report for each tissue considered.
244
+ At that stage you may want to change the settings, e.g.::
242
245
243
- This may take lots of time. On v18, on an i7 core using 1 CPU
244
- this takes about 1 hour.30 minutes
246
+ gg.settings.FDR_threshold = 20
245
247
246
- You should now have a directory called **tissue_packages** with about
247
- 20 directories for each TCGA GF file. Keep that in a safe place or
248
- you will have to restart the analysis
248
+ Then run the analysis::
249
249
250
- Second, split those data just created for each specific proprietary
251
- compounds. For instance::
250
+ gg.analyse()
252
251
253
- gg.create_data_packages_for_companies(['AZ'])
252
+ This will launch an ANOVA analysis for each TCGA tissue + PANCAN case
253
+ if provided. This will also create a data package for each tissue.
254
+ The data packages are stored in ./tissue_packages directory.
254
255
255
- or for all in one go::
256
+ Since all private and public drugs are stored together, the next step is
257
+ to create data packages for each company::
256
258
257
259
gg.create_data_packages_for_companies()
258
260
259
- Third, create some summary pages ::
261
+ You may select a specific one if you wish::
260
262
261
- gg.create_summary_pages( )
263
+ gg.create_data_packages_for_companies(['AZ'] )
262
264
263
- The last step is fast (a few seconds) and create index.html in the
264
- tissue_package directory and each proprietary directory.
265
+ Finally, create some summary pages::
265
266
267
+ gg.create_summary_pages()
266
268
269
+ Your entry point is an HTML file called **index.html**.
267
270
"""
268
271
def __init__ (self , ic50 , drug_decode ,
269
272
genomic_feature_pattern = "GF_*csv" ,
270
273
main_directory = "tissue_packages" , verbose = True ):
271
- """
274
+ """.. rubric:: Constructor
275
+
276
+ :param ic50: an :class:`~gdsctools.readers.IC50` file.
277
+ :param drug_decode: an :class:`~gdsctools.readers.DrugDecode` file.
278
+ :param genomic_feature_pattern: a glob to a set of
279
+ :class:`~gdsctools.readers.GenomicFeature` files.
272
280
273
- ic50 must be a filename (not IC50 instance) because it will be used for
274
- each genomic features file
275
281
"""
276
282
super (GDSC , self ).__init__ (genomic_feature_pattern , verbose = verbose )
277
283
assert isinstance (ic50 , str )
@@ -294,7 +300,7 @@ def __init__(self, ic50, drug_decode,
294
300
# quick test on 15 features
295
301
self .test = False
296
302
297
- def analyse (self , onweb = False , multicore = None ):
303
+ def analyse (self , multicore = None ):
298
304
"""Launch ANOVA analysis and creating data package for each tissue.
299
305
300
306
:param bool onweb: By default, reports are created
@@ -306,9 +312,9 @@ def analyse(self, onweb=False, multicore=None):
306
312
self .mkdir (self .main_directory )
307
313
# First analyse all TCGA cases + PANCAN once for all and
308
314
# store all the results in a dictionary.
309
- self ._analyse_all (onweb = onweb , multicore = multicore )
315
+ self ._analyse_all (multicore = multicore )
310
316
311
- def _analyse_all (self , onweb , multicore = None ):
317
+ def _analyse_all (self , multicore = None ):
312
318
for gf_filename in sorted (self .gf_filenames ):
313
319
tcga = gf_filename .split ("_" )[1 ].split ('.' )[0 ]
314
320
print (purple ('======================== Analysing %s data' % tcga ))
@@ -340,9 +346,11 @@ def _analyse_all(self, onweb, multicore=None):
340
346
self .report = ANOVAReport (an )
341
347
self .report .settings .savefig = True
342
348
343
- self .report .create_html_pages (onweb = onweb )
349
+ self .report .create_html_pages (onweb = False )
344
350
345
351
def create_data_packages_for_companies (self , companies = None ):
352
+ """Creates a data package for each company found in the DrugDecode file
353
+ """
346
354
##########################################################
347
355
#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!#
348
356
# #
@@ -429,10 +437,10 @@ def drug_to_keep(drug):
429
437
an .settings .analysis_type = tcga
430
438
431
439
# Now we create the report
432
- self .report = ANOVAReport (an , results ,
440
+ self .report = ANOVAReport (an , results ,
433
441
drug_decode = drug_decode_company ,
434
442
verbose = self .verbose )
435
- self .report .company = company
443
+ self .report .company = company
436
444
self .report .settings .analysis_type = tcga
437
445
self .report .create_html_main (False )
438
446
self .report .create_html_manova (False )
@@ -449,20 +457,21 @@ def _get_companies(self):
449
457
companies = property (_get_companies )
450
458
451
459
def create_summary_pages (self ):
452
- """
460
+ """Create summary pages
453
461
454
462
Once the main analysis is done (:meth:`analyse`), and the company
455
463
packages have been created (:meth:`create_data_packages_for_companies`),
456
464
you can run this method that will create a summary HTML page
457
465
(index.html) for the tissue, and a similar summary HTML page for the
458
- tissues of each company. Finally, an HTML summary page for the companies
459
- is also created.
466
+ tissues of each company. Finally, an HTML summary page for the
467
+ companies is also created.
460
468
461
469
The final directory tree looks like::
462
470
463
471
464
472
|-- index.html
465
473
|-- company_packages
474
+ | |-- index.html
466
475
| |-- Company1
467
476
| | |-- Tissue1
468
477
| | |-- Tissue2
@@ -472,9 +481,9 @@ def create_summary_pages(self):
472
481
| | |-- Tissue2
473
482
| | |-- index.html
474
483
|-- tissue_packages
484
+ | |-- index.html
475
485
| |-- Tissue1
476
486
| |-- Tissue2
477
- | |-- index.html
478
487
479
488
480
489
"""
@@ -513,7 +522,7 @@ def _create_main_index(self):
513
522
html_page .jinja ['tissue_directory' ] = self .main_directory
514
523
html_page .write ()
515
524
516
- def _create_summary_pages (self , main_directory , verbose = True ,
525
+ def _create_summary_pages (self , main_directory , verbose = True ,
517
526
company = None ):
518
527
# Read all directories in tissue_packages
519
528
0 commit comments