Skip to content

Commit e5c7e7b

Browse files
authored
Merge branch 'main' into v1.5update
2 parents a776a73 + dfdc281 commit e5c7e7b

10 files changed

+13364
-16385
lines changed

notebooks/introductory/Part_2_Dataset_Analysis_and_Preparation.html

Lines changed: 13316 additions & 16345 deletions
Large diffs are not rendered by default.

notebooks/introductory/Part_2_Dataset_Analysis_and_Preparation.ipynb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,8 @@
5959
},
6060
"outputs": [],
6161
"source": [
62-
"DATA_DIR = \"./data/\""
62+
"DATA_DIR = \"./data_p2/\"\n",
63+
"! DATA_DIR=\"./data_p2/\""
6364
]
6465
},
6566
{
@@ -84,8 +85,8 @@
8485
"outputs": [],
8586
"source": [
8687
"# Load files if in google colab, otherwise skip this step\n",
87-
"! wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/noteevents.csv -P ./data/\n",
88-
"! wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/patients.csv -P ./data/"
88+
"! wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/noteevents.csv -P $DATA_DIR\n",
89+
"! wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/patients.csv -P $DATA_DIR"
8990
]
9091
},
9192
{

notebooks/introductory/Part_3_1_Building_a_Concept_Database_and_Vocabulary.ipynb

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,8 @@
365365
},
366366
"outputs": [],
367367
"source": [
368-
"DATA_DIR = \"./data/\""
368+
"DATA_DIR = \"./data_p3.1/\"\n",
369+
"! DATA_DIR=\"./data_p3.1/\""
369370
]
370371
},
371372
{
@@ -421,9 +422,9 @@
421422
],
422423
"source": [
423424
"# Load files if in google colab, otherwise skip this step\n",
424-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/cdb_simple.csv -P ./data/\n",
425-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/cdb_advanced.csv -P ./data/\n",
426-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/vocab_data.txt -P ./data/"
425+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/cdb_simple.csv -P $DATA_DIR\n",
426+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/cdb_advanced.csv -P $DATA_DIR\n",
427+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/vocab_data.txt -P $DATA_DIR"
427428
]
428429
},
429430
{
@@ -583,7 +584,7 @@
583584
],
584585
"source": [
585586
"# If you want to add words manually (one by one) use:\n",
586-
"vocab.add_word(\"test\", cnt=31, vec=[1.42, 1.44, 1.55], replace=True)\n",
587+
"vocab.add_word(\"test\", cnt=31, vec=np.array([1.42, 1.44, 1.55]), replace=True)\n",
587588
"vocab.vocab.keys()"
588589
]
589590
},

notebooks/introductory/Part_3_2_Extracting_Diseases_from_Electronic_Health_Records.ipynb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,8 @@
329329
},
330330
"outputs": [],
331331
"source": [
332-
"DATA_DIR = \"./data/\"\n",
332+
"DATA_DIR = \"./data_p3.2/\"\n",
333+
"! DATA_DIR=\"./data_p3.2/\"\n",
333334
"model_pack_path = DATA_DIR + \"medmen_wstatus_2021_oct.zip\""
334335
]
335336
},
@@ -375,8 +376,8 @@
375376
],
376377
"source": [
377378
"# Download the models and required data\n",
378-
"!wget https://medcat.rosalind.kcl.ac.uk/media/medmen_wstatus_2021_oct.zip -P ./data/\n",
379-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/pt_notes.csv -P ./data/"
379+
"!wget -N https://medcat.rosalind.kcl.ac.uk/media/medmen_wstatus_2021_oct.zip -P $DATA_DIR\n",
380+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/pt_notes.csv -P $DATA_DIR"
380381
]
381382
},
382383
{

notebooks/introductory/Part_4_1_ByteLevelBPETokenizer_and_Embeddings.html

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13381,7 +13381,8 @@
1338113381
<div class="prompt input_prompt">In&nbsp;[2]:</div>
1338213382
<div class="inner_cell">
1338313383
<div class="input_area">
13384-
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">DATA_DIR</span> <span class="o">=</span> <span class="s2">&quot;./data/&quot;</span>
13384+
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">DATA_DIR</span> <span class="o">=</span> <span class="s2">&quot;./data_p4.1/&quot;</span>
13385+
<span class="o">!</span> <span class="nv">DATA_DIR</span><span class="o">=</span><span class="s2">&quot;./data_p4.1/&quot;</span>
1338513386
</pre></div>
1338613387

1338713388
</div>
@@ -13394,9 +13395,8 @@
1339413395
<div class="prompt input_prompt">In&nbsp;[3]:</div>
1339513396
<div class="inner_cell">
1339613397
<div class="input_area">
13397-
<div class=" highlight hl-ipython3"><pre><span></span><span class="o">!</span>mkdir ./data
13398-
<span class="o">!</span>mkdir ./models
13399-
<span class="o">!</span>wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/noteevents.csv -P ./data/
13398+
<div class=" highlight hl-ipython3"><pre><span></span><span class="o">!</span>mkdir ./models
13399+
<span class="o">!</span>wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/noteevents.csv -P <span class="nv">$DATA_DIR</span>
1340013400
</pre></div>
1340113401

1340213402
</div>

notebooks/introductory/Part_4_1_ByteLevelBPETokenizer_and_Embeddings.ipynb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,8 @@
296296
},
297297
"outputs": [],
298298
"source": [
299-
"DATA_DIR = \"./data/\""
299+
"DATA_DIR = \"./data_p4.1/\"\n",
300+
"! DATA_DIR=\"./data_p4.1/\""
300301
]
301302
},
302303
{
@@ -331,9 +332,8 @@
331332
}
332333
],
333334
"source": [
334-
"!mkdir ./data\n",
335335
"!mkdir ./models\n",
336-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/noteevents.csv -P ./data/"
336+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/noteevents.csv -P $DATA_DIR"
337337
]
338338
},
339339
{

notebooks/introductory/Part_4_2_Supervised_Training_and_Meta_annotations.html

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13456,7 +13456,8 @@
1345613456
<div class="prompt input_prompt">In&nbsp;[2]:</div>
1345713457
<div class="inner_cell">
1345813458
<div class="input_area">
13459-
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">DATA_DIR</span> <span class="o">=</span> <span class="s2">&quot;./data/&quot;</span>
13459+
<div class=" highlight hl-ipython3"><pre><span></span><span class="n">DATA_DIR</span> <span class="o">=</span> <span class="s2">&quot;./data_p4.2/&quot;</span>
13460+
<span class="o">!</span> <span class="nv">DATA_DIR</span><span class="o">=</span><span class="s2">&quot;./data_p4.2/&quot;</span>
1346013461
<span class="n">vocab_path</span> <span class="o">=</span> <span class="n">DATA_DIR</span> <span class="o">+</span> <span class="s2">&quot;vocab.dat&quot;</span>
1346113462
<span class="n">cdb_path</span> <span class="o">=</span> <span class="n">DATA_DIR</span> <span class="o">+</span> <span class="s2">&quot;cdb-medmen-v1_2.dat&quot;</span>
1346213463
</pre></div>
@@ -13472,13 +13473,13 @@
1347213473
<div class="inner_cell">
1347313474
<div class="input_area">
1347413475
<div class=" highlight hl-ipython3"><pre><span></span><span class="c1"># Download the models and required data</span>
13475-
<span class="o">!</span>wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/MedCAT_Export.json -P ./data/
13476+
<span class="o">!</span>wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/MedCAT_Export.json -P <span class="nv">$DATA_DIR</span>
1347613477
<span class="c1"># You can also use the models created in Part 4.1 of the Tutorial</span>
13477-
<span class="o">!</span>wget https://medcat.rosalind.kcl.ac.uk/media/mc_status.zip -P ./data/
13478+
<span class="o">!</span>wget -N https://medcat.rosalind.kcl.ac.uk/media/mc_status.zip -P <span class="nv">$DATA_DIR</span>
1347813479

1347913480
<span class="c1"># Get MedCAT models components (Alternatively you can use a previously created MedCAT model packs)</span>
13480-
<span class="o">!</span>wget https://medcat.rosalind.kcl.ac.uk/media/vocab.dat -P ./data/
13481-
<span class="o">!</span>wget https://medcat.rosalind.kcl.ac.uk/media/cdb-medmen-v1_2.dat -P ./data/
13481+
<span class="o">!</span>wget -N https://medcat.rosalind.kcl.ac.uk/media/vocab.dat -P <span class="nv">$DATA_DIR</span>
13482+
<span class="o">!</span>wget -N https://medcat.rosalind.kcl.ac.uk/media/cdb-medmen-v1_2.dat -P <span class="nv">$DATA_DIR</span>
1348213483
</pre></div>
1348313484

1348413485
</div>
@@ -17826,7 +17827,7 @@ <h3 id="MetaCAT">MetaCAT<a class="anchor-link" href="#MetaCAT">&#182;</a></h3><p
1782617827
<div class="prompt input_prompt">In&nbsp;[16]:</div>
1782717828
<div class="inner_cell">
1782817829
<div class="input_area">
17829-
<div class=" highlight hl-ipython3"><pre><span></span><span class="o">!</span>unzip data/mc_status.zip
17830+
<div class=" highlight hl-ipython3"><pre><span></span><span class="o">!</span>unzip <span class="nv">$DATA_DIR</span>/mc_status.zip
1783017831
</pre></div>
1783117832

1783217833
</div>

notebooks/introductory/Part_4_2_Supervised_Training_and_Meta_annotations.ipynb

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -366,7 +366,8 @@
366366
},
367367
"outputs": [],
368368
"source": [
369-
"DATA_DIR = \"./data/\"\n",
369+
"DATA_DIR = \"./data_p4.2/\"\n",
370+
"! DATA_DIR=\"./data_p4.2/\"\n",
370371
"vocab_path = DATA_DIR + \"vocab.dat\"\n",
371372
"cdb_path = DATA_DIR + \"cdb-medmen-v1_2.dat\""
372373
]
@@ -435,13 +436,13 @@
435436
],
436437
"source": [
437438
"# Download the models and required data\n",
438-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/MedCAT_Export.json -P ./data/\n",
439+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/MedCAT_Export.json -P $DATA_DIR\n",
439440
"# You can also use the models created in Part 4.1 of the Tutorial\n",
440-
"!wget https://medcat.rosalind.kcl.ac.uk/media/mc_status.zip -P ./data/\n",
441+
"!wget -N https://medcat.rosalind.kcl.ac.uk/media/mc_status.zip -P $DATA_DIR\n",
441442
"\n",
442443
"# Get MedCAT models components (Alternatively you can use a previously created MedCAT model packs)\n",
443-
"!wget https://medcat.rosalind.kcl.ac.uk/media/vocab.dat -P ./data/\n",
444-
"!wget https://medcat.rosalind.kcl.ac.uk/media/cdb-medmen-v1_2.dat -P ./data/"
444+
"!wget -N https://medcat.rosalind.kcl.ac.uk/media/vocab.dat -P $DATA_DIR\n",
445+
"!wget -N https://medcat.rosalind.kcl.ac.uk/media/cdb-medmen-v1_2.dat -P $DATA_DIR"
445446
]
446447
},
447448
{
@@ -4719,7 +4720,7 @@
47194720
}
47204721
],
47214722
"source": [
4722-
"!unzip data/mc_status.zip"
4723+
"!unzip $DATA_DIR/mc_status.zip"
47234724
]
47244725
},
47254726
{

notebooks/introductory/Part_4_3_Annotating_documents_with_the_full_MedCAT_pipeline_with_MetaAnnotations.ipynb

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -326,8 +326,10 @@
326326
},
327327
"outputs": [],
328328
"source": [
329-
"DATA_DIR = \"./data/\"\n",
329+
"DATA_DIR = \"./data_p4.3/\"\n",
330+
"! DATA_DIR=\"./data_p4.3/\"\n",
330331
"MODEL_DIR = \"./models/\"\n",
332+
"! MODEL_DIR=\"./models/\"\n",
331333
"model_pack_path = MODEL_DIR + \"medmen_wstatus_2021_oct.zip\""
332334
]
333335
},
@@ -384,11 +386,11 @@
384386
],
385387
"source": [
386388
"# Download the models and required data\n",
387-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/pt_notes.csv -P ./data/\n",
388-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/MedCAT_Export.json -P ./data/\n",
389+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/pt_notes.csv -P $DATA_DIR\n",
390+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/MedCAT_Export.json -P $DATA_DIR\n",
389391
"\n",
390392
"# Download the medcat modelpack\n",
391-
"!wget https://medcat.rosalind.kcl.ac.uk/media/medmen_wstatus_2021_oct.zip -P ./models/\n"
393+
"!wget -N https://medcat.rosalind.kcl.ac.uk/media/medmen_wstatus_2021_oct.zip -P $MODEL_DIR\n"
392394
]
393395
},
394396
{
@@ -496,7 +498,7 @@
496498
}
497499
],
498500
"source": [
499-
"!wget https://raw.githubusercontent.com/CogStack/MedCAT/master/tutorial/data/pt_notes.csv -P ./data/"
501+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCAT/master/tutorial/data/pt_notes.csv -P $DATA_DIR"
500502
]
501503
},
502504
{

notebooks/introductory/Part_5_Prevalence_of_Physical_and_Mental_Diseases.ipynb

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,8 @@
320320
},
321321
"outputs": [],
322322
"source": [
323-
"DATA_DIR = \"./data/\"\n",
323+
"DATA_DIR = \"./data_p5/\"\n",
324+
"! DATA_DIR=\"./data_p5/\"\n",
324325
"model_pack_path = DATA_DIR + \"medmen_wstatus_2021_oct.zip\""
325326
]
326327
},
@@ -397,11 +398,11 @@
397398
],
398399
"source": [
399400
"# Download the models and required data\n",
400-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/pt_notes.csv -P ./data/\n",
401-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/MedCAT_Export.json -P ./data/\n",
402-
"!wget https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/cui_location.json -P ./data/\n",
401+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/pt_notes.csv -P $DATA_DIR\n",
402+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/MedCAT_Export.json -P $DATA_DIR\n",
403+
"!wget -N https://raw.githubusercontent.com/CogStack/MedCATtutorials/main/notebooks/introductory/data/cui_location.json -P $DATA_DIR\n",
403404
"\n",
404-
"!wget https://medcat.rosalind.kcl.ac.uk/media/medmen_wstatus_2021_oct.zip -P ./data/"
405+
"!wget -N https://medcat.rosalind.kcl.ac.uk/media/medmen_wstatus_2021_oct.zip -P $DATA_DIR"
405406
]
406407
},
407408
{

0 commit comments

Comments
 (0)