-
Notifications
You must be signed in to change notification settings - Fork 18.2k
/
Copy pathcopy_notebooks.py
111 lines (89 loc) · 4.24 KB
/
copy_notebooks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
"""
This script copies all notebooks from the book into the website directory, and
creates pages which wrap them and link together.
"""
import os
import nbformat
import shutil
# Template for the wrapper page written for each notebook by copy_notebooks().
# It is filled in with str.format(), so {title}, {htmlfile}, {template},
# {notebook_file} and {cells} are substituted, while the doubled braces
# produce the literal "{% notebook ... %}" tag in the output.
PAGEFILE = """title: {title}
url:
save_as: {htmlfile}
Template: {template}
{{% notebook notebooks/{notebook_file} cells[{cells}] %}}
"""
# Markdown text that copy_notebooks() writes into the third cell of
# Index.ipynb, replacing that cell's original source in the website copy.
INTRO_TEXT = """This website contains the full text of the [Python Data Science Handbook](http://shop.oreilly.com/product/0636920034919.do) by Jake VanderPlas; the content is available [on GitHub](https://github.com/jakevdp/PythonDataScienceHandbook) in the form of Jupyter notebooks.
The text is released under the [CC-BY-NC-ND license](https://creativecommons.org/licenses/by-nc-nd/3.0/us/legalcode), and code is released under the [MIT license](https://opensource.org/licenses/MIT).
If you find this content useful, please consider supporting the work by [buying the book](http://shop.oreilly.com/product/0636920034919.do)!
"""
def abspath_from_here(*args):
    """Return an absolute path built from this script's directory.

    The given path components are joined onto the directory containing
    this file, and the result is made absolute and normalized.
    """
    script_dir = os.path.dirname(__file__)
    return os.path.abspath(os.path.join(script_dir, *args))
# Directory the notebooks are read from: ../notebooks relative to this script.
NB_SOURCE_DIR = abspath_from_here('..', 'notebooks')
# Directory the processed notebooks are written to: content/notebooks.
NB_DEST_DIR = abspath_from_here('content', 'notebooks')
# Directory the generated wrapper pages are written to: content/pages.
PAGE_DEST_DIR = abspath_from_here('content', 'pages')
def _page_settings(content, nb):
    """Return ``(cells, template, title)`` for *nb*, adjusting *content* in place.

    For ``Index.ipynb`` the third cell's source is replaced by INTRO_TEXT.
    For every other notebook the third cell must be a single-line ``#``
    heading; it supplies the page title and is moved to the front so that
    the navigation bar ends up below it.

    Raises:
        ValueError: if the notebook has fewer than three cells, or if the
            third cell of a non-index notebook is not a one-line heading.
    """
    if len(content.cells) < 3:
        raise ValueError('{0}: expected at least three cells'.format(nb))

    if nb == 'Index.ipynb':
        # cells[0] is the title
        # cells[1] is the cover image
        # cells[2] is the license
        content.cells[2].source = INTRO_TEXT
        return '1:', 'page', 'Python Data Science Handbook'

    # cells[0] is the book information
    # cells[1] is the navigation bar
    # cells[2] is the title
    title = content.cells[2].source
    if not title.startswith('#') or len(title.splitlines()) > 1:
        raise ValueError('title not found in third cell')

    # put nav below title
    content.cells.insert(0, content.cells.pop(2))
    return '2:', 'booksection', title.lstrip('#').strip()


def _rewrite_links(content, nb, name_map, figure_map):
    """Replace notebook names and figure paths in the cells of *content*.

    Only markdown cells are rewritten, using the ``old -> new`` pairs in
    *name_map* and *figure_map*.
    """
    for cell in content.cells:
        if cell.cell_type == 'markdown':
            # str.replace is a no-op when the key is absent, so no
            # membership test is needed first.
            for nbname, htmlname in name_map.items():
                cell.source = cell.source.replace(nbname, htmlname)
            for figname, newfigname in figure_map.items():
                cell.source = cell.source.replace(figname, newfigname)
        if cell.source.startswith("<!--NAVIGATION-->"):
            # Undo replacement of notebook link in the colab badge: the
            # last occurrence of this notebook's html name is restored to
            # the .ipynb file name.
            cell.source = nb.join(cell.source.rsplit(name_map[nb], 1))


def copy_notebooks():
    """Copy the book's notebooks into the website tree and wrap each in a page.

    The function:

    * creates NB_DEST_DIR and PAGE_DEST_DIR if they do not exist;
    * replaces ``content/figures`` with a fresh copy of the source
      ``notebooks/figures`` directory;
    * for every ``*.ipynb`` file in NB_SOURCE_DIR (in sorted order), reads
      it as nbformat version 4, adjusts its leading cells, rewrites
      notebook and figure links, writes the result to NB_DEST_DIR and
      writes a ``.md`` page built from PAGEFILE to PAGE_DEST_DIR.

    Raises:
        ValueError: if a notebook has fewer than three cells, or a
            non-index notebook's third cell is not a one-line ``#`` heading.
    """
    os.makedirs(NB_DEST_DIR, exist_ok=True)
    os.makedirs(PAGE_DEST_DIR, exist_ok=True)

    nblist = sorted(nb for nb in os.listdir(NB_SOURCE_DIR)
                    if nb.endswith('.ipynb'))
    name_map = {nb: nb.rsplit('.', 1)[0].lower() + '.html'
                for nb in nblist}

    figsource = abspath_from_here('..', 'notebooks', 'figures')
    figdest = abspath_from_here('content', 'figures')

    # Start from a clean copy so stale figures never linger.
    if os.path.exists(figdest):
        shutil.rmtree(figdest)
    shutil.copytree(figsource, figdest)

    # These are link texts inside markdown, not filesystem paths, so they
    # are joined with '/' explicitly; os.path.join would insert backslashes
    # on Windows and the links would never match.
    figure_map = {'figures/' + fig: '/PythonDataScienceHandbook/figures/' + fig
                  for fig in os.listdir(figdest)}

    for nb in nblist:
        base = os.path.splitext(nb)[0]
        print('-', nb)

        content = nbformat.read(os.path.join(NB_SOURCE_DIR, nb),
                                as_version=4)

        cells, template, title = _page_settings(content, nb)
        _rewrite_links(content, nb, name_map, figure_map)

        nbformat.write(content, os.path.join(NB_DEST_DIR, nb))

        pagefile = os.path.join(PAGE_DEST_DIR, base + '.md')
        htmlfile = base.lower() + '.html'
        # Explicit encoding: titles may contain non-ASCII characters and the
        # platform default encoding is not guaranteed to handle them.
        with open(pagefile, 'w', encoding='utf-8') as f:
            f.write(PAGEFILE.format(title=title,
                                    htmlfile=htmlfile,
                                    notebook_file=nb,
                                    template=template,
                                    cells=cells))
# Run the copy only when this file is executed as a script, not on import.
if __name__ == '__main__':
    copy_notebooks()