|
1 |
| -from itertools import pairwise |
2 | 1 | import os
|
3 | 2 | import re
|
| 3 | +from collections import defaultdict |
| 4 | + |
| 5 | +# 获取 leetcode 题目,结构如下: |
| 6 | +""" |
| 7 | +- solution |
| 8 | + - 0000-0099 |
| 9 | + - 0001.Two Sum/README.md |
| 10 | + - 0002.Add Two Numbers/README.md |
| 11 | + - 0100-0199 |
| 12 | + - 0100.Same Tree/README.md |
| 13 | + - 0101.Symmetric Tree/README.md |
| 14 | +- lcof |
| 15 | + - 面试题03. 数组中重复的数字/README.md |
| 16 | + - 面试题04. 二维数组中的查找/README.md |
| 17 | +- lcof2 |
| 18 | + - 剑指 Offer II 001. 整数除法/README.md |
| 19 | + - 剑指 Offer II 002. 二进制加法/README.md |
| 20 | +- lcci |
| 21 | + - 01.01.Is Unique/README.md |
| 22 | + - 01.02.Check Permutation/README.md |
| 23 | +""" |
| 24 | + |
| 25 | +# 生成 leetcode 题目导航 |
| 26 | +""" |
| 27 | +nav: |
| 28 | + - LeetCode: |
| 29 | + - 1. 两数之和: lc/1.md |
| 30 | + - 2. 两数相加: lc/2.md |
| 31 | + - 100. 相同的树: lc/100.md |
| 32 | + - 剑指 Offer(第 2 版): |
| 33 | + - 面试题3. 数组中重复的数字: lcof/3.md |
| 34 | + - 剑指 Offer(专项突击版): |
| 35 | + - 1. 整数除法: lcof2/1.md |
| 36 | + - 程序员面试金典(第 6 版): |
| 37 | + - 面试题 01.01. 判定字符是否唯一: lcci/1.1.md |
| 38 | +""" |
4 | 39 |
|
5 |
| -sorted_suffixes = [ |
6 |
| - "py", |
7 |
| - "java", |
8 |
| - "cpp", |
9 |
| - "go", |
10 |
| - "ts", |
11 |
| - "rs", |
12 |
| - "js", |
13 |
| - "cs", |
14 |
| - "php", |
15 |
| - "c", |
16 |
| - "scala", |
17 |
| - "swift", |
18 |
| - "rb", |
19 |
| - "kt", |
20 |
| - "nim", |
21 |
| - "sql", |
22 |
| -] |
23 | 40 | code_dict = {
|
24 | 41 | "py": ("Python3", "python"),
|
25 | 42 | "java": ("Java", "java"),
|
|
39 | 56 | "sql": ("MySQL", "sql"),
|
40 | 57 | }
|
41 | 58 |
|
# Reverse lookup built from code_dict's (display name, fence tag) pairs:
# code-fence language tag -> display name, e.g. "python" -> "Python3".
mapping = {fence: label for (label, fence) in code_dict.values()}
42 | 60 |
|
43 |
| -# 抽取代码块 |
44 |
| -def extract_code(): |
45 |
| - paths = [] |
46 |
| - for root, _, files in os.walk(os.getcwd()): |
47 |
| - for file in files: |
48 |
| - path = root + "/" + file |
49 |
| - if "node_modules" in path or "__pycache__" in path or ".git" in path: |
50 |
| - continue |
51 |
| - if root == "D:\github-repo\leetcode": |
52 |
| - continue |
53 |
| - if path.endswith("README.md"): |
54 |
| - paths.append(path) |
55 |
| - for path in paths: |
56 |
| - with open(path, "r", encoding="utf-8") as f: |
57 |
| - content = f.read() |
58 |
| - mark = "<!-- tabs:start -->" |
59 |
| - i = content.find(mark) |
60 |
| - if i == -1: |
61 |
| - continue |
62 |
| - content = content[i + len(mark) :] |
63 |
| - for suffix, (_, suf) in code_dict.items(): |
64 |
| - res = re.findall(f"```{suf}\n(.*?)```", content, re.S) |
65 |
| - if not res: |
66 |
| - continue |
67 |
| - cnt = 1 |
68 |
| - for block in res: |
69 |
| - if not block or not block.strip(): |
70 |
| - continue |
71 |
| - if suf in ["java", "cpp", "go", "c"]: |
72 |
| - block = block.rstrip() |
73 |
| - name = f"{path[:path.rfind('/')]}/Solution{'' if cnt == 1 else str(cnt)}.{suffix}" |
74 |
| - with open(name, "w", encoding="utf-8") as f: |
75 |
| - f.write(block) |
76 |
| - cnt += 1 |
77 |
| - |
78 |
| - |
79 |
| -def parse_content(content, start, end, titles): |
80 |
| - i = content.find(start) |
81 |
| - if i == -1: |
82 |
| - return [] |
83 |
| - j = content.find(end) |
84 |
| - if j == -1: |
85 |
| - return [] |
86 |
| - content = content[i + len(start) : j] |
87 |
| - blocks = [] |
88 |
| - idx = [content.find(title) for title in titles] |
89 |
| - for l, r in pairwise(idx): |
90 |
| - block = content[l:r].strip() |
91 |
| - if not block: |
92 |
| - continue |
93 |
| - line = block.split("\n")[0] |
94 |
| - method_name = line[2:-2] |
95 |
| - block = block.replace(line, f"### {method_name}") |
96 |
| - blocks.append(block) |
97 |
| - return blocks |
98 |
| - |
99 |
| - |
100 |
| -def extract_solution_paragraph(): |
# Read the current Chinese and English MkDocs configuration files as text.
with open("mkdocs.yml", "r", encoding="utf-8") as cn_file:
    config = cn_file.read()

with open("mkdocs-en.yml", "r", encoding="utf-8") as en_file:
    en_config = en_file.read()
| 66 | + |
| 67 | + |
def get_paths(dirs: str, m: int) -> list[str]:
    """Collect the Markdown files under ``main/<dirs>`` at an exact path depth.

    Args:
        dirs: Name of the directory to walk, relative to ``main``
            (for example ``"solution"`` or ``"lcci"``).
        m: Required number of ``os.sep``-separated components in the file
            path, counting the leading ``main`` component and the file name.

    Returns:
        Paths of every ``*.md`` file whose path has exactly ``m`` components,
        in ``os.walk`` order. That order is not sorted; callers that need a
        stable order must sort the result. A missing directory yields ``[]``.
    """
    # Use a separate local instead of rebinding the ``dirs`` parameter.
    base = os.path.join("main", dirs)
    candidates = (
        os.path.join(root, file)
        for root, _, files in os.walk(base)
        for file in files
        if file.endswith(".md")
    )
    return [path for path in candidates if len(path.split(os.sep)) == m]
114 | 77 |
|
115 |
| - is_cn = path.endswith("README.md") |
116 |
| - if is_cn: |
117 |
| - blocks = parse_content( |
118 |
| - content, |
119 |
| - "## 解法", |
120 |
| - "<!-- tabs:start -->", |
121 |
| - ["**方法一:", "**方法二:", "**方法三:", "**方法四:"], |
122 |
| - ) |
123 |
| - else: |
124 |
| - print(path) |
125 |
| - blocks = parse_content( |
126 |
| - content, |
127 |
| - "## Solutions", |
128 |
| - "<!-- tabs:start -->", |
129 |
| - ["**Solution 1:", "**Solution 2:", "**Solution 3:", "**Solution 4:"], |
130 |
| - ) |
131 | 78 |
|
132 |
| - prefix = path[: path.rfind("/")] |
133 |
| - tab_start = "<!-- tabs:start -->" |
134 |
| - tab_end = "<!-- tabs:end -->" |
135 |
| - for i in range(1, 5): |
136 |
| - codes = [] |
137 |
| - for suf in sorted_suffixes: |
138 |
| - seq = '' if i == 1 else str(i) |
139 |
| - file_name = f"{prefix}/Solution{seq}.{suf}" |
140 |
| - try: |
141 |
| - with open(file_name, "r", encoding="utf-8") as f: |
142 |
| - code = f.read().strip() |
143 |
| - code = '```' + code_dict[suf][1] + '\n' + code + '\n```' |
144 |
| - codes.append(code) |
145 |
| - except: |
146 |
| - continue |
147 |
| - if codes: |
148 |
| - if i > len(blocks): |
149 |
| - seq_dict = {1: '一', 2: '二', 3: '三', 4: '四'} |
150 |
| - title = f"### 方法{seq_dict[i]}" if is_cn else f"### Solution {i}" |
151 |
| - block = ( |
152 |
| - title |
153 |
| - + '\n\n' |
154 |
| - + tab_start |
155 |
| - + '\n\n' |
156 |
| - + '\n\n'.join(codes) |
157 |
| - + '\n\n' |
158 |
| - + tab_end |
159 |
| - ) |
160 |
| - blocks.append(block) |
161 |
| - else: |
162 |
| - block = ( |
163 |
| - blocks[i - 1] |
164 |
| - + '\n\n' |
165 |
| - + tab_start |
166 |
| - + '\n\n' |
167 |
| - + '\n\n'.join(codes) |
168 |
| - + '\n\n' |
169 |
| - + tab_end |
170 |
| - ) |
171 |
| - blocks[i - 1] = block |
172 |
| - is_problem = ( |
173 |
| - content.find("## 解法") != -1 |
174 |
| - if is_cn |
175 |
| - else content.find("## Solutions") != -1 |
176 |
| - and content.find("## Description") != -1 |
177 |
| - ) |
178 |
| - start = '## 解法' if is_cn else '## Solutions' |
179 |
| - end = '<!-- end -->' |
180 |
| - if blocks: |
181 |
| - content = ( |
182 |
| - content[: content.find(start)] |
183 |
| - + start |
184 |
| - + '\n\n' |
185 |
| - + '\n\n'.join(blocks) |
186 |
| - + '\n\n' |
187 |
| - + end |
# Source directory under ``main`` -> (docs sub-directory, path depth passed to
# get_paths for that directory's problem README files).
dirs_mapping = {
    "solution": ("lc", 5),
    "lcof": ("lcof", 4),
    "lcof2": ("lcof2", 4),
    "lcci": ("lcci", 4),
    "lcp": ("lcp", 4),
    "lcs": ("lcs", 4),
}

dirs = ["solution", "lcof", "lcof2", "lcci", "lcp", "lcs"]

# Shape of the navigation produced from the entries collected below:
#
# nav:
#   - LeetCode:
#       - 1. 两数之和: lc/1.md
#       - 2. 两数相加: lc/2.md

navdata_cn = defaultdict(list)
navdata_en = defaultdict(list)

_TABS_START = "<!-- tabs:start -->"
_TABS_END = "<!-- tabs:end -->"
_FENCE_RE = re.compile(r"```(.+?)\n(.+?)\n```", re.DOTALL)
_SERIES_PREFIXES = ("面试题", "剑指 Offer II", "LCP", "LCS")
_ROMAN_SUFFIXES = (("- III", ".3"), ("- II", ".2"), ("- I", ".1"))
# Entries are nested under a section header, so they must be indented deeper
# than the header line for YAML to treat them as its children.
_NAV_ENTRY_INDENT = " " * 6
# Content placed inside a `=== "..."` tab must be indented by four spaces.
_TAB_INDENT = " " * 4


def _strip_series(text: str) -> str:
    """Remove series prefixes, surrounding spaces and leading zeros."""
    for prefix in _SERIES_PREFIXES:
        text = text.replace(prefix, "")
    return text.strip(" ").lstrip("0")


def _split_title(title: str, source_dir: str) -> tuple:
    """Split a problem title into its (number, name) parts.

    ``lcci`` numbers contain a dot themselves (``01.01``), so for that
    directory the title is split at the last dot instead of the first.
    A trailing ``- I`` / ``- II`` / ``- III`` on the number becomes
    ``.1`` / ``.2`` / ``.3``.
    """
    dot = title.rfind(".") if source_dir == "lcci" else title.find(".")
    num = _strip_series(title[:dot])
    name = _strip_series(title[dot + 1 :])
    for suffix, replacement in _ROMAN_SUFFIXES:
        if num.endswith(suffix):
            num = num[: -len(suffix)] + replacement
            break
    # Normalise every dotted component, e.g. "1.01" -> "1.1".
    num = ".".join(part.strip(" ").lstrip("0") for part in num.split("."))
    return num, name


def _convert_tabs(content: str) -> str:
    """Rewrite each tabs:start/tabs:end section as `=== "Lang"` content tabs."""
    while True:
        i = content.find(_TABS_START)
        if i == -1:
            break
        # Look for the end marker only after the start marker; a stray end
        # marker earlier in the file would otherwise make this loop re-insert
        # the same text forever.
        j = content.find(_TABS_END, i + len(_TABS_START))
        if j == -1:
            break
        section = content[i + len(_TABS_START) : j].strip()
        tabs = []
        for lang, code in _FENCE_RE.findall(section):
            # Fall back to the raw fence tag so an unknown language is not
            # labelled "None".
            label = mapping.get(lang, lang)
            indented = code.replace("\n", "\n" + _TAB_INDENT)
            tabs.append(
                f'=== "{label}"\n\n'
                f"{_TAB_INDENT}```{lang}\n"
                f"{_TAB_INDENT}{indented}\n"
                f"{_TAB_INDENT}```\n"
            )
        content = content[:i] + "\n".join(tabs) + content[j + len(_TABS_END) :]
    return content


def _nav_sort_key(entry: str) -> int:
    """Return the leading integer of a nav entry's problem number."""
    return int(entry.split(".")[0].split(" ")[-1])


for source_dir in dirs:
    target_dir, depth = dirs_mapping[source_dir]
    for path in sorted(get_paths(source_dir, depth)):
        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        # The title is the text of the first Markdown link: "[<title>](...)".
        title = content[content.find("[") + 1 : content.find("]")]
        num, name = _split_title(title, source_dir)

        is_en = "README_EN" in path
        navdata = navdata_en if is_en else navdata_cn
        navdata[source_dir].append(
            f"{_NAV_ENTRY_INDENT}- {num}. {name}: {target_dir}/{num}.md"
        )

        content = _convert_tabs(content)

        docs_dir = os.path.join("docs-en" if is_en else "docs", target_dir)
        os.makedirs(docs_dir, exist_ok=True)
        with open(os.path.join(docs_dir, f"{num}.md"), "w", encoding="utf-8") as f:
            f.write(content)

    # Order by the leading problem number; the sort is stable, so entries that
    # share it (e.g. 1.1, 1.2) keep their path order.
    navdata_en[source_dir].sort(key=_nav_sort_key)
    navdata_cn[source_dir].sort(key=_nav_sort_key)
| 168 | + |
def _with_nav(base_config, sections):
    """Return ``base_config`` with its ``nav:`` section replaced.

    Args:
        base_config: Full text of an MkDocs configuration file. Everything
            from the first ``nav:`` onwards is discarded.
        sections: Iterable of ``(header, entries)`` pairs; ``entries`` is a
            list of already formatted nav lines.

    Returns:
        The configuration text ending with the regenerated ``nav:`` section.
    """
    cut = base_config.find("nav:")
    if cut != -1:
        base_config = base_config[:cut]
    # Normalise trailing newlines so repeated runs do not add one more blank
    # line before "nav:" each time.
    head = base_config.rstrip("\n")
    parts = [head + "\n\n" if head else "", "nav:\n"]
    for header, entries in sections:
        parts.append(f" - {header}:\n")
        parts.append("\n".join(entries))
        parts.append("\n")
    return "".join(parts)


config = _with_nav(
    config,
    [
        ("LeetCode", navdata_cn["solution"]),
        ("剑指 Offer(第 2 版)", navdata_cn["lcof"]),
        ("剑指 Offer(专项突击版)", navdata_cn["lcof2"]),
        ("程序员面试金典(第 6 版)", navdata_cn["lcci"]),
    ],
)
en_config = _with_nav(
    en_config,
    [
        ("LeetCode", navdata_en["solution"]),
        ("Cracking the Coding Interview, 6th Edition", navdata_en["lcci"]),
    ],
)

with open("mkdocs.yml", "w", encoding="utf-8") as f:
    f.write(config)

with open("mkdocs-en.yml", "w", encoding="utf-8") as f:
    f.write(en_config)
0 commit comments