Skip to content

Commit cb758d9

Browse files
committed
feat: save raw data of lc problems
1 parent b4ffd21 commit cb758d9

File tree

6 files changed

+4691
-4688
lines changed

6 files changed

+4691
-4688
lines changed

.gitignore

+10-8
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
1-
.idea/
2-
.DS_Store
3-
.vscode
4-
/node_modules
5-
/solution/result.json
6-
/lcof/lcof.json
7-
/lcof/lcof_list.json
8-
/lcci/lcci.json
1+
.idea/
2+
.DS_Store
3+
.vscode
4+
/node_modules
5+
/solution/result.json
6+
/solution/raw.json
7+
/lcof/lcof.json
8+
/lcof/lcof_list.json
9+
/lcci/lcci.json
10+
/solution/__pycache__

solution/main.py

+64-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import json
22
import os
3-
from urllib.parse import quote
3+
import re
4+
from urllib.parse import quote, unquote
45

56
from spider import Spider
67

@@ -130,6 +131,66 @@ def generate_summary(result):
130131
f.write(summary_en)
131132

132133

134+
# Captures the value of every src="..." attribute (non-greedy, up to the next quote).
_IMG_SRC_PATTERN = re.compile("src=\"(.*?)\"")
# Each pattern captures, as group 1, the text between the description marker
# and the solutions heading of a README (re.S lets "." span newlines).
_CN_SECTION_PATTERN = re.compile("<!-- 这里写题目描述 -->(.*?)## 解法", re.S)
_EN_SECTION_PATTERN = re.compile("## Description(.*?)## Solutions", re.S)
_CDN_PREFIX = 'https://cdn.jsdelivr.net/gh/doocs/leetcode@main'


def _replace_section(content, section_pattern, new_body):
    """Return content with the captured section replaced, or None if it is absent."""
    match = section_pattern.search(content)
    if match is None:
        return None
    # Splice by position instead of str.replace(): the captured text may be
    # blank or repeated elsewhere in the file, and only this span must change.
    start, end = match.span(1)
    return content[:start] + "\n\n" + new_body + "\n\n" + content[end:]


def _rewrite_image_urls(content, relative_path_cn):
    """Point every non-empty src="..." attribute at the CDN copy of the image."""
    image_dir = _CDN_PREFIX + str(relative_path_cn).replace("README.md", "images/")

    def to_cdn(match):
        url = match.group(1)
        if not url:
            # An empty src has no file name to map; leave it untouched.
            return match.group(0)
        image_name = (
            os.path.basename(url).replace('.PNG', '.png').replace('.JPG', '.jpg')
        )
        return 'src="' + image_dir + image_name + '"'

    # One pass over the attributes: each src is rewritten exactly once, so a
    # URL that occurs several times (or is a substring of an already rewritten
    # URL) can never be rewritten twice.
    return _IMG_SRC_PATTERN.sub(to_cdn, content)


def _refresh_content(content, section_pattern, new_body, relative_path_cn):
    """Return the refreshed README text, or None when the section markers are missing."""
    updated = _replace_section(content, section_pattern, new_body)
    if updated is None:
        return None
    updated = updated.replace("\n\n <ul>", "\n <ul>")
    return _rewrite_image_urls(updated, relative_path_cn)


def refresh(result):
    """Rewrite the problem statement inside each question's CN and EN README.

    For every question in ``result`` the text between the description marker
    and the solutions heading is replaced with ``content_cn`` / ``content_en``,
    and every ``src="..."`` attribute is redirected to the jsDelivr CDN.

    Args:
        result: iterable of dicts providing ``frontend_question_id``,
            ``relative_path_cn``, ``relative_path_en``, ``content_cn`` and
            ``content_en``.

    Raises:
        OSError: if a README cannot be read or written.

    A README without the expected markers is reported and left untouched
    instead of aborting the whole run.
    """
    for question in result:
        front_question_id = question['frontend_question_id']
        print(front_question_id)

        relative_path_cn = question['relative_path_cn']
        path_cn = unquote(str(relative_path_cn).replace("/solution", "."))
        path_en = unquote(str(question['relative_path_en']).replace("/solution", "."))

        # Read both files before writing either one, so a missing README
        # fails before anything on disk has been modified.
        with open(path_cn, 'r', encoding='utf-8') as f1:
            cn_content = f1.read()

        with open(path_en, 'r', encoding='utf-8') as f2:
            en_content = f2.read()

        # NOTE(review): image URLs for both languages are derived from the CN
        # path, presumably because both READMEs share one images/ directory
        # -- confirm.
        jobs = (
            (path_cn, cn_content, _CN_SECTION_PATTERN, question['content_cn']),
            (path_en, en_content, _EN_SECTION_PATTERN, question['content_en']),
        )
        for path, content, section_pattern, new_body in jobs:
            updated = _refresh_content(
                content, section_pattern, new_body, relative_path_cn
            )
            if updated is None:
                print(f'skipped {path}: description markers not found')
                continue
            with open(path, 'w', encoding='utf-8') as f:
                f.write(updated)
192+
193+
133194
def save(result):
134195
with open('./result.json', 'w', encoding='utf-8') as f:
135196
f.write(json.dumps(result))
@@ -140,6 +201,7 @@ def save(result):
140201
cookie_en = ''
141202
spider = Spider(cookie_cn, cookie_en)
142203
res = spider.run()
204+
save(res)
143205

144206
# with open('./result.json', 'r', encoding='utf-8') as f:
145207
# res = f.read()
@@ -148,4 +210,4 @@ def save(result):
148210
generate_readme(res)
149211
generate_question_readme(res)
150212
generate_summary(res)
151-
save(res)
213+
# refresh(res)

solution/refresh.py

-68
This file was deleted.

solution/spider.py

+7
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def __init__(self, cookie_cn: str, cookie_en: str):
9090
self.cookie_cn = cookie_cn
9191
self.cookie_en = cookie_en
9292
self.session = requests.session()
93+
self.raw_data = {}
9394

9495
def get_all_questions(self) -> List:
9596
"""获取所有题目"""
@@ -192,6 +193,7 @@ def handle(self, question: dict) -> dict:
192193
url_cn = f'https://leetcode-cn.com/problems/{question_title_slug}'
193194
url_en = f'https://leetcode.com/problems/{question_title_slug}'
194195
frontend_question_id = str(question['stat']['frontend_question_id']).zfill(4)
196+
self.raw_data[frontend_question_id] = question_detail
195197
no = int(frontend_question_id) // 100
196198
question_title_en = question['stat']['question__title']
197199
question_title_en = re.sub(r'[\\/:*?"<>|]', '', question_title_en).strip()
@@ -251,6 +253,10 @@ def handle(self, question: dict) -> dict:
251253
item['md_table_row_en'] = [col1_en, col2_en, col3_en, col4_en, col5_en]
252254
return item
253255

256+
def save(self):
    """Dump the collected raw question details to ./raw.json as JSON."""
    serialized = json.dumps(self.raw_data)
    with open('./raw.json', 'w', encoding='utf-8') as raw_file:
        raw_file.write(serialized)
259+
254260
def run(self):
255261
questions = self.get_all_questions()
256262
details = [self.handle(question) for question in questions]
@@ -259,4 +265,5 @@ def run(self):
259265
]
260266
details += [self.handle(question) for question in failed_questions]
261267
details = [detail for detail in details if detail]
268+
self.save()
262269
return details

0 commit comments

Comments
 (0)