Skip to content

Commit 1ff8554

Browse files
authored
Automate release notes generation using Git and GitHub metadata (#5477)
A Python script automates the creation of structured release notes by:

- Fetching all commits to `main` since the last tag
- Extracting PR numbers from commit messages
- Batch-fetching recent PR details (title, labels, body) from GitHub
- Categorizing PRs using labels: `enhancement`, `preview`, `bug`, and other (e.g., `internal`)
- Identifying top-level highlights using:
  - `release-highlight` (important user-facing features)
  - `includes-media` (PRs with visual content)
- Formatting the categories and sections into markdown

## Usage

```bash
uv run scripts/generate_release_notes.py <since-tag>
# Example:
python generate_release_notes.py 0.14.7
```
1 parent 9ec52a6 commit 1ff8554

File tree

1 file changed

+301
-0
lines changed

1 file changed

+301
-0
lines changed

scripts/generate_release_notes.py

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
# /// script
2+
# requires-python = ">=3.13"
3+
# dependencies = [
4+
# "msgspec",
5+
# ]
6+
#
7+
# [tool.uv]
8+
# exclude-newer = "2025-06-27T12:38:25.742953-04:00"
9+
# ///
10+
"""Generate release notes from commits on main branch."""
11+
12+
from __future__ import annotations
13+
14+
import re
15+
import subprocess
16+
import sys
17+
18+
import msgspec
19+
20+
21+
class Author(msgspec.Struct):
    """GitHub author/user information.

    Used as the type of ``PullRequest.author``.
    """

    # Value of the "login" key of the decoded author object.
    login: str
25+
26+
27+
class Label(msgspec.Struct):
    """GitHub label information.

    Only ``name`` is required; ``color`` and ``description`` default to
    ``None`` when they are not supplied.
    """

    # Label name; the categorization logic in this file matches on this value.
    name: str
    # Optional metadata; not read by the code visible in this file.
    color: str | None = None
    description: str | None = None
33+
34+
35+
class PullRequest(msgspec.Struct):
    """GitHub Pull Request information.

    Field names mirror the keys requested via ``gh pr list --json`` in
    ``get_merged_prs`` (hence the camelCase ``mergedAt``), because the JSON
    output is decoded directly into this struct.
    """

    # PR number; used as the lookup key when matching commits to PRs.
    number: int
    # PR title; shown in the generated notes.
    title: str
    author: Author
    # Labels drive categorization ("bug", "enhancement", "internal", ...).
    labels: list[Label]
    # PR description; may be None. Scanned for media when highlighted.
    body: str | None
    # Kept as the raw string from the JSON payload; not parsed in this file.
    mergedAt: str | None = None
44+
45+
46+
class Commit(msgspec.Struct):
    """Git commit information.

    Instances are built from ``git log --format="%H %s"`` output lines.
    """

    # Full commit hash (%H).
    sha: str
    # Commit subject line (%s).
    message: str
    # PR number parsed from the subject, or None when no "#<digits>" is found.
    pr_number: int | None = None
52+
53+
54+
class CategorizedEntry(msgspec.Struct):
    """A release note entry with its PR information.

    Pairs a commit with the pull request it was matched to, if any.
    """

    commit: Commit
    # None when the commit has no PR number or the PR was not in the fetched set.
    pr: PullRequest | None = None
59+
60+
61+
def _extract_pr_number(message: str) -> int | None:
    """Return the PR number referenced by a commit subject, or None."""
    # GitHub squash merges append " (#N)" to the subject. Prefer that trailing
    # marker so an earlier issue reference ("Fix #12 crash (#34)") or a quoted
    # subject in a revert ('Revert "x (#100)" (#105)') does not win.
    found = re.search(r"\(#(\d+)\)\s*$", message) or re.search(
        r"#(\d+)", message
    )
    return int(found.group(1)) if found else None


def get_commits_since_tag(since_tag: str) -> list[Commit]:
    """Get commits on main since a specific tag.

    Runs ``git log`` with ``--first-parent`` and parses each
    ``"<sha> <subject>"`` line into a ``Commit``, attaching the PR number
    found in the subject (if any).

    Args:
        since_tag: Tag (or any revision) used as the exclusive lower bound of
            the ``<since_tag>..HEAD`` range.

    Returns:
        Commits in the order ``git log`` prints them.

    Raises:
        subprocess.CalledProcessError: If ``git log`` exits non-zero (for
            example when the tag does not exist).
    """
    # NOTE(review): "main" is passed as an additional revision alongside
    # "<since_tag>..HEAD"; confirm this is intended when HEAD is not main.
    result = subprocess.run(
        [
            "git",
            "log",
            f"{since_tag}..HEAD",
            "--format=%H %s",
            "--first-parent",  # Only follow the first parent (main branch)
            "main",
        ],
        capture_output=True,
        text=True,
        check=True,
    )

    commits = []
    for line in result.stdout.strip().split("\n"):
        if not line:
            continue

        # Lines without a space carry no subject and are skipped.
        sha, separator, message = line.partition(" ")
        if not separator:
            continue

        commits.append(
            Commit(
                sha=sha,
                message=message,
                pr_number=_extract_pr_number(message),
            )
        )

    return commits
96+
97+
98+
def get_merged_prs(limit: int = 100) -> dict[int, PullRequest]:
    """Fetch merged PRs targeting main and index them by PR number.

    Shells out to ``gh pr list`` and decodes its JSON output straight into
    ``PullRequest`` structs.

    Args:
        limit: Maximum number of PRs requested from ``gh`` (``--limit``).

    Returns:
        Mapping of PR number to its ``PullRequest``.

    Raises:
        subprocess.CalledProcessError: If the ``gh`` command exits non-zero.
    """
    json_fields = "number,title,author,labels,body,mergedAt"
    command = [
        "gh",
        "pr",
        "list",
        "--base",
        "main",
        "--state",
        "merged",
        "--limit",
        str(limit),
        "--json",
        json_fields,
    ]
    completed = subprocess.run(
        command,
        check=True,
        capture_output=True,
        text=True,
    )

    pull_requests = msgspec.json.decode(
        completed.stdout, type=list[PullRequest]
    )
    by_number: dict[int, PullRequest] = {}
    for pull in pull_requests:
        by_number[pull.number] = pull
    return by_number
122+
123+
124+
def extract_media_from_body(body: str | None) -> list[str]:
    """Extract media (images/links) from PR body.

    Three kinds of media are collected, in this order:

    1. Markdown images ``![alt](url)`` -> ``<img src="url" alt="PR media">``
    2. Raw HTML ``<img ...>`` tags -> copied verbatim
    3. Bare links to video/gif files, YouTube or Vimeo -> ``<a href=...>``

    Each URL is emitted at most once, so a ``.gif`` embedded as an image is
    not repeated as a link. URLs interpolated into generated HTML are
    escaped, because the PR body is free-form user text.

    Args:
        body: The PR description, or None/empty when there is none.

    Returns:
        HTML snippets, one per distinct media URL; empty when nothing matched.
    """
    # Local import keeps this block self-contained; ``html`` is stdlib.
    import html

    if not body:
        return []

    # Characters allowed inside a bare URL. Excluding quotes, angle brackets
    # and closing brackets stops a match from swallowing the ")" of a markdown
    # link or the quote that closes an HTML attribute.
    url_chars = r"""[^\s<>"')\]]"""

    media: list[str] = []
    seen: set[str] = set()

    def _first_time(url: str) -> bool:
        """Record *url* and report whether it had not been seen before."""
        if url in seen:
            return False
        seen.add(url)
        return True

    # Find markdown images: ![alt](url) or ![alt](url "title")
    for match in re.finditer(r"!\[.*?\]\((.*?)\)", body):
        target = match.group(1).split(maxsplit=1)
        if not target:
            continue
        # Drop the optional title and the optional <...> wrapper.
        url = target[0].strip("<>")
        if url and _first_time(url):
            escaped = html.escape(url, quote=True)
            media.append(f'<img src="{escaped}" alt="PR media">')

    # Find HTML img tags (kept verbatim so width/height attributes survive)
    for match in re.finditer(r'<img[^>]+src=["\']([^"\']+)["\'][^>]*>', body):
        if _first_time(match.group(1)):
            media.append(match.group(0))

    # Find video links (common patterns)
    video_patterns = [
        # \b keeps ".mov" from matching inside ".movie"; an optional query or
        # fragment is kept so signed URLs stay intact.
        rf"https?://{url_chars}+\.(?:mp4|webm|mov|gif)\b(?:[?#]{url_chars}*)?",
        rf"https?://(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/){url_chars}+",
        rf"https?://(?:www\.)?vimeo\.com/{url_chars}+",
    ]
    for pattern in video_patterns:
        for match in re.finditer(pattern, body):
            url = match.group(0)
            if _first_time(url):
                escaped = html.escape(url, quote=True)
                media.append(f'<a href="{escaped}">{escaped}</a>')

    return media
152+
153+
154+
def categorize_entries(
    entries: list[CategorizedEntry],
) -> dict[str, list[CategorizedEntry]]:
    """Group release-note entries into buckets according to PR labels.

    Rules, applied per entry:

    * No PR attached -> ``"other"``.
    * Labeled ``internal`` -> dropped entirely.
    * Labeled ``release-highlight`` -> added to ``"highlights"`` *in addition
      to* its regular bucket.
    * Regular bucket is the first of ``bug``, ``enhancement``,
      ``documentation``, ``preview`` present on the PR, else ``"other"``.

    Returns:
        A dict that always contains all six bucket keys, each mapping to a
        (possibly empty) list that preserves input order.
    """
    # Order doubles as priority when a PR carries several of these labels.
    label_priority = ("bug", "enhancement", "documentation", "preview")
    grouped: dict[str, list[CategorizedEntry]] = {
        key: [] for key in (*label_priority, "other", "highlights")
    }

    for item in entries:
        pull = item.pr
        if pull is None:
            grouped["other"].append(item)
            continue

        names = {lbl.name for lbl in pull.labels}
        if "internal" in names:
            # Internal changes are not shown in release notes.
            continue

        if "release-highlight" in names:
            grouped["highlights"].append(item)

        bucket = next((key for key in label_priority if key in names), "other")
        grouped[bucket].append(item)

    return grouped
194+
195+
196+
def strip_conventional_prefix(title: str) -> str:
197+
"""Strip conventional commit prefixes and capitalize first letter."""
198+
# Match patterns like "word:" or "word(scope):" at the beginning
199+
match = re.match(r"^(\w+)(?:\([^)]+\))?:\s*(.+)", title)
200+
if match:
201+
# Get the part after the prefix and capitalize first letter
202+
stripped = match.group(2)
203+
return stripped[0].upper() + stripped[1:] if stripped else stripped
204+
return title
205+
206+
207+
def format_entry(entry: CategorizedEntry) -> str:
    """Render one entry as a markdown bullet.

    With a matched PR, the bullet shows the PR title (conventional-commit
    prefix stripped) and a link to the PR. Without one, it falls back to the
    commit subject and the 7-character abbreviated SHA.

    Args:
        entry: The commit and, when matched, its pull request.

    Returns:
        A single markdown list item starting with ``"* "``.
    """
    if entry.pr:
        title = strip_conventional_prefix(entry.pr.title)
        return f"* {title} ([#{entry.pr.number}](https://github.com/marimo-team/marimo/pull/{entry.pr.number}))"

    title = strip_conventional_prefix(entry.commit.message)
    return f"* {title} ({entry.commit.sha[:7]})"
214+
215+
216+
def generate_release_notes(since_tag: str) -> str:
    """Build the markdown release notes for everything after ``since_tag``.

    Steps: list commits since the tag, fetch merged PRs, pair each commit
    with its PR (when the number is known and the PR was fetched), bucket the
    pairs by label, then render highlights, the per-category sections and the
    fixed footer.

    Args:
        since_tag: Previous release tag; also used in the changelog compare
            link.

    Returns:
        The full release-notes document as one newline-joined string. Parts
        that need manual editing are marked with ``TODO``.
    """
    commits = get_commits_since_tag(since_tag)
    recent_prs = get_merged_prs(limit=100)

    # Pair every commit with its PR; commits without a match keep pr=None.
    paired = [
        CategorizedEntry(
            commit=commit,
            pr=recent_prs.get(commit.pr_number) if commit.pr_number else None,
        )
        for commit in commits
    ]
    grouped = categorize_entries(paired)

    lines = ["## What's Changed\n"]

    highlights = grouped["highlights"]
    if highlights:
        for position, highlight in enumerate(highlights):
            if position > 0:
                # Blank line between consecutive highlights.
                lines.append("")

            pull = highlight.pr
            if not pull:
                continue

            lines += [
                f"**TODO: {pull.title} #{pull.number}**",
                "",
                "TODO: Description of the feature",
            ]

            # Media is only pulled from PRs explicitly labeled for it.
            if all(lbl.name != "includes-media" for lbl in pull.labels):
                continue
            attachments = extract_media_from_body(pull.body)
            if attachments:
                lines.append("")
                lines.extend(attachments)
        lines.append("")

    # (bucket key, heading) in the order the sections appear in the notes.
    sections = (
        ("enhancement", "## ✨ Enhancements"),
        ("bug", "## 🐛 Bug fixes"),
        ("documentation", "## 📚 Documentation"),
        ("preview", "## 🔬 Preview features"),
        ("other", "## 📝 Other changes"),
    )
    for key, heading in sections:
        bucket = grouped[key]
        if not bucket:
            continue
        lines.append(heading)
        lines.extend(format_entry(item) for item in bucket)
        lines.append("")

    lines.append("## New Contributors")
    lines.append("* TODO: Check for new contributors")

    # Placeholder; the new version is not known to this script.
    release_tag = "TODO_CURRENT_VERSION"
    lines.append(
        f"\n**Full Changelog**: https://github.com/marimo-team/marimo/compare/{since_tag}...{release_tag}"
    )

    return "\n".join(lines)
289+
290+
291+
def main() -> None:
    """Command-line entry point.

    Expects the previous release tag as the first argument. Prints usage and
    exits with status 1 when it is missing; otherwise prints the generated
    release notes to stdout. Extra arguments are ignored.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        for usage_line in (
            "Usage: generate_release_notes.py <since-tag>",
            "Example: generate_release_notes.py 0.14.7",
        ):
            print(usage_line)
        sys.exit(1)

    since_tag = cli_args[0]
    print(generate_release_notes(since_tag))
298+
299+
300+
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)