Skip to content

Commit 8ffa475

Browse files
rolandshoemakerneild
authored andcommitted
html: only render content literally in the HTML namespace
Per the WHATWG HTML specification, section 13.3, only append the literal content of a text node if we are in the HTML namespace. Thanks to Mohammad Thoriq Aziz for reporting this issue. Fixes golang/go#61615 Fixes CVE-2023-3978 Change-Id: I332152904d4e7646bd2441602bcbe591fc655fa4 Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1942896 Reviewed-by: Tatiana Bradley <tatianabradley@google.com> Run-TryBot: Roland Shoemaker <bracewell@google.com> Reviewed-by: Damien Neil <dneil@google.com> TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com> Reviewed-on: https://go-review.googlesource.com/c/net/+/514896 Reviewed-by: Roland Shoemaker <roland@golang.org> TryBot-Result: Gopher Robot <gobot@golang.org> Run-TryBot: Damien Neil <dneil@google.com>
1 parent 63fe334 commit 8ffa475

File tree

2 files changed

+70
-14
lines changed

2 files changed

+70
-14
lines changed

html/render.go

+24-4
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,8 @@ func render1(w writer, n *Node) error {
194194
}
195195
}
196196

197-
// Render any child nodes.
198-
switch n.Data {
199-
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
197+
// Render any child nodes
198+
if childTextNodesAreLiteral(n) {
200199
for c := n.FirstChild; c != nil; c = c.NextSibling {
201200
if c.Type == TextNode {
202201
if _, err := w.WriteString(c.Data); err != nil {
@@ -213,7 +212,7 @@ func render1(w writer, n *Node) error {
213212
// last element in the file, with no closing tag.
214213
return plaintextAbort
215214
}
216-
default:
215+
} else {
217216
for c := n.FirstChild; c != nil; c = c.NextSibling {
218217
if err := render1(w, c); err != nil {
219218
return err
@@ -231,6 +230,27 @@ func render1(w writer, n *Node) error {
231230
return w.WriteByte('>')
232231
}
233232

233+
func childTextNodesAreLiteral(n *Node) bool {
234+
// Per WHATWG HTML 13.3, if the parent of the current node is a style,
235+
// script, xmp, iframe, noembed, noframes, or plaintext element, and the
236+
// current node is a text node, append the value of the node's data
237+
// literally. The specification is not explicit about it, but we only
238+
// enforce this if we are in the HTML namespace (i.e. when the namespace is
239+
// "").
240+
// NOTE: we also always include noscript elements, although the
241+
// specification states that they should only be rendered as such if
242+
// scripting is enabled for the node (which is not something we track).
243+
if n.Namespace != "" {
244+
return false
245+
}
246+
switch n.Data {
247+
case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
248+
return true
249+
default:
250+
return false
251+
}
252+
}
253+
234254
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
235255
// quotes, but if s contains a double quote, it will use single quotes.
236256
// It is used for writing the identifiers in a doctype declaration.

html/render_test.go

+46-10
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ package html
66

77
import (
88
"bytes"
9+
"fmt"
10+
"strings"
911
"testing"
1012
)
1113

@@ -108,16 +110,16 @@ func TestRenderer(t *testing.T) {
108110
// just commentary. The "0:" prefixes are for easy cross-reference with
109111
// the nodes array.
110112
treeAsText := [...]string{
111-
0: `<html>`,
112-
1: `. <head>`,
113-
2: `. <body>`,
114-
3: `. . "0&lt;1"`,
115-
4: `. . <p id="A" foo="abc&#34;def">`,
116-
5: `. . . "2"`,
117-
6: `. . . <b empty="">`,
118-
7: `. . . . "3"`,
119-
8: `. . . <i backslash="\">`,
120-
9: `. . . . "&amp;4"`,
113+
0: `<html>`,
114+
1: `. <head>`,
115+
2: `. <body>`,
116+
3: `. . "0&lt;1"`,
117+
4: `. . <p id="A" foo="abc&#34;def">`,
118+
5: `. . . "2"`,
119+
6: `. . . <b empty="">`,
120+
7: `. . . . "3"`,
121+
8: `. . . <i backslash="\">`,
122+
9: `. . . . "&amp;4"`,
121123
10: `. . "5"`,
122124
11: `. . <blockquote>`,
123125
12: `. . <br>`,
@@ -169,3 +171,37 @@ func TestRenderer(t *testing.T) {
169171
t.Errorf("got vs want:\n%s\n%s\n", got, want)
170172
}
171173
}
174+
175+
func TestRenderTextNodes(t *testing.T) {
176+
elements := []string{"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript"}
177+
for _, namespace := range []string{
178+
"", // html
179+
"svg",
180+
"math",
181+
} {
182+
for _, e := range elements {
183+
var namespaceOpen, namespaceClose string
184+
if namespace != "" {
185+
namespaceOpen, namespaceClose = fmt.Sprintf("<%s>", namespace), fmt.Sprintf("</%s>", namespace)
186+
}
187+
doc := fmt.Sprintf(`<html><head></head><body>%s<%s>&</%s>%s</body></html>`, namespaceOpen, e, e, namespaceClose)
188+
n, err := Parse(strings.NewReader(doc))
189+
if err != nil {
190+
t.Fatal(err)
191+
}
192+
b := bytes.NewBuffer(nil)
193+
if err := Render(b, n); err != nil {
194+
t.Fatal(err)
195+
}
196+
197+
expected := doc
198+
if namespace != "" {
199+
expected = strings.Replace(expected, "&", "&amp;", 1)
200+
}
201+
202+
if b.String() != expected {
203+
t.Errorf("unexpected output: got %q, want %q", b.String(), expected)
204+
}
205+
}
206+
}
207+
}

0 commit comments

Comments
 (0)