Skip to content

Commit 4698117

Browse files
Michael Piatek authored and bradfitz committed on Jan 6, 2014
go.net/html: Expose data read from the input reader but not yet tokenized in Tokenizer.
This allows clients to efficiently reconstruct the original input in the case of ErrBufferExceeded. TestMaxBufferReconstruction now properly verifies this.

R=bradfitz
CC=golang-codereviews
https://golang.org/cl/47770043
1 parent 384e4d2 commit 4698117

File tree

2 files changed

+13
-10
lines changed

2 files changed

+13
-10
lines changed
 

‎html/token.go

+5
Original file line number | Diff line number | Diff line change
@@ -286,6 +286,11 @@ func (z *Tokenizer) readByte() byte {
286286
return x
287287
}
288288

289+
// Buffered returns a slice containing data buffered but not yet tokenized.
290+
func (z *Tokenizer) Buffered() []byte {
291+
return z.buf[z.raw.end:]
292+
}
293+
289294
// readAtLeastOneByte wraps an io.Reader so that reading cannot return (0, nil).
290295
// It returns io.ErrNoProgress if the underlying r.Read method returns (0, nil)
291296
// too many times in succession.

‎html/token_test.go

+8-10
Original file line number | Diff line number | Diff line change
@@ -490,7 +490,6 @@ func TestMaxBufferReconstruction(t *testing.T) {
490490
// reconstructing the original input.
491491
tests:
492492
for _, test := range tokenTests {
493-
buffer:
494493
for maxBuf := 1; ; maxBuf++ {
495494
r := strings.NewReader(test.html)
496495
z := NewTokenizer(r)
@@ -500,19 +499,14 @@ tests:
500499
tt := z.Next()
501500
tokenized.Write(z.Raw())
502501
if tt == ErrorToken {
503-
if z.Err() == ErrBufferExceeded {
504-
continue buffer
505-
}
506-
// EOF is expected, and indicates that we found the max maxBuf that
507-
// generates ErrBufferExceeded, so continue to the next test.
508-
if err := z.Err(); err != io.EOF {
502+
if err := z.Err(); err != io.EOF && err != ErrBufferExceeded {
509503
t.Errorf("%s: unexpected error: %v", test.desc, err)
510504
}
511505
break
512506
}
513507
}
514-
// Anything tokenizing along with input left in the reader.
515-
assembled, err := ioutil.ReadAll(io.MultiReader(&tokenized, r))
508+
// Anything tokenized along with untokenized input or data left in the reader.
509+
assembled, err := ioutil.ReadAll(io.MultiReader(&tokenized, bytes.NewReader(z.Buffered()), r))
516510
if err != nil {
517511
t.Errorf("%s: ReadAll: %v", test.desc, err)
518512
continue tests
@@ -521,7 +515,11 @@ tests:
521515
t.Errorf("%s: reassembled html:\n got: %q\nwant: %q", test.desc, got, want)
522516
continue tests
523517
}
524-
break
518+
// EOF indicates that we completed tokenization and hence found the max
519+
// maxBuf that generates ErrBufferExceeded, so continue to the next test.
520+
if z.Err() == io.EOF {
521+
break
522+
}
525523
} // buffer sizes
526524
} // tests
527525
}

0 commit comments

Comments
 (0)
Please sign in to comment.