Skip to content

Commit 2cfcf9e

Browse files
committed
Encode escaped string literals which are not permitted in an XML 1.0 document
1 parent c62ced7 commit 2cfcf9e

File tree

3 files changed

+57
-5
lines changed

3 files changed

+57
-5
lines changed

cell.go

+2-1
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,7 @@ func (f *File) setSharedString(val string) int {
288288
}
289289
sst.Count++
290290
sst.UniqueCount++
291+
val = bstrMarshal(val)
291292
t := xlsxT{Val: val}
292293
// Leading and ending space(s) character detection.
293294
if len(val) > 0 && (val[0] == 32 || val[len(val)-1] == 32) {
@@ -315,7 +316,7 @@ func setCellStr(value string) (t string, v string, ns xml.Attr) {
315316
}
316317
}
317318
t = "str"
318-
v = value
319+
v = bstrMarshal(value)
319320
return
320321
}
321322

lib.go

+42-4
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,11 @@ func isNumeric(s string) (bool, int) {
456456
return true, p
457457
}
458458

459+
// Precompiled patterns shared by bstrUnmarshal and bstrMarshal, compiled once
// at package scope instead of on every call.
var (
460+
// bstrExp matches an escape-shaped sequence: an underscore, a literal 'x',
// exactly four ASCII letters or digits, and a closing underscore (for
// example _x0008_). The character class also admits non-hexadecimal
// letters, so callers check the four characters with strconv.Unquote.
bstrExp = regexp.MustCompile(`_x[a-zA-Z\d]{4}_`)
461+
// bstrEscapeExp matches the same shape without the leading underscore (for
// example x0008_); it is applied to the six bytes that follow a _x005F_
// match.
bstrEscapeExp = regexp.MustCompile(`x[a-zA-Z\d]{4}_`)
462+
)
463+
459464
// bstrUnmarshal parses the binary basic string; this will trim escaped string
460465
// literals which are not permitted in an XML 1.0 document. The basic string
461466
// variant type can store any valid Unicode character. Unicode characters
@@ -468,15 +473,13 @@ func isNumeric(s string) (bool, int) {
468473
// initial underscore shall itself be escaped (i.e. stored as _x005F_). For
469474
// example: The string literal _x0008_ would be stored as _x005F_x0008_.
470475
func bstrUnmarshal(s string) (result string) {
471-
bstrExp := regexp.MustCompile(`_x[a-zA-Z0-9]{4}_`)
472-
escapeExp := regexp.MustCompile(`x[a-zA-Z0-9]{4}_`)
473476
matches, l, cursor := bstrExp.FindAllStringSubmatchIndex(s, -1), len(s), 0
474477
for _, match := range matches {
475478
result += s[cursor:match[0]]
476479
subStr := s[match[0]:match[1]]
477480
if subStr == "_x005F_" {
478481
cursor = match[1]
479-
if l > match[1]+6 && !escapeExp.MatchString(s[match[1]:match[1]+6]) {
482+
if l > match[1]+6 && !bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) {
480483
result += subStr
481484
continue
482485
}
@@ -487,7 +490,7 @@ func bstrUnmarshal(s string) (result string) {
487490
cursor = match[1]
488491
v, err := strconv.Unquote(`"\u` + s[match[0]+2:match[1]-1] + `"`)
489492
if err != nil {
490-
if l > match[1]+6 && escapeExp.MatchString(s[match[1]:match[1]+6]) {
493+
if l > match[1]+6 && bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) {
491494
result += subStr[:6]
492495
cursor = match[1] + 6
493496
continue
@@ -512,6 +515,41 @@ func bstrUnmarshal(s string) (result string) {
512515
return result
513516
}
514517

518+
// bstrMarshal encodes the escaped string literals which are not permitted in
519+
// an XML 1.0 document. It scans s for sequences matching bstrExp and escapes
// their leading underscore as _x005F_ when the four characters after "_x"
// form a valid \u escape, so that (for example) the literal text _x0008_ is
// stored as _x005F_x0008_. Text outside the matches is copied unchanged.
520+
func bstrMarshal(s string) (result string) {
521+
// matches holds the [start, end) byte offsets of every bstrExp match, l is
// the length of s, and cursor is the offset up to which s has been consumed.
matches, l, cursor := bstrExp.FindAllStringSubmatchIndex(s, -1), len(s), 0
522+
for _, match := range matches {
523+
// Copy the text between the previous consumed position and this match.
result += s[cursor:match[0]]
524+
subStr := s[match[0]:match[1]]
525+
// A literal _x005F_ in the input is itself the escape for an underscore,
// so it is handled separately from the other sequences.
if subStr == "_x005F_" {
526+
cursor = match[1]
527+
// Look at the six bytes after the match: if they have the xHHHH_ shape and
// the four characters are accepted by strconv.Unquote as a \u escape, the
// input reads like an already-escaped sequence (e.g. _x005F_x0006_).
if match[1]+6 <= l && bstrEscapeExp.MatchString(s[match[1]:match[1]+6]) {
528+
_, err := strconv.Unquote(`"\u` + s[match[1]+1:match[1]+5] + `"`)
529+
if err == nil {
530+
// Emits _x005F_x005F_x005F_; the following xHHHH_ bytes are appended later
// from cursor by the next iteration or the trailing copy.
result += subStr + "x005F" + subStr
531+
continue
532+
}
533+
}
534+
// Otherwise emit _x005F_x005F_ for the literal _x005F_.
result += subStr + "x005F_"
535+
continue
536+
}
537+
// subStr is a bstrExp match, so this condition holds for every match that
// reaches this point.
if bstrExp.MatchString(subStr) {
538+
cursor = match[1]
539+
// Validate the four characters between "_x" and the closing underscore.
_, err := strconv.Unquote(`"\u` + s[match[0]+2:match[1]-1] + `"`)
540+
if err == nil {
541+
// Valid escape shape: prefix with _x005F so the leading underscore is escaped.
result += "_x005F" + subStr
542+
continue
543+
}
544+
// Not a valid \u escape (e.g. _xG05F_): keep the text as is.
result += subStr
545+
}
546+
}
547+
// Append whatever remains after the last match (or all of s if none matched).
if cursor < l {
548+
result += s[cursor:]
549+
}
550+
return result
551+
}
552+
515553
// Stack defined an abstract data type that serves as a collection of elements.
516554
type Stack struct {
517555
list *list.List

lib_test.go

+13
Original file line numberDiff line numberDiff line change
@@ -258,3 +258,16 @@ func TestBstrUnmarshal(t *testing.T) {
258258
assert.Equal(t, expected, bstrUnmarshal(bstr))
259259
}
260260
}
261+
262+
// TestBstrMarshal checks bstrMarshal against a table that maps each input
// string to its expected encoded form: a sequence with a non-hexadecimal
// character is left unchanged, a valid _xHHHH_ sequence gets a _x005F prefix,
// and a literal _x005F_ is escaped with or without a following sequence.
func TestBstrMarshal(t *testing.T) {
263+
// Keys are inputs, values are the expected bstrMarshal results.
bstrs := map[string]string{
264+
"*_xG05F_*": "*_xG05F_*",
265+
"*_x0008_*": "*_x005F_x0008_*",
266+
"*_x005F_*": "*_x005F_x005F_*",
267+
"*_x005F_xG006_*": "*_x005F_x005F_xG006_*",
268+
"*_x005F_x0006_*": "*_x005F_x005F_x005F_x0006_*",
269+
}
270+
for bstr, expected := range bstrs {
271+
assert.Equal(t, expected, bstrMarshal(bstr))
272+
}
273+
}

0 commit comments

Comments
 (0)