Skip to content

Commit 4daa6ed

Browse files
committed
Breaking change: remove TotalRows from the row iterator, and optimize the iterator's performance
Reduces allocated memory by 20% and GC times by 80% for the row iterator
1 parent 50c4ded commit 4daa6ed

File tree

3 files changed

+75
-101
lines changed

3 files changed

+75
-101
lines changed

cell_test.go

+8
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,14 @@ func TestGetCellType(t *testing.T) {
340340
assert.EqualError(t, err, newCellNameToCoordinatesError("A", newInvalidCellNameError("A")).Error())
341341
}
342342

343+
func TestGetValueFrom(t *testing.T) {
344+
f := NewFile()
345+
c := xlsxC{T: "s"}
346+
value, err := c.getValueFrom(f, f.sharedStringsReader(), false)
347+
assert.NoError(t, err)
348+
assert.Equal(t, "", value)
349+
}
350+
343351
func TestGetCellFormula(t *testing.T) {
344352
// Test get cell formula on not exist worksheet.
345353
f := NewFile()

rows.go

+65-91
Original file line numberDiff line numberDiff line change
@@ -68,29 +68,49 @@ func (f *File) GetRows(sheet string, opts ...Options) ([][]string, error) {
6868

6969
// Rows defines an iterator to a sheet.
7070
type Rows struct {
71-
err error
72-
curRow, totalRows, stashRow int
73-
rawCellValue bool
74-
sheet string
75-
f *File
76-
tempFile *os.File
77-
decoder *xml.Decoder
71+
err error
72+
curRow, seekRow int
73+
needClose, rawCellValue bool
74+
sheet string
75+
f *File
76+
tempFile *os.File
77+
sst *xlsxSST
78+
decoder *xml.Decoder
79+
token xml.Token
7880
}
7981

8082
// CurrentRow returns the row number that represents the current row.
8183
func (rows *Rows) CurrentRow() int {
82-
return rows.curRow
83-
}
84-
85-
// TotalRows returns the total rows count in the worksheet.
86-
func (rows *Rows) TotalRows() int {
87-
return rows.totalRows
84+
return rows.seekRow
8885
}
8986

9087
// Next will return true if it finds the next row element.
9188
func (rows *Rows) Next() bool {
92-
rows.curRow++
93-
return rows.curRow <= rows.totalRows
89+
rows.seekRow++
90+
if rows.curRow >= rows.seekRow {
91+
return true
92+
}
93+
for {
94+
token, _ := rows.decoder.Token()
95+
if token == nil {
96+
return false
97+
}
98+
switch xmlElement := token.(type) {
99+
case xml.StartElement:
100+
if xmlElement.Name.Local == "row" {
101+
rows.curRow++
102+
if rowNum, _ := attrValToInt("r", xmlElement.Attr); rowNum != 0 {
103+
rows.curRow = rowNum
104+
}
105+
rows.token = token
106+
return true
107+
}
108+
case xml.EndElement:
109+
if xmlElement.Name.Local == "sheetData" {
110+
return false
111+
}
112+
}
113+
}
94114
}
95115

96116
// Error will return the error when the error occurs.
@@ -109,44 +129,40 @@ func (rows *Rows) Close() error {
109129

110130
// Columns returns the current row's column values.
111131
func (rows *Rows) Columns(opts ...Options) ([]string, error) {
112-
var rowIterator rowXMLIterator
113-
if rows.stashRow >= rows.curRow {
114-
return rowIterator.columns, rowIterator.err
132+
if rows.curRow > rows.seekRow {
133+
return nil, nil
115134
}
116-
rows.rawCellValue = parseOptions(opts...).RawCellValue
117-
rowIterator.rows = rows
118-
rowIterator.d = rows.f.sharedStringsReader()
135+
var rowIterator rowXMLIterator
136+
var token xml.Token
137+
rows.rawCellValue, rows.sst = parseOptions(opts...).RawCellValue, rows.f.sharedStringsReader()
119138
for {
120-
token, _ := rows.decoder.Token()
121-
if token == nil {
139+
if rows.token != nil {
140+
token = rows.token
141+
} else if token, _ = rows.decoder.Token(); token == nil {
122142
break
123143
}
124144
switch xmlElement := token.(type) {
125145
case xml.StartElement:
126146
rowIterator.inElement = xmlElement.Name.Local
127147
if rowIterator.inElement == "row" {
128-
rowIterator.row++
129-
if rowIterator.attrR, rowIterator.err = attrValToInt("r", xmlElement.Attr); rowIterator.attrR != 0 {
130-
rowIterator.row = rowIterator.attrR
148+
rowNum := 0
149+
if rowNum, rowIterator.err = attrValToInt("r", xmlElement.Attr); rowNum != 0 {
150+
rows.curRow = rowNum
151+
} else if rows.token == nil {
152+
rows.curRow++
131153
}
132-
if rowIterator.row > rowIterator.rows.curRow {
133-
rowIterator.rows.stashRow = rowIterator.row - 1
154+
if rows.curRow > rows.seekRow {
155+
rows.token = nil
134156
return rowIterator.columns, rowIterator.err
135157
}
136158
}
137-
rowXMLHandler(&rowIterator, &xmlElement, rows.rawCellValue)
138-
if rowIterator.err != nil {
159+
if rows.rowXMLHandler(&rowIterator, &xmlElement, rows.rawCellValue); rowIterator.err != nil {
160+
rows.token = nil
139161
return rowIterator.columns, rowIterator.err
140162
}
163+
rows.token = nil
141164
case xml.EndElement:
142-
rowIterator.inElement = xmlElement.Name.Local
143-
if rowIterator.row == 0 && rowIterator.rows.curRow > 1 {
144-
rowIterator.row = rowIterator.rows.curRow
145-
}
146-
if rowIterator.inElement == "row" && rowIterator.row+1 < rowIterator.rows.curRow {
147-
return rowIterator.columns, rowIterator.err
148-
}
149-
if rowIterator.inElement == "sheetData" {
165+
if xmlElement.Name.Local == "sheetData" {
150166
return rowIterator.columns, rowIterator.err
151167
}
152168
}
@@ -173,29 +189,25 @@ func (err ErrSheetNotExist) Error() string {
173189

174190
// rowXMLIterator defines the runtime fields used by the worksheet row SAX parser.
175191
type rowXMLIterator struct {
176-
err error
177-
inElement string
178-
attrR, cellCol, row int
179-
columns []string
180-
rows *Rows
181-
d *xlsxSST
192+
err error
193+
inElement string
194+
cellCol int
195+
columns []string
182196
}
183197

184198
// rowXMLHandler parses the row XML element of the worksheet.
185-
func rowXMLHandler(rowIterator *rowXMLIterator, xmlElement *xml.StartElement, raw bool) {
186-
rowIterator.err = nil
199+
func (rows *Rows) rowXMLHandler(rowIterator *rowXMLIterator, xmlElement *xml.StartElement, raw bool) {
187200
if rowIterator.inElement == "c" {
188201
rowIterator.cellCol++
189202
colCell := xlsxC{}
190-
_ = rowIterator.rows.decoder.DecodeElement(&colCell, xmlElement)
203+
_ = rows.decoder.DecodeElement(&colCell, xmlElement)
191204
if colCell.R != "" {
192205
if rowIterator.cellCol, _, rowIterator.err = CellNameToCoordinates(colCell.R); rowIterator.err != nil {
193206
return
194207
}
195208
}
196209
blank := rowIterator.cellCol - len(rowIterator.columns)
197-
val, _ := colCell.getValueFrom(rowIterator.rows.f, rowIterator.d, raw)
198-
if val != "" || colCell.F != nil {
210+
if val, _ := colCell.getValueFrom(rows.f, rows.sst, raw); val != "" || colCell.F != nil {
199211
rowIterator.columns = append(appendSpace(blank, rowIterator.columns), val)
200212
}
201213
}
@@ -236,48 +248,10 @@ func (f *File) Rows(sheet string) (*Rows, error) {
236248
output, _ := xml.Marshal(worksheet)
237249
f.saveFileList(name, f.replaceNameSpaceBytes(name, output))
238250
}
239-
var (
240-
err error
241-
inElement string
242-
row int
243-
rows Rows
244-
needClose bool
245-
decoder *xml.Decoder
246-
tempFile *os.File
247-
)
248-
if needClose, decoder, tempFile, err = f.xmlDecoder(name); needClose && err == nil {
249-
defer tempFile.Close()
250-
}
251-
for {
252-
token, _ := decoder.Token()
253-
if token == nil {
254-
break
255-
}
256-
switch xmlElement := token.(type) {
257-
case xml.StartElement:
258-
inElement = xmlElement.Name.Local
259-
if inElement == "row" {
260-
row++
261-
for _, attr := range xmlElement.Attr {
262-
if attr.Name.Local == "r" {
263-
row, err = strconv.Atoi(attr.Value)
264-
if err != nil {
265-
return &rows, err
266-
}
267-
}
268-
}
269-
rows.totalRows = row
270-
}
271-
case xml.EndElement:
272-
if xmlElement.Name.Local == "sheetData" {
273-
rows.f = f
274-
rows.sheet = name
275-
_, rows.decoder, rows.tempFile, err = f.xmlDecoder(name)
276-
return &rows, err
277-
}
278-
}
279-
}
280-
return &rows, nil
251+
var err error
252+
rows := Rows{f: f, sheet: name}
253+
rows.needClose, rows.decoder, rows.tempFile, err = f.xmlDecoder(name)
254+
return &rows, err
281255
}
282256

283257
// getFromStringItem build shared string item offset list from system temporary

rows_test.go

+2-10
Original file line numberDiff line numberDiff line change
@@ -44,13 +44,6 @@ func TestRows(t *testing.T) {
4444
}
4545
assert.NoError(t, f.Close())
4646

47-
f = NewFile()
48-
f.Pkg.Store("xl/worksheets/sheet1.xml", []byte(`<worksheet><sheetData><row r="1"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`))
49-
f.Sheet.Delete("xl/worksheets/sheet1.xml")
50-
delete(f.checked, "xl/worksheets/sheet1.xml")
51-
_, err = f.Rows("Sheet1")
52-
assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`)
53-
5447
f.Pkg.Store("xl/worksheets/sheet1.xml", nil)
5548
_, err = f.Rows("Sheet1")
5649
assert.NoError(t, err)
@@ -82,7 +75,6 @@ func TestRowsIterator(t *testing.T) {
8275
for rows.Next() {
8376
rowCount++
8477
assert.Equal(t, rowCount, rows.CurrentRow())
85-
assert.Equal(t, expectedNumRow, rows.TotalRows())
8678
require.True(t, rowCount <= expectedNumRow, "rowCount is greater than expected")
8779
}
8880
assert.Equal(t, expectedNumRow, rowCount)
@@ -186,16 +178,16 @@ func TestColumns(t *testing.T) {
186178
assert.NoError(t, err)
187179

188180
rows.decoder = f.xmlNewDecoder(bytes.NewReader([]byte(`<worksheet><sheetData><row r="A"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`)))
189-
rows.stashRow, rows.curRow = 0, 1
181+
assert.True(t, rows.Next())
190182
_, err = rows.Columns()
191183
assert.EqualError(t, err, `strconv.Atoi: parsing "A": invalid syntax`)
192184

193185
rows.decoder = f.xmlNewDecoder(bytes.NewReader([]byte(`<worksheet><sheetData><row r="1"><c r="A1" t="s"><v>1</v></c></row><row r="A"><c r="2" t="str"><v>B</v></c></row></sheetData></worksheet>`)))
194186
_, err = rows.Columns()
195187
assert.NoError(t, err)
196188

197-
rows.curRow = 3
198189
rows.decoder = f.xmlNewDecoder(bytes.NewReader([]byte(`<worksheet><sheetData><row r="1"><c r="A" t="s"><v>1</v></c></row></sheetData></worksheet>`)))
190+
assert.True(t, rows.Next())
199191
_, err = rows.Columns()
200192
assert.EqualError(t, err, newCellNameToCoordinatesError("A", newInvalidCellNameError("A")).Error())
201193

0 commit comments

Comments
 (0)