Skip to content

Commit a12a34e

Browse files
committed
Migrate to using musl libc sscanf to improve performance. Keep the existing JS vfscanf for Emscripten FS compatibility, and add the musl-specific version to be used when called from sscanf and vsscanf.
1 parent ba02f95 commit a12a34e

File tree

9 files changed

+395
-9
lines changed

9 files changed

+395
-9
lines changed

src/library.js

-9
Original file line numberDiff line numberDiff line change
@@ -2802,15 +2802,6 @@ LibraryManager.library = {
28022802
var stdin = {{{ makeGetValue(makeGlobalUse('_stdin'), '0', 'void*') }}};
28032803
return _fscanf(stdin, format, varargs);
28042804
},
2805-
sscanf__deps: ['_scanString'],
2806-
sscanf: function(s, format, varargs) {
2807-
// int sscanf(const char *restrict s, const char *restrict format, ... );
2808-
// http://pubs.opengroup.org/onlinepubs/000095399/functions/scanf.html
2809-
var index = 0;
2810-
function get() { return {{{ makeGetValue('s', 'index++', 'i8') }}}; };
2811-
function unget() { index--; };
2812-
return __scanString(format, get, unget, varargs);
2813-
},
28142805
snprintf__deps: ['_formatString', 'malloc'],
28152806
snprintf: function(s, n, format, varargs) {
28162807
// int snprintf(char *restrict s, size_t n, const char *restrict format, ...);

system/lib/libc/musl/src/internal/stdio_impl.h

+3
Original file line numberDiff line numberDiff line change
@@ -95,4 +95,7 @@ int __fmodeflags(const char *);
9595
FILE *__fopen_rb_ca(const char *, FILE *, unsigned char *, size_t);
9696
int __fclose_ca(FILE *);
9797

98+
// XXX Emscripten
99+
int MUSL_vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap);
100+
98101
#endif
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
#include "stdio_impl.h"
2+
#include <string.h>
3+
4+
size_t __string_read(FILE *f, unsigned char *buf, size_t len)
5+
{
6+
char *src = f->cookie;
7+
size_t k = len+256;
8+
char *end = memchr(src, 0, k);
9+
if (end) k = end-src;
10+
if (k < len) len = k;
11+
memcpy(buf, src, len);
12+
f->rpos = (void *)(src+len);
13+
f->rend = (void *)(src+k);
14+
f->cookie = src+k;
15+
return len;
16+
}
+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#include <stdio.h>
2+
#include <stdarg.h>
3+
#include "libc.h"
4+
5+
/*
 * Scan the string s according to fmt, storing results through the
 * variadic pointer arguments.
 *
 * Packages the variadic arguments into a va_list and forwards to vsscanf;
 * the value returned is whatever vsscanf returns.
 */
int sscanf(const char *restrict s, const char *restrict fmt, ...)
{
	va_list args;
	int converted;

	va_start(args, fmt);
	converted = vsscanf(s, fmt, args);
	va_end(args);

	return converted;
}
14+
15+
/* Make sscanf reachable under the name __isoc99_sscanf as well.
 * NOTE(review): weak_alias is a project macro (presumably from libc.h) that
 * is expected to emit a weak symbol alias -- confirm against its definition. */
weak_alias(sscanf,__isoc99_sscanf);
+332
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,332 @@
1+
#include <stdlib.h>
2+
#include <stdarg.h>
3+
#include <ctype.h>
4+
#include <wchar.h>
5+
#include <wctype.h>
6+
#include <limits.h>
7+
#include <string.h>
8+
#include <errno.h>
9+
#include <math.h>
10+
#include <float.h>
11+
#include <inttypes.h>
12+
13+
#include "stdio_impl.h"
14+
#include "shgetc.h"
15+
#include "intscan.h"
16+
#include "floatscan.h"
17+
18+
#define SIZE_hh -2
19+
#define SIZE_h -1
20+
#define SIZE_def 0
21+
#define SIZE_l 1
22+
#define SIZE_L 2
23+
#define SIZE_ll 3
24+
25+
static void store_int(void *dest, int size, unsigned long long i)
26+
{
27+
if (!dest) return;
28+
switch (size) {
29+
case SIZE_hh:
30+
*(char *)dest = i;
31+
break;
32+
case SIZE_h:
33+
*(short *)dest = i;
34+
break;
35+
case SIZE_def:
36+
*(int *)dest = i;
37+
break;
38+
case SIZE_l:
39+
*(long *)dest = i;
40+
break;
41+
case SIZE_ll:
42+
*(long long *)dest = i;
43+
break;
44+
}
45+
}
46+
47+
/*
 * Fetch the n-th (1-based) pointer argument from ap without disturbing ap
 * itself: the walk is done on a private copy of the list.
 *
 * For n of 0 or 1 the first argument is returned.
 */
static void *arg_n(va_list ap, unsigned int n)
{
	va_list walk;
	void *found;

	va_copy(walk, ap);

	/* Discard the n-1 arguments that precede the requested one. */
	while (n > 1) {
		(void)va_arg(walk, void *);
		n--;
	}

	found = va_arg(walk, void *);
	va_end(walk);

	return found;
}
58+
59+
//int vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap)
60+
int MUSL_vfscanf(FILE *restrict f, const char *restrict fmt, va_list ap) // XXX Emscripten: Only use musl-specific vfscanf when called from within sscanf.
61+
{
62+
int width;
63+
int size;
64+
int alloc;
65+
int base;
66+
const unsigned char *p;
67+
int c, t;
68+
char *s;
69+
wchar_t *wcs;
70+
mbstate_t st;
71+
void *dest=NULL;
72+
int invert;
73+
int matches=0;
74+
unsigned long long x;
75+
long double y;
76+
off_t pos = 0;
77+
unsigned char scanset[257];
78+
size_t i, k;
79+
wchar_t wc;
80+
81+
FLOCK(f);
82+
83+
for (p=(const unsigned char *)fmt; *p; p++) {
84+
85+
alloc = 0;
86+
87+
if (isspace(*p)) {
88+
while (isspace(p[1])) p++;
89+
shlim(f, 0);
90+
while (isspace(shgetc(f)));
91+
shunget(f);
92+
pos += shcnt(f);
93+
continue;
94+
}
95+
if (*p != '%' || p[1] == '%') {
96+
p += *p=='%';
97+
shlim(f, 0);
98+
c = shgetc(f);
99+
if (c!=*p) {
100+
shunget(f);
101+
if (c<0) goto input_fail;
102+
goto match_fail;
103+
}
104+
pos++;
105+
continue;
106+
}
107+
108+
p++;
109+
if (*p=='*') {
110+
dest = 0; p++;
111+
} else if (isdigit(*p) && p[1]=='$') {
112+
dest = arg_n(ap, *p-'0'); p+=2;
113+
} else {
114+
dest = va_arg(ap, void *);
115+
}
116+
117+
for (width=0; isdigit(*p); p++) {
118+
width = 10*width + *p - '0';
119+
}
120+
121+
if (*p=='m') {
122+
alloc = !!dest;
123+
p++;
124+
} else {
125+
alloc = 0;
126+
}
127+
128+
size = SIZE_def;
129+
switch (*p++) {
130+
case 'h':
131+
if (*p == 'h') p++, size = SIZE_hh;
132+
else size = SIZE_h;
133+
break;
134+
case 'l':
135+
if (*p == 'l') p++, size = SIZE_ll;
136+
else size = SIZE_l;
137+
break;
138+
case 'j':
139+
size = SIZE_ll;
140+
break;
141+
case 'z':
142+
case 't':
143+
size = SIZE_l;
144+
break;
145+
case 'L':
146+
size = SIZE_L;
147+
break;
148+
case 'd': case 'i': case 'o': case 'u': case 'x':
149+
case 'a': case 'e': case 'f': case 'g':
150+
case 'A': case 'E': case 'F': case 'G': case 'X':
151+
case 's': case 'c': case '[':
152+
case 'S': case 'C':
153+
case 'p': case 'n':
154+
p--;
155+
break;
156+
default:
157+
goto fmt_fail;
158+
}
159+
160+
t = *p;
161+
162+
/* C or S */
163+
if ((t&0x2f) == 3) {
164+
t |= 32;
165+
size = SIZE_l;
166+
}
167+
168+
switch (t) {
169+
case 'c':
170+
if (width < 1) width = 1;
171+
case '[':
172+
break;
173+
case 'n':
174+
store_int(dest, size, pos);
175+
/* do not increment match count, etc! */
176+
continue;
177+
default:
178+
shlim(f, 0);
179+
while (isspace(shgetc(f)));
180+
shunget(f);
181+
pos += shcnt(f);
182+
}
183+
184+
shlim(f, width);
185+
if (shgetc(f) < 0) goto input_fail;
186+
shunget(f);
187+
188+
switch (t) {
189+
case 's':
190+
case 'c':
191+
case '[':
192+
if (t == 'c' || t == 's') {
193+
memset(scanset, -1, sizeof scanset);
194+
scanset[0] = 0;
195+
if (t == 's') {
196+
scanset[1+'\t'] = 0;
197+
scanset[1+'\n'] = 0;
198+
scanset[1+'\v'] = 0;
199+
scanset[1+'\f'] = 0;
200+
scanset[1+'\r'] = 0;
201+
scanset[1+' '] = 0;
202+
}
203+
} else {
204+
if (*++p == '^') p++, invert = 1;
205+
else invert = 0;
206+
memset(scanset, invert, sizeof scanset);
207+
scanset[0] = 0;
208+
if (*p == '-') p++, scanset[1+'-'] = 1-invert;
209+
else if (*p == ']') p++, scanset[1+']'] = 1-invert;
210+
for (; *p != ']'; p++) {
211+
if (!*p) goto fmt_fail;
212+
if (*p=='-' && p[1] && p[1] != ']')
213+
for (c=p++[-1]; c<*p; c++)
214+
scanset[1+c] = 1-invert;
215+
scanset[1+*p] = 1-invert;
216+
}
217+
}
218+
wcs = 0;
219+
s = 0;
220+
i = 0;
221+
k = t=='c' ? width+1U : 31;
222+
if (size == SIZE_l) {
223+
if (alloc) {
224+
wcs = malloc(k*sizeof(wchar_t));
225+
if (!wcs) goto alloc_fail;
226+
} else {
227+
wcs = dest;
228+
}
229+
st = (mbstate_t){0};
230+
while (scanset[(c=shgetc(f))+1]) {
231+
switch (mbrtowc(&wc, &(char){c}, 1, &st)) {
232+
case -1:
233+
goto input_fail;
234+
case -2:
235+
continue;
236+
}
237+
if (wcs) wcs[i++] = wc;
238+
if (alloc && i==k) {
239+
k+=k+1;
240+
wchar_t *tmp = realloc(wcs, k*sizeof(wchar_t));
241+
if (!tmp) goto alloc_fail;
242+
wcs = tmp;
243+
}
244+
}
245+
if (!mbsinit(&st)) goto input_fail;
246+
} else if (alloc) {
247+
s = malloc(k);
248+
if (!s) goto alloc_fail;
249+
while (scanset[(c=shgetc(f))+1]) {
250+
s[i++] = c;
251+
if (i==k) {
252+
k+=k+1;
253+
char *tmp = realloc(s, k);
254+
if (!tmp) goto alloc_fail;
255+
s = tmp;
256+
}
257+
}
258+
} else if ((s = dest)) {
259+
while (scanset[(c=shgetc(f))+1])
260+
s[i++] = c;
261+
} else {
262+
while (scanset[(c=shgetc(f))+1]);
263+
}
264+
shunget(f);
265+
if (!shcnt(f)) goto match_fail;
266+
if (t == 'c' && shcnt(f) != width) goto match_fail;
267+
if (alloc) {
268+
if (size == SIZE_l) *(wchar_t **)dest = wcs;
269+
else *(char **)dest = s;
270+
}
271+
if (t != 'c') {
272+
if (wcs) wcs[i] = 0;
273+
if (s) s[i] = 0;
274+
}
275+
break;
276+
case 'p':
277+
case 'X':
278+
case 'x':
279+
base = 16;
280+
goto int_common;
281+
case 'o':
282+
base = 8;
283+
goto int_common;
284+
case 'd':
285+
case 'u':
286+
base = 10;
287+
goto int_common;
288+
case 'i':
289+
base = 0;
290+
int_common:
291+
x = __intscan(f, base, 0, ULLONG_MAX);
292+
if (!shcnt(f)) goto match_fail;
293+
if (t=='p' && dest) *(void **)dest = (void *)(uintptr_t)x;
294+
else store_int(dest, size, x);
295+
break;
296+
case 'a': case 'A':
297+
case 'e': case 'E':
298+
case 'f': case 'F':
299+
case 'g': case 'G':
300+
y = __floatscan(f, size, 0);
301+
if (!shcnt(f)) goto match_fail;
302+
if (dest) switch (size) {
303+
case SIZE_def:
304+
*(float *)dest = y;
305+
break;
306+
case SIZE_l:
307+
*(double *)dest = y;
308+
break;
309+
case SIZE_L:
310+
*(long double *)dest = y;
311+
break;
312+
}
313+
break;
314+
}
315+
316+
pos += shcnt(f);
317+
if (dest) matches++;
318+
}
319+
if (0) {
320+
fmt_fail:
321+
alloc_fail:
322+
input_fail:
323+
if (!matches) matches--;
324+
match_fail:
325+
if (alloc) {
326+
free(s);
327+
free(wcs);
328+
}
329+
}
330+
FUNLOCK(f);
331+
return matches;
332+
}

0 commit comments

Comments
 (0)