Skip to content

Commit 9423603

Browse files
committed
Fix reading of strings that contain quotes or opening/closing brackets
1 parent b71c722 commit 9423603

File tree

2 files changed

+180
-14
lines changed

2 files changed

+180
-14
lines changed

Diff for: adafruit_json_stream.py

+38-14
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ def read(self):
4040
self.i += 1
4141
return char
4242

43-
def fast_forward(self, closer, *, return_object=False):
43+
def fast_forward(
44+
self, closer, *, return_object=False
45+
): # pylint: disable=too-many-branches
4446
"""
4547
Read through the stream until the character is ``closer``, ``]``
4648
(ending a list) or ``}`` (ending an object.) Intermediate lists and
@@ -62,6 +64,7 @@ def fast_forward(self, closer, *, return_object=False):
6264
# } = 125, { = 123
6365
buffer[0] = closer - 2
6466

67+
ignore_next = False
6568
while close_stack:
6669
char = self.read()
6770
count += 1
@@ -71,8 +74,14 @@ def fast_forward(self, closer, *, return_object=False):
7174
new_buffer[: len(buffer)] = buffer
7275
buffer = new_buffer
7376
buffer[count] = char
74-
if char == close_stack[-1]:
77+
if ignore_next:
78+
# that character was escaped, skip it
79+
ignore_next = False
80+
elif char == close_stack[-1]:
7581
close_stack.pop()
82+
elif char == ord("\\") and close_stack[-1] == ord('"'):
83+
# if backslash, ignore the next character
84+
ignore_next = True
7685
elif char == ord('"'):
7786
close_stack.append(ord('"'))
7887
elif close_stack[-1] == ord('"'):
@@ -96,26 +105,41 @@ def next_value(self, endswith=None):
96105
if isinstance(endswith, str):
97106
endswith = ord(endswith)
98107
in_string = False
108+
ignore_next = False
99109
while True:
100110
try:
101111
char = self.read()
102112
except EOFError:
103113
char = endswith
104-
if not in_string and (char == endswith or char in (ord("]"), ord("}"))):
105-
self.last_char = char
106-
if len(buf) == 0:
107-
return None
108-
value_string = bytes(buf).decode("utf-8")
109-
return json.loads(value_string)
110-
if char == ord("{"):
111-
return TransientObject(self)
112-
if char == ord("["):
113-
return TransientList(self)
114+
in_string = False
115+
ignore_next = False
114116

115117
if not in_string:
116-
in_string = char == ord('"')
118+
# end character or object/list end
119+
if char == endswith or char in (ord("]"), ord("}")):
120+
self.last_char = char
121+
if len(buf) == 0:
122+
return None
123+
value_string = bytes(buf).decode("utf-8")
124+
return json.loads(value_string)
125+
# string or sub object
126+
if char == ord("{"):
127+
return TransientObject(self)
128+
if char == ord("["):
129+
return TransientList(self)
130+
# start a string
131+
if char == ord('"'):
132+
in_string = True
117133
else:
118-
in_string = char != ord('"')
134+
# skipping any closing or opening character if in a string
135+
# also skipping escaped characters (like quotes in string)
136+
if ignore_next:
137+
ignore_next = False
138+
elif char == ord("\\"):
139+
ignore_next = True
140+
elif char == ord('"'):
141+
in_string = False
142+
119143
buf.append(char)
120144

121145

Diff for: tests/test_json_stream.py

+142
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,38 @@ def dict_with_all_types():
6666
"""
6767

6868

69+
@pytest.fixture
def list_with_bad_strings():
    """Return JSON text for a list of strings holding quotes, backslashes and brackets."""
    # Raw string: every backslash below is part of the JSON text itself.
    tricky_list_json = r"""
    [
        "\"}\"",
        "{\"a\": 1, \"b\": [2,3]}",
        "\"",
        "\\\"",
        "\\\\\"",
        "\\x40\"",
        "[[[{{{",
        "]]]}}}"
    ]
    """
    return tricky_list_json
83+
84+
85+
@pytest.fixture
def dict_with_bad_strings():
    """Return JSON text for an object whose values hold quotes, backslashes and brackets."""
    # Raw string: every backslash below is part of the JSON text itself.
    tricky_dict_json = r"""
    {
        "1": "\"}\"",
        "2": "{\"a\": 1, \"b\": [2,3]}",
        "3": "\"",
        "4": "\\\"",
        "5": "\\\\\"",
        "6": "\\x40\"",
        "7": "[[[{{{",
        "8": "]]]}}}"
    }
    """
    return tricky_dict_json
99+
100+
69101
@pytest.fixture
70102
def list_with_values():
71103
return """
@@ -308,6 +340,116 @@ def test_complex_dict(complex_dict):
308340
assert sub_counter == 12
309341

310342

343+
def test_bad_strings_in_list(list_with_bad_strings):
    """Test loading different strings that can confuse the parser.

    Every item streamed from the list must equal the expected value at the
    same position, and the stream must yield all of the expected items.
    """

    bad_strings = [
        '"}"',
        '{"a": 1, "b": [2,3]}',
        '"',
        '\\"',
        '\\\\"',
        '\\x40"',
        "[[[{{{",
        "]]]}}}",
    ]

    # sanity check: the reference parser accepts the fixture (non-empty result)
    assert json.loads(list_with_bad_strings)

    # get each separately
    stream = adafruit_json_stream.load(BytesChunkIO(list_with_bad_strings.encode()))
    seen = 0
    for i, item in enumerate(stream):
        assert item == bad_strings[i]
        seen += 1

    # a stream that stops early would otherwise pass the loop above vacuously
    assert seen == len(bad_strings)
363+
364+
365+
def test_bad_strings_in_list_iter(list_with_bad_strings):
    """Test iterating over a list of strings that can confuse the parser.

    Items are compared position by position with the expected values, and
    the final index is checked so that a truncated stream is reported.
    """

    bad_strings = [
        '"}"',
        '{"a": 1, "b": [2,3]}',
        '"',
        '\\"',
        '\\\\"',
        '\\x40"',
        "[[[{{{",
        "]]]}}}",
    ]

    # sanity check: the reference parser accepts the fixture (non-empty result)
    assert json.loads(list_with_bad_strings)

    # get each separately
    stream = adafruit_json_stream.load(BytesChunkIO(list_with_bad_strings.encode()))
    last_index = -1  # stays -1 when the stream yields nothing at all
    for last_index, item in enumerate(stream):
        assert item == bad_strings[last_index]

    # every expected item must have been produced, not just a matching prefix
    assert last_index == len(bad_strings) - 1
385+
386+
387+
def test_bad_strings_in_dict_as_object(dict_with_bad_strings):
    """Load the whole object in one call and compare it with the expected dict."""

    expected = {
        "1": '"}"',
        "2": '{"a": 1, "b": [2,3]}',
        "3": '"',
        "4": '\\"',
        "5": '\\\\"',
        "6": '\\x40"',
        "7": "[[[{{{",
        "8": "]]]}}}",
    }

    # read all at once
    payload = dict_with_bad_strings.encode()
    materialised = adafruit_json_stream.load(BytesChunkIO(payload)).as_object()
    assert materialised == expected
404+
405+
406+
def test_bad_strings_in_dict_all_keys(dict_with_bad_strings):
    """Fetch every key of the object in document order and compare each value."""

    expected = {
        "1": '"}"',
        "2": '{"a": 1, "b": [2,3]}',
        "3": '"',
        "4": '\\"',
        "5": '\\\\"',
        "6": '\\x40"',
        "7": "[[[{{{",
        "8": "]]]}}}",
    }

    # read one after the other with keys
    payload = dict_with_bad_strings.encode()
    stream = adafruit_json_stream.load(BytesChunkIO(payload))
    # keys are requested in the same fixed order as the original assertions
    for key in ("1", "2", "3", "4", "5", "6", "7", "8"):
        assert stream[key] == expected[key]
430+
431+
432+
def test_bad_strings_in_dict_skip_some(dict_with_bad_strings):
    """Fetch only some keys, so the entries in between have to be skipped over."""

    expected = {
        "1": '"}"',
        "2": '{"a": 1, "b": [2,3]}',
        "3": '"',
        "4": '\\"',
        "5": '\\\\"',
        "6": '\\x40"',
        "7": "[[[{{{",
        "8": "]]]}}}",
    }

    # read some, skip some
    payload = dict_with_bad_strings.encode()
    stream = adafruit_json_stream.load(BytesChunkIO(payload))
    # same keys, same order as the original three assertions
    for key in ("2", "5", "8"):
        assert stream[key] == expected[key]
451+
452+
311453
def test_complex_dict_grabbing(complex_dict):
312454
"""Test loading a complex dict and grabbing specific keys."""
313455

0 commit comments

Comments
 (0)