Skip to content

Commit 731e0bf

Browse files
committed
Auto merge of rust-lang#103828 - cassaundra:fix-format-args-span2, r=cjgillot
Fix incorrect span when using byte-escaped rbrace Fix rust-lang#103826, a format args span issue introduced in rust-lang#102214. The current solution for tracking skipped characters made it so that certain situations were ambiguous enough that the original span couldn't be worked out later. This PR improves on the original solution by keeping track of groups of skipped characters using a map, and fixes the previous bug. See an example of this ambiguity in the [previous PR's discussion](rust-lang#102214 (comment)).
2 parents f206533 + 35c7939 commit 731e0bf

File tree

3 files changed

+107
-48
lines changed

3 files changed

+107
-48
lines changed

compiler/rustc_parse_format/src/lib.rs

+79-48
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,24 @@ impl InnerSpan {
3838
}
3939
}
4040

41+
/// The location and before/after width of a character whose width has changed from its source code
42+
/// representation
43+
#[derive(Copy, Clone, PartialEq, Eq)]
44+
pub struct InnerWidthMapping {
45+
/// Index of the character in the source
46+
pub position: usize,
47+
/// The inner width in characters
48+
pub before: usize,
49+
/// The transformed width in characters
50+
pub after: usize,
51+
}
52+
53+
impl InnerWidthMapping {
54+
pub fn new(position: usize, before: usize, after: usize) -> InnerWidthMapping {
55+
InnerWidthMapping { position, before, after }
56+
}
57+
}
58+
4159
/// The type of format string that we are parsing.
4260
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
4361
pub enum ParseMode {
@@ -200,8 +218,8 @@ pub struct Parser<'a> {
200218
style: Option<usize>,
201219
/// Start and end byte offset of every successfully parsed argument
202220
pub arg_places: Vec<InnerSpan>,
203-
/// Characters that need to be shifted
204-
skips: Vec<usize>,
221+
/// Characters whose length has been changed from their in-code representation
222+
width_map: Vec<InnerWidthMapping>,
205223
/// Span of the last opening brace seen, used for error reporting
206224
last_opening_brace: Option<InnerSpan>,
207225
/// Whether the source string is comes from `println!` as opposed to `format!` or `print!`
@@ -224,7 +242,7 @@ impl<'a> Iterator for Parser<'a> {
224242
'{' => {
225243
let curr_last_brace = self.last_opening_brace;
226244
let byte_pos = self.to_span_index(pos);
227-
let lbrace_end = self.to_span_index(pos + 1);
245+
let lbrace_end = InnerOffset(byte_pos.0 + self.to_span_width(pos));
228246
self.last_opening_brace = Some(byte_pos.to(lbrace_end));
229247
self.cur.next();
230248
if self.consume('{') {
@@ -233,12 +251,15 @@ impl<'a> Iterator for Parser<'a> {
233251
Some(String(self.string(pos + 1)))
234252
} else {
235253
let arg = self.argument(lbrace_end);
236-
if let Some(rbrace_byte_idx) = self.must_consume('}') {
237-
let lbrace_inner_offset = self.to_span_index(pos);
238-
let rbrace_inner_offset = self.to_span_index(rbrace_byte_idx);
254+
if let Some(rbrace_pos) = self.must_consume('}') {
239255
if self.is_literal {
256+
let lbrace_byte_pos = self.to_span_index(pos);
257+
let rbrace_byte_pos = self.to_span_index(rbrace_pos);
258+
259+
let width = self.to_span_width(rbrace_pos);
260+
240261
self.arg_places.push(
241-
lbrace_inner_offset.to(InnerOffset(rbrace_inner_offset.0 + 1)),
262+
lbrace_byte_pos.to(InnerOffset(rbrace_byte_pos.0 + width)),
242263
);
243264
}
244265
} else {
@@ -285,7 +306,7 @@ impl<'a> Parser<'a> {
285306
append_newline: bool,
286307
mode: ParseMode,
287308
) -> Parser<'a> {
288-
let (skips, is_literal) = find_skips_from_snippet(snippet, style);
309+
let (width_map, is_literal) = find_width_map_from_snippet(snippet, style);
289310
Parser {
290311
mode,
291312
input: s,
@@ -294,7 +315,7 @@ impl<'a> Parser<'a> {
294315
curarg: 0,
295316
style,
296317
arg_places: vec![],
297-
skips,
318+
width_map,
298319
last_opening_brace: None,
299320
append_newline,
300321
is_literal,
@@ -367,21 +388,34 @@ impl<'a> Parser<'a> {
367388
None
368389
}
369390

391+
fn remap_pos(&self, mut pos: usize) -> InnerOffset {
392+
for width in &self.width_map {
393+
if pos > width.position {
394+
pos += width.before - width.after;
395+
} else if pos == width.position && width.after == 0 {
396+
pos += width.before;
397+
} else {
398+
break;
399+
}
400+
}
401+
402+
InnerOffset(pos)
403+
}
404+
370405
fn to_span_index(&self, pos: usize) -> InnerOffset {
371-
let mut pos = pos;
372406
// This handles the raw string case, the raw argument is the number of #
373407
// in r###"..."### (we need to add one because of the `r`).
374408
let raw = self.style.map_or(0, |raw| raw + 1);
375-
for skip in &self.skips {
376-
if pos > *skip {
377-
pos += 1;
378-
} else if pos == *skip && raw == 0 {
379-
pos += 1;
380-
} else {
381-
break;
382-
}
409+
let pos = self.remap_pos(pos);
410+
InnerOffset(raw + pos.0 + 1)
411+
}
412+
413+
fn to_span_width(&self, pos: usize) -> usize {
414+
let pos = self.remap_pos(pos);
415+
match self.width_map.iter().find(|w| w.position == pos.0) {
416+
Some(w) => w.before,
417+
None => 1,
383418
}
384-
InnerOffset(raw + pos + 1)
385419
}
386420

387421
fn span(&self, start_pos: usize, end_pos: usize) -> InnerSpan {
@@ -809,10 +843,10 @@ impl<'a> Parser<'a> {
809843
/// Finds the indices of all characters that have been processed and differ between the actual
810844
/// written code (code snippet) and the `InternedString` that gets processed in the `Parser`
811845
/// in order to properly synthesise the intra-string `Span`s for error diagnostics.
812-
fn find_skips_from_snippet(
846+
fn find_width_map_from_snippet(
813847
snippet: Option<string::String>,
814848
str_style: Option<usize>,
815-
) -> (Vec<usize>, bool) {
849+
) -> (Vec<InnerWidthMapping>, bool) {
816850
let snippet = match snippet {
817851
Some(ref s) if s.starts_with('"') || s.starts_with("r\"") || s.starts_with("r#") => s,
818852
_ => return (vec![], false),
@@ -825,43 +859,39 @@ fn find_skips_from_snippet(
825859
let snippet = &snippet[1..snippet.len() - 1];
826860

827861
let mut s = snippet.char_indices();
828-
let mut skips = vec![];
862+
let mut width_mappings = vec![];
829863
while let Some((pos, c)) = s.next() {
830864
match (c, s.clone().next()) {
831865
// skip whitespace and empty lines ending in '\\'
832-
('\\', Some((next_pos, '\n'))) => {
833-
skips.push(pos);
834-
skips.push(next_pos);
866+
('\\', Some((_, '\n'))) => {
835867
let _ = s.next();
868+
let mut width = 2;
836869

837-
while let Some((pos, c)) = s.clone().next() {
870+
while let Some((_, c)) = s.clone().next() {
838871
if matches!(c, ' ' | '\n' | '\t') {
839-
skips.push(pos);
872+
width += 1;
840873
let _ = s.next();
841874
} else {
842875
break;
843876
}
844877
}
878+
879+
width_mappings.push(InnerWidthMapping::new(pos, width, 0));
845880
}
846-
('\\', Some((next_pos, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
847-
skips.push(next_pos);
881+
('\\', Some((_, 'n' | 't' | 'r' | '0' | '\\' | '\'' | '\"'))) => {
882+
width_mappings.push(InnerWidthMapping::new(pos, 2, 1));
848883
let _ = s.next();
849884
}
850885
('\\', Some((_, 'x'))) => {
851-
for _ in 0..3 {
852-
// consume `\xAB` literal
853-
if let Some((pos, _)) = s.next() {
854-
skips.push(pos);
855-
} else {
856-
break;
857-
}
858-
}
886+
// consume `\xAB` literal
887+
s.nth(2);
888+
width_mappings.push(InnerWidthMapping::new(pos, 4, 1));
859889
}
860890
('\\', Some((_, 'u'))) => {
861-
if let Some((pos, _)) = s.next() {
862-
skips.push(pos);
863-
}
864-
if let Some((next_pos, next_c)) = s.next() {
891+
let mut width = 2;
892+
let _ = s.next();
893+
894+
if let Some((_, next_c)) = s.next() {
865895
if next_c == '{' {
866896
// consume up to 6 hexanumeric chars
867897
let digits_len =
@@ -881,31 +911,32 @@ fn find_skips_from_snippet(
881911
let required_skips = digits_len.saturating_sub(len_utf8.saturating_sub(1));
882912

883913
// skip '{' and '}' also
884-
for pos in (next_pos..).take(required_skips + 2) {
885-
skips.push(pos)
886-
}
914+
width += required_skips + 2;
887915

888916
s.nth(digits_len);
889917
} else if next_c.is_digit(16) {
890-
skips.push(next_pos);
918+
width += 1;
919+
891920
// We suggest adding `{` and `}` when appropriate, accept it here as if
892921
// it were correct
893922
let mut i = 0; // consume up to 6 hexanumeric chars
894-
while let (Some((next_pos, c)), _) = (s.next(), i < 6) {
923+
while let (Some((_, c)), _) = (s.next(), i < 6) {
895924
if c.is_digit(16) {
896-
skips.push(next_pos);
925+
width += 1;
897926
} else {
898927
break;
899928
}
900929
i += 1;
901930
}
902931
}
903932
}
933+
934+
width_mappings.push(InnerWidthMapping::new(pos, width, 1));
904935
}
905936
_ => {}
906937
}
907938
}
908-
(skips, true)
939+
(width_mappings, true)
909940
}
910941

911942
// Assert a reasonable size for `Piece`

src/test/ui/fmt/issue-103826.rs

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
fn main() {
2+
format!("{\x7D");
3+
//~^ ERROR 1 positional argument in format string, but no arguments were given
4+
format!("\x7B\x7D");
5+
//~^ ERROR 1 positional argument in format string, but no arguments were given
6+
format!("{\x7D {\x7D");
7+
//~^ ERROR 2 positional arguments in format string, but no arguments were given
8+
}

src/test/ui/fmt/issue-103826.stderr

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
error: 1 positional argument in format string, but no arguments were given
2+
--> $DIR/issue-103826.rs:2:14
3+
|
4+
LL | format!("{\x7D");
5+
| ^^^^^
6+
7+
error: 1 positional argument in format string, but no arguments were given
8+
--> $DIR/issue-103826.rs:4:14
9+
|
10+
LL | format!("\x7B\x7D");
11+
| ^^^^^^^^
12+
13+
error: 2 positional arguments in format string, but no arguments were given
14+
--> $DIR/issue-103826.rs:6:14
15+
|
16+
LL | format!("{\x7D {\x7D");
17+
| ^^^^^ ^^^^^
18+
19+
error: aborting due to 3 previous errors
20+

0 commit comments

Comments
 (0)