Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lexer: Treat more floats with empty exponent as valid tokens #131656

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 71 additions & 29 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ pub enum DocStyle {
pub enum LiteralKind {
/// `12_u8`, `0o100`, `0b120i99`, `1f32`.
Int { base: Base, empty_int: bool },
/// `12.34f32`, `1e3`, but not `1f32`.
/// `12.34f32`, `1e3` and `1e+`, but not `1f32` or `1em`.
Float { base: Base, empty_exponent: bool },
/// `'a'`, `'\\'`, `'''`, `';`
Char { terminated: bool },
Expand Down Expand Up @@ -409,8 +409,8 @@ impl Cursor<'_> {

// Numeric literal.
c @ '0'..='9' => {
let literal_kind = self.number(c);
let suffix_start = self.pos_within_token();
let (literal_kind, suffix_start) = self.number(c);
let suffix_start = suffix_start.unwrap_or(self.pos_within_token());
self.eat_literal_suffix();
TokenKind::Literal { kind: literal_kind, suffix_start }
}
Expand Down Expand Up @@ -606,7 +606,9 @@ impl Cursor<'_> {
}
}

fn number(&mut self, first_digit: char) -> LiteralKind {
/// Parses a number literal. The second element of the returned tuple is
/// `Some(offset)` when the literal suffix starts at `offset` rather than at
/// the cursor position on return, and `None` otherwise.
fn number(&mut self, first_digit: char) -> (LiteralKind, Option<u32>) {
debug_assert!('0' <= self.prev() && self.prev() <= '9');
let mut base = Base::Decimal;
if first_digit == '0' {
Expand All @@ -616,21 +618,21 @@ impl Cursor<'_> {
base = Base::Binary;
self.bump();
if !self.eat_decimal_digits() {
return Int { base, empty_int: true };
return (Int { base, empty_int: true }, None);
}
}
'o' => {
base = Base::Octal;
self.bump();
if !self.eat_decimal_digits() {
return Int { base, empty_int: true };
return (Int { base, empty_int: true }, None);
}
}
'x' => {
base = Base::Hexadecimal;
self.bump();
if !self.eat_hexadecimal_digits() {
return Int { base, empty_int: true };
return (Int { base, empty_int: true }, None);
}
}
// Not a base prefix; consume additional digits.
Expand All @@ -642,40 +644,79 @@ impl Cursor<'_> {
'.' | 'e' | 'E' => {}

// Just a 0.
_ => return Int { base, empty_int: false },
_ => return (Int { base, empty_int: false }, None),
}
} else {
// No base prefix, parse number in the usual way.
self.eat_decimal_digits();
};

match self.first() {
match (self.first(), self.second()) {
// Don't be greedy if this is actually an
// integer literal followed by field/method access or a range pattern
// (`0..2` and `12.foo()`)
'.' if self.second() != '.' && !is_id_start(self.second()) => {
// might have stuff after the ., and if it does, it needs to start
// with a number
('.', second) if second != '.' && !is_id_start(second) => {
self.bump();
self.eat_decimal_digits();

let mut empty_exponent = false;
if self.first().is_ascii_digit() {
self.eat_decimal_digits();
match self.first() {
'e' | 'E' => {
self.bump();
empty_exponent = !self.eat_float_exponent();
}
_ => (),
let suffix_start = match (self.first(), self.second()) {
('e' | 'E', '_') => self.eat_underscore_exponent(),
('e' | 'E', '0'..='9' | '+' | '-') => {
// Definitely an exponent (which still can be empty).
self.bump();
empty_exponent = !self.eat_float_exponent();
None
}
_ => None,
};
(Float { base, empty_exponent }, suffix_start)
}
('e' | 'E', '_') => {
match self.eat_underscore_exponent() {
Some(suffix_start) => {
// The suffix begins at `e`, meaning the number is an integer.
(Int { base, empty_int: false }, Some(suffix_start))
}
None => (Float { base, empty_exponent: false }, None),
}
Float { base, empty_exponent }
}
'e' | 'E' => {
('e' | 'E', '0'..='9' | '+' | '-') => {
// Definitely an exponent (which still can be empty).
self.bump();
let empty_exponent = !self.eat_float_exponent();
Float { base, empty_exponent }
(Float { base, empty_exponent }, None)
}
_ => Int { base, empty_int: false },
_ => (Int { base, empty_int: false }, None),
}
}

/// Attempts to eat an exponent whose digits are preceded by underscores,
/// such as `e_5` or `E___12`.
///
/// Expects the cursor to be on `e`/`E` with `_` immediately after it. The
/// `e`/`E` and the whole run of consecutive `_`s are always consumed; if an
/// ASCII digit follows, it is consumed as well via `eat_decimal_digits`.
///
/// Returns `None` when a digit follows the underscores: the exponent is
/// valid and the suffix starts at the cursor as usual. Otherwise returns
/// `Some(offset)`, where `offset` is the value of `pos_within_token()` taken
/// before the `e`/`E` was consumed: no exponent was found, and the
/// characters already consumed are the beginning of the literal suffix.
fn eat_underscore_exponent(&mut self) -> Option<u32> {
    debug_assert!(matches!(self.first(), 'e' | 'E'));
    debug_assert!(matches!(self.second(), '_'));
    // Remember where `e`/`E` sits in case it turns out to begin the suffix.
    let exponent_marker_pos = self.pos_within_token();

    // Step over the `e`/`E`, then over every `_` that follows it.
    self.bump();
    while self.first() == '_' {
        self.bump();
    }

    // Without a digit after the underscores there is no exponent, so
    // everything from `e`/`E` onwards belongs to the suffix.
    if !self.first().is_ascii_digit() {
        return Some(exponent_marker_pos);
    }

    // A digit was found: the exponent is valid and becomes part of the number.
    self.eat_decimal_digits();
    None
}

Expand Down Expand Up @@ -924,6 +965,7 @@ impl Cursor<'_> {
}
}

/// Returns `true` if a digit was consumed (rather than just '_'s).
fn eat_decimal_digits(&mut self) -> bool {
let mut has_digits = false;
loop {
Expand Down Expand Up @@ -961,20 +1003,20 @@ impl Cursor<'_> {
/// Eats the float exponent. Returns true if at least one digit was met,
/// and returns false otherwise.
fn eat_float_exponent(&mut self) -> bool {
debug_assert!(self.prev() == 'e' || self.prev() == 'E');
debug_assert!(matches!(self.prev(), 'e' | 'E'));
if self.first() == '-' || self.first() == '+' {
self.bump();
}
self.eat_decimal_digits()
}

// Eats the suffix of the literal, e.g. "u8".
/// Eats the suffix of the literal, e.g. "u8".
fn eat_literal_suffix(&mut self) {
self.eat_identifier();
self.eat_identifier()
}

// Eats the identifier. Note: succeeds on `_`, which isn't a valid
// identifier.
/// Eats the identifier. Note: succeeds on `_`, which isn't a valid
/// identifier.
fn eat_identifier(&mut self) {
if !is_id_start(self.first()) {
return;
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_session/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ session_embed_source_insufficient_dwarf_version = `-Zembed-source=y` requires at

session_embed_source_requires_debug_info = `-Zembed-source=y` requires debug information to be enabled

session_empty_float_exponent = expected at least one digit in exponent

session_expr_parentheses_needed = parentheses are required to parse this as an expression

session_failed_to_create_profiler = failed to create profiler: {$err}
Expand Down
15 changes: 15 additions & 0 deletions compiler/rustc_session/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,10 @@ pub fn report_lit_error(
s.len() > 1 && s.starts_with(first_chars) && s[1..].chars().all(|c| c.is_ascii_digit())
}

/// Returns `true` when `s` is exactly `e` or `E`, i.e. an exponent marker
/// with nothing after it.
fn looks_like_empty_exponent(s: &str) -> bool {
    matches!(s, "e" | "E")
}

// Try to lowercase the prefix if the prefix and suffix are valid.
fn fix_base_capitalisation(prefix: &str, suffix: &str) -> Option<String> {
let mut chars = suffix.chars();
Expand Down Expand Up @@ -409,6 +413,8 @@ pub fn report_lit_error(
if looks_like_width_suffix(&['i', 'u'], suf) {
// If it looks like a width, try to be helpful.
dcx.emit_err(InvalidIntLiteralWidth { span, width: suf[1..].into() })
} else if looks_like_empty_exponent(suf) {
dcx.emit_err(EmptyFloatExponent { span })
} else if let Some(fixed) = fix_base_capitalisation(lit.symbol.as_str(), suf) {
dcx.emit_err(InvalidNumLiteralBasePrefix { span, fixed })
} else {
Expand All @@ -420,6 +426,8 @@ pub fn report_lit_error(
if looks_like_width_suffix(&['f'], suf) {
// If it looks like a width, try to be helpful.
dcx.emit_err(InvalidFloatLiteralWidth { span, width: suf[1..].to_string() })
} else if looks_like_empty_exponent(suf) {
dcx.emit_err(EmptyFloatExponent { span })
} else {
dcx.emit_err(InvalidFloatLiteralSuffix { span, suffix: suf.to_string() })
}
Expand Down Expand Up @@ -489,3 +497,10 @@ pub(crate) struct SoftFloatIgnored;
#[note]
#[note(session_soft_float_deprecated_issue)]
pub(crate) struct SoftFloatDeprecated;

/// Diagnostic for the `session_empty_float_exponent` message ("expected at
/// least one digit in exponent"), emitted by `report_lit_error` when a
/// numeric literal's suffix is a lone `e` or `E`.
#[derive(Diagnostic)]
#[diag(session_empty_float_exponent)]
pub(crate) struct EmptyFloatExponent {
#[primary_span]
pub span: Span,
}
72 changes: 36 additions & 36 deletions tests/ui/consts/const-eval/issue-104390.stderr
Original file line number Diff line number Diff line change
@@ -1,39 +1,3 @@
error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:1:27
|
LL | fn f1() -> impl Sized { & 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:2:28
|
LL | fn f2() -> impl Sized { && 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:3:29
|
LL | fn f3() -> impl Sized { &'a 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:5:34
|
LL | fn f4() -> impl Sized { &'static 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:7:28
|
LL | fn f5() -> impl Sized { *& 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:8:29
|
LL | fn f6() -> impl Sized { &'_ 2E }
| ^^

error: borrow expressions cannot be annotated with lifetimes
--> $DIR/issue-104390.rs:3:25
|
Expand Down Expand Up @@ -76,5 +40,41 @@ LL - fn f6() -> impl Sized { &'_ 2E }
LL + fn f6() -> impl Sized { &2E }
|

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:1:27
|
LL | fn f1() -> impl Sized { & 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:2:28
|
LL | fn f2() -> impl Sized { && 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:3:29
|
LL | fn f3() -> impl Sized { &'a 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:5:34
|
LL | fn f4() -> impl Sized { &'static 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:7:28
|
LL | fn f5() -> impl Sized { *& 2E }
| ^^

error: expected at least one digit in exponent
--> $DIR/issue-104390.rs:8:29
|
LL | fn f6() -> impl Sized { &'_ 2E }
| ^^

error: aborting due to 9 previous errors

12 changes: 6 additions & 6 deletions tests/ui/consts/issue-91434.stderr
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
error: expected at least one digit in exponent
--> $DIR/issue-91434.rs:2:11
|
LL | [9; [[9E; h]]];
| ^^

error[E0425]: cannot find value `h` in this scope
--> $DIR/issue-91434.rs:2:15
|
LL | [9; [[9E; h]]];
| ^ not found in this scope

error: expected at least one digit in exponent
--> $DIR/issue-91434.rs:2:11
|
LL | [9; [[9E; h]]];
| ^^

error: aborting due to 2 previous errors

For more information about this error, try `rustc --explain E0425`.
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
error: expected at least one digit in exponent
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47
|
LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻²
| ^^^^^^

error: unknown start of token: \u{2212}
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:53
|
Expand All @@ -16,5 +10,11 @@ LL - const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹
LL + const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e-11; // m³⋅kg⁻¹⋅s⁻²
|

error: expected at least one digit in exponent
--> $DIR/issue-49746-unicode-confusable-in-float-literal-expt.rs:1:47
|
LL | const UNIVERSAL_GRAVITATIONAL_CONSTANT: f64 = 6.674e−11; // m³⋅kg⁻¹⋅s⁻²
| ^^^^^^

error: aborting due to 2 previous errors

16 changes: 16 additions & 0 deletions tests/ui/lexer/custom-suffixes-exponent-like.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
const _A: f64 = 1em;
//~^ ERROR invalid suffix `em` for number literal
const _B: f64 = 1e0m;
//~^ ERROR invalid suffix `m` for float literal
const _C: f64 = 1e_______________0m;
//~^ ERROR invalid suffix `m` for float literal
const _D: f64 = 1e_______________m;
//~^ ERROR invalid suffix `e_______________m` for number literal

// None of the above patterns should generate an error when used in a macro
macro_rules! do_nothing {
($($toks:tt)*) => {};
}
do_nothing!(1em 1e0m 1e_______________0m 1e_______________m);

fn main() {}
Loading
Loading