Skip to content

Rollup of 7 pull requests #127143

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
b03d939
Improve autovectorization of to_lowercase / to_uppercase functions
jhorstmann Apr 10, 2024
a7aafd7
Add codegen test to ensure the pattern used in convert_while_ascii ge…
jhorstmann Jun 3, 2024
b5ea85f
Remove redundant tests
jhorstmann Jun 3, 2024
879d143
Add `.ignore` file to make `config.toml` searchable in vscode
WaffleLapkin Jun 23, 2024
39bf1dc
Small fixme in core now that split_first has no codegen issues
GrigorenkoPV Jun 24, 2024
5901a2d
Use a fixed chunk size in codegen test
jhorstmann Jun 28, 2024
28ba5e4
Updated docs on `#[panic_handler]` in `library/core/src/lib.rs`
Jun 19, 2024
af8be47
Use a fixed chunk size also in production code and update some comments
jhorstmann Jun 29, 2024
623b9c8
Update comment in codegen test
jhorstmann Jun 29, 2024
e52d95b
Remove unused compiler dependencies
Kobzol Jun 29, 2024
8b783f1
rustdoc: update to pulldown-cmark 0.11
notriddle Jun 29, 2024
61e2f5c
rustdoc: add usable lint for pulldown-cmark-0.11 parsing changes
notriddle Jun 29, 2024
a5fc783
clippy: update to pulldown-cmark 0.11
notriddle Jun 29, 2024
682e7c1
Print `TypeId` as a `u128` for `Debug`
tgross35 Jun 29, 2024
bef24a4
Rollup merge of #123778 - jhorstmann:optimize-upper-lower-auto-vector…
matthiaskrgr Jun 30, 2024
b19c233
Rollup merge of #126705 - safinaskar:panic, r=Mark-Simulacrum
matthiaskrgr Jun 30, 2024
d091d48
Rollup merge of #126876 - WaffleLapkin:unignoreconfigtoml, r=Mark-Sim…
matthiaskrgr Jun 30, 2024
4cf6f8b
Rollup merge of #126906 - GrigorenkoPV:fixme-split_at_first, r=Mark-S…
matthiaskrgr Jun 30, 2024
a23f956
Rollup merge of #127127 - notriddle:notriddle/pulldown-cmark-0.11, r=…
matthiaskrgr Jun 30, 2024
1fce09b
Rollup merge of #127131 - Kobzol:remove-unused-deps, r=compiler-errors
matthiaskrgr Jun 30, 2024
865788e
Rollup merge of #127134 - tgross35:typeid-debug, r=Nilstrieb
matthiaskrgr Jun 30, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .ignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Make vscode *not* count `config.toml` as ignored, so it is included in search
!/config.toml
1 change: 1 addition & 0 deletions .reuse/dep5
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ Files: compiler/*
.gitignore
.gitmodules
.mailmap
.ignore
Copyright: The Rust Project Developers (see https://thanks.rust-lang.org)
License: MIT or Apache-2.0

Expand Down
27 changes: 21 additions & 6 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3141,7 +3141,19 @@ dependencies = [
"bitflags 2.5.0",
"getopts",
"memchr",
"pulldown-cmark-escape",
"pulldown-cmark-escape 0.10.1",
"unicase",
]

[[package]]
name = "pulldown-cmark"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8746739f11d39ce5ad5c2520a9b75285310dbfe78c541ccf832d38615765aec0"
dependencies = [
"bitflags 2.5.0",
"memchr",
"pulldown-cmark-escape 0.11.0",
"unicase",
]

Expand All @@ -3151,6 +3163,12 @@ version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd348ff538bc9caeda7ee8cad2d1d48236a1f443c1fa3913c6a02fe0043b1dd3"

[[package]]
name = "pulldown-cmark-escape"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "007d8adb5ddab6f8e3f491ac63566a7d5002cc7ed73901f72057943fa71ae1ae"

[[package]]
name = "pulldown-cmark-to-cmark"
version = "13.0.0"
Expand Down Expand Up @@ -4597,7 +4615,7 @@ name = "rustc_resolve"
version = "0.0.0"
dependencies = [
"bitflags 2.5.0",
"pulldown-cmark 0.9.6",
"pulldown-cmark 0.11.0",
"rustc_arena",
"rustc_ast",
"rustc_ast_pretty",
Expand Down Expand Up @@ -4753,8 +4771,6 @@ checksum = "8ba09476327c4b70ccefb6180f046ef588c26a24cf5d269a9feba316eb4f029f"
name = "rustc_trait_selection"
version = "0.0.0"
dependencies = [
"bitflags 2.5.0",
"derivative",
"itertools",
"rustc_ast",
"rustc_ast_ir",
Expand All @@ -4763,7 +4779,6 @@ dependencies = [
"rustc_errors",
"rustc_fluent_macro",
"rustc_hir",
"rustc_index",
"rustc_infer",
"rustc_macros",
"rustc_middle",
Expand All @@ -4776,7 +4791,6 @@ dependencies = [
"rustc_target",
"rustc_transmute",
"rustc_type_ir",
"rustc_type_ir_macros",
"smallvec",
"tracing",
]
Expand Down Expand Up @@ -4880,6 +4894,7 @@ dependencies = [
"indexmap",
"itertools",
"minifier",
"pulldown-cmark 0.9.6",
"regex",
"rustdoc-json-types",
"serde",
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_resolve/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
bitflags = "2.4.1"
pulldown-cmark = { version = "0.9.6", default-features = false }
pulldown-cmark = { version = "0.11", features = ["html"], default-features = false }
rustc_arena = { path = "../rustc_arena" }
rustc_ast = { path = "../rustc_ast" }
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
Expand Down
14 changes: 9 additions & 5 deletions compiler/rustc_resolve/src/rustdoc.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser, Tag};
use pulldown_cmark::{
BrokenLink, BrokenLinkCallback, CowStr, Event, LinkType, Options, Parser, Tag,
};
use rustc_ast as ast;
use rustc_ast::util::comments::beautify_doc_string;
use rustc_data_structures::fx::FxHashMap;
Expand Down Expand Up @@ -427,7 +429,9 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {

while let Some(event) = event_iter.next() {
match event {
Event::Start(Tag::Link(link_type, dest, _)) if may_be_doc_link(link_type) => {
Event::Start(Tag::Link { link_type, dest_url, title: _, id: _ })
if may_be_doc_link(link_type) =>
{
if matches!(
link_type,
LinkType::Inline
Expand All @@ -441,7 +445,7 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
}
}

links.push(preprocess_link(&dest));
links.push(preprocess_link(&dest_url));
}
_ => {}
}
Expand All @@ -451,8 +455,8 @@ fn parse_links<'md>(doc: &'md str) -> Vec<Box<str>> {
}

/// Collects additional data of link.
fn collect_link_data<'input, 'callback>(
event_iter: &mut Parser<'input, 'callback>,
fn collect_link_data<'input, F: BrokenLinkCallback<'input>>(
event_iter: &mut Parser<'input, F>,
) -> Option<Box<str>> {
let mut display_text: Option<String> = None;
let mut append_text = |text: CowStr<'_>| {
Expand Down
4 changes: 0 additions & 4 deletions compiler/rustc_trait_selection/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@ edition = "2021"

[dependencies]
# tidy-alphabetical-start
bitflags = "2.4.1"
derivative = "2.2.0"
itertools = "0.12"
rustc_ast = { path = "../rustc_ast" }
rustc_ast_ir = { path = "../rustc_ast_ir" }
Expand All @@ -15,7 +13,6 @@ rustc_data_structures = { path = "../rustc_data_structures" }
rustc_errors = { path = "../rustc_errors" }
rustc_fluent_macro = { path = "../rustc_fluent_macro" }
rustc_hir = { path = "../rustc_hir" }
rustc_index = { path = "../rustc_index" }
rustc_infer = { path = "../rustc_infer" }
rustc_macros = { path = "../rustc_macros" }
rustc_middle = { path = "../rustc_middle" }
Expand All @@ -28,7 +25,6 @@ rustc_span = { path = "../rustc_span" }
rustc_target = { path = "../rustc_target" }
rustc_transmute = { path = "../rustc_transmute", features = ["rustc"] }
rustc_type_ir = { path = "../rustc_type_ir" }
rustc_type_ir_macros = { path = "../rustc_type_ir_macros" }
smallvec = { version = "1.8.1", features = ["union", "may_dangle"] }
tracing = "0.1"
# tidy-alphabetical-end
2 changes: 2 additions & 0 deletions library/alloc/benches/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,3 +347,5 @@ make_test!(rsplitn_space_char, s, s.rsplitn(10, ' ').count());

make_test!(split_space_str, s, s.split(" ").count());
make_test!(split_ad_str, s, s.split("ad").count());

make_test!(to_lowercase, s, s.to_lowercase());
125 changes: 73 additions & 52 deletions library/alloc/src/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
use core::borrow::{Borrow, BorrowMut};
use core::iter::FusedIterator;
use core::mem;
use core::mem::MaybeUninit;
use core::ptr;
use core::str::pattern::{DoubleEndedSearcher, Pattern, ReverseSearcher, Searcher};
use core::unicode::conversions;
Expand Down Expand Up @@ -367,14 +368,9 @@ impl str {
without modifying the original"]
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
pub fn to_lowercase(&self) -> String {
let out = convert_while_ascii(self.as_bytes(), u8::to_ascii_lowercase);
let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_lowercase);

// Safety: we know this is a valid char boundary since
// out.len() is only progressed if ascii bytes are found
let rest = unsafe { self.get_unchecked(out.len()..) };

// Safety: We have written only valid ASCII to our vec
let mut s = unsafe { String::from_utf8_unchecked(out) };
let prefix_len = s.len();

for (i, c) in rest.char_indices() {
if c == 'Σ' {
Expand All @@ -383,8 +379,7 @@ impl str {
// in `SpecialCasing.txt`,
// so hard-code it rather than have a generic "condition" mechanism.
// See https://github.com/rust-lang/rust/issues/26035
let out_len = self.len() - rest.len();
let sigma_lowercase = map_uppercase_sigma(&self, i + out_len);
let sigma_lowercase = map_uppercase_sigma(self, prefix_len + i);
s.push(sigma_lowercase);
} else {
match conversions::to_lower(c) {
Expand Down Expand Up @@ -460,14 +455,7 @@ impl str {
without modifying the original"]
#[stable(feature = "unicode_case_mapping", since = "1.2.0")]
pub fn to_uppercase(&self) -> String {
let out = convert_while_ascii(self.as_bytes(), u8::to_ascii_uppercase);

// Safety: we know this is a valid char boundary since
// out.len() is only progressed if ascii bytes are found
let rest = unsafe { self.get_unchecked(out.len()..) };

// Safety: We have written only valid ASCII to our vec
let mut s = unsafe { String::from_utf8_unchecked(out) };
let (mut s, rest) = convert_while_ascii(self, u8::to_ascii_uppercase);

for c in rest.chars() {
match conversions::to_upper(c) {
Expand Down Expand Up @@ -616,50 +604,83 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
unsafe { Box::from_raw(Box::into_raw(v) as *mut str) }
}

/// Converts the bytes while the bytes are still ascii.
/// Converts leading ascii bytes in `s` by calling the `convert` function.
///
/// For better average performance, this happens in chunks of `2*size_of::<usize>()`.
/// Returns a vec with the converted bytes.
///
/// Returns a tuple of the converted prefix and the remainder starting from
/// the first non-ascii character.
#[inline]
#[cfg(not(test))]
#[cfg(not(no_global_oom_handling))]
fn convert_while_ascii(b: &[u8], convert: fn(&u8) -> u8) -> Vec<u8> {
let mut out = Vec::with_capacity(b.len());
fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
// Process the input in chunks of 16 bytes to enable auto-vectorization.
// Previously the chunk size depended on the size of `usize`,
// but on 32-bit platforms with sse or neon is also the better choice.
// The only downside on other platforms would be a bit more loop-unrolling.
const N: usize = 16;

let mut slice = s.as_bytes();
let mut out = Vec::with_capacity(slice.len());
let mut out_slice = out.spare_capacity_mut();

let mut ascii_prefix_len = 0_usize;
let mut is_ascii = [false; N];

while slice.len() >= N {
// Safety: checked in loop condition
let chunk = unsafe { slice.get_unchecked(..N) };
// Safety: out_slice has at least same length as input slice and gets sliced with the same offsets
let out_chunk = unsafe { out_slice.get_unchecked_mut(..N) };

for j in 0..N {
is_ascii[j] = chunk[j] <= 127;
}

const USIZE_SIZE: usize = mem::size_of::<usize>();
const MAGIC_UNROLL: usize = 2;
const N: usize = USIZE_SIZE * MAGIC_UNROLL;
const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; USIZE_SIZE]);
// Auto-vectorization for this check is a bit fragile, sum and comparing against the chunk
// size gives the best result, specifically a pmovmsk instruction on x86.
// There is a codegen test in `issue-123712-str-to-lower-autovectorization.rs` which should
// be updated when this method is changed.
// See also https://github.com/llvm/llvm-project/issues/96395
if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
break;
}

let mut i = 0;
unsafe {
while i + N <= b.len() {
// Safety: we have checks the sizes `b` and `out` to know that our
let in_chunk = b.get_unchecked(i..i + N);
let out_chunk = out.spare_capacity_mut().get_unchecked_mut(i..i + N);

let mut bits = 0;
for j in 0..MAGIC_UNROLL {
// read the bytes 1 usize at a time (unaligned since we haven't checked the alignment)
// safety: in_chunk is valid bytes in the range
bits |= in_chunk.as_ptr().cast::<usize>().add(j).read_unaligned();
}
// if our chunks aren't ascii, then return only the prior bytes as init
if bits & NONASCII_MASK != 0 {
break;
}
for j in 0..N {
out_chunk[j] = MaybeUninit::new(convert(&chunk[j]));
}

// perform the case conversions on N bytes (gets heavily autovec'd)
for j in 0..N {
// safety: in_chunk and out_chunk is valid bytes in the range
let out = out_chunk.get_unchecked_mut(j);
out.write(convert(in_chunk.get_unchecked(j)));
}
ascii_prefix_len += N;
slice = unsafe { slice.get_unchecked(N..) };
out_slice = unsafe { out_slice.get_unchecked_mut(N..) };
}

// mark these bytes as initialised
i += N;
// handle the remainder as individual bytes
while slice.len() > 0 {
let byte = slice[0];
if byte > 127 {
break;
}
out.set_len(i);
// Safety: out_slice has same length as input slice and gets sliced with the same offsets
unsafe {
*out_slice.get_unchecked_mut(0) = MaybeUninit::new(convert(&byte));
}
ascii_prefix_len += 1;
slice = unsafe { slice.get_unchecked(1..) };
out_slice = unsafe { out_slice.get_unchecked_mut(1..) };
}

out
unsafe {
// SAFETY: ascii_prefix_len bytes have been initialized above
out.set_len(ascii_prefix_len);

// SAFETY: We have written only valid ascii to the output vec
let ascii_string = String::from_utf8_unchecked(out);

// SAFETY: we know this is a valid char boundary
// since we only skipped over leading ascii bytes
let rest = core::str::from_utf8_unchecked(slice);

(ascii_string, rest)
}
}
3 changes: 3 additions & 0 deletions library/alloc/tests/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1849,7 +1849,10 @@ fn to_lowercase() {
assert_eq!("ΑΣ''Α".to_lowercase(), "ασ''α");

// https://github.com/rust-lang/rust/issues/124714
// input lengths around the boundary of the chunk size used by the ascii prefix optimization
assert_eq!("abcdefghijklmnoΣ".to_lowercase(), "abcdefghijklmnoς");
assert_eq!("abcdefghijklmnopΣ".to_lowercase(), "abcdefghijklmnopς");
assert_eq!("abcdefghijklmnopqΣ".to_lowercase(), "abcdefghijklmnopqς");

// a really long string that has it's lowercase form
// even longer. this tests that implementations don't assume
Expand Down
13 changes: 12 additions & 1 deletion library/core/src/any.rs
Original file line number Diff line number Diff line change
Expand Up @@ -602,7 +602,7 @@ impl dyn Any + Send + Sync {
/// While `TypeId` implements `Hash`, `PartialOrd`, and `Ord`, it is worth
/// noting that the hashes and ordering will vary between Rust releases. Beware
/// of relying on them inside of your code!
#[derive(Clone, Copy, Debug, Eq, PartialOrd, Ord)]
#[derive(Clone, Copy, Eq, PartialOrd, Ord)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct TypeId {
// We avoid using `u128` because that imposes higher alignment requirements on many platforms.
Expand Down Expand Up @@ -644,6 +644,10 @@ impl TypeId {
let t2 = t as u64;
TypeId { t: (t1, t2) }
}

fn as_u128(self) -> u128 {
u128::from(self.t.0) << 64 | u128::from(self.t.1)
}
}

#[stable(feature = "rust1", since = "1.0.0")]
Expand All @@ -666,6 +670,13 @@ impl hash::Hash for TypeId {
}
}

#[stable(feature = "rust1", since = "1.0.0")]
impl fmt::Debug for TypeId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
f.debug_tuple("TypeId").field(&self.as_u128()).finish()
}
}

/// Returns the name of a type as a string slice.
///
/// # Note
Expand Down
7 changes: 2 additions & 5 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,9 @@
//! Rust user code is to call the functions provided by this library instead (such as
//! `ptr::copy`).
//!
//! * `rust_begin_panic` - This function takes four arguments, a
//! `fmt::Arguments`, a `&'static str`, and two `u32`'s. These four arguments
//! dictate the panic message, the file at which panic was invoked, and the
//! line and column inside the file. It is up to consumers of this core
//! * Panic handler - This function takes one argument, a `&panic::PanicInfo`. It is up to consumers of this core
//! library to define this panic function; it is only required to never
//! return. This requires a `lang` attribute named `panic_impl`.
//! return. You should mark your implementation using `#[panic_handler]`.
//!
//! * `rust_eh_personality` - is used by the failure mechanisms of the
//! compiler. This is often mapped to GCC's personality function, but crates
Expand Down
Loading
Loading