Skip to content

Commit 8d4907e

Browse files
committed Feb 21, 2025
Use chunks_exact in analyze_source_file_sse2
1 parent a18bd8a commit 8d4907e

File tree

1 file changed

+8
-14
lines changed

1 file changed

+8
-14
lines changed
 

‎compiler/rustc_span/src/analyze_source_file.rs

+8-14
Original file line number | Diff line number | Diff line change
@@ -69,21 +69,18 @@ cfg_match! {
6969

7070
const CHUNK_SIZE: usize = 16;
7171

72-
let src_bytes = src.as_bytes();
73-
74-
let chunk_count = src.len() / CHUNK_SIZE;
72+
let mut chunks = src.as_bytes().chunks_exact(CHUNK_SIZE);
7573

7674
// This variable keeps track of where we should start decoding a
7775
// chunk. If a multi-byte character spans across chunk boundaries,
7876
// we need to skip that part in the next chunk because we already
7977
// handled it.
8078
let mut intra_chunk_offset = 0;
8179

82-
for chunk_index in 0..chunk_count {
83-
let ptr = src_bytes.as_ptr() as *const __m128i;
80+
for (chunk_index, chunk) in chunks.by_ref().enumerate() {
8481
// We don't know if the pointer is aligned to 16 bytes, so we
8582
// use `loadu`, which supports unaligned loading.
86-
let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };
83+
let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
8784

8885
// For each character in the chunk, see if its byte value is < 0, which
8986
// indicates that it's part of a UTF-8 char.
@@ -124,7 +121,7 @@ cfg_match! {
124121
}
125122

126123
// There might still be a tail left to analyze
127-
let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
124+
let tail_start = src.len() - chunks.remainder().len() + intra_chunk_offset;
128125
if tail_start < src.len() {
129126
analyze_source_file_generic(
130127
&src[tail_start..],
@@ -194,21 +191,18 @@ cfg_match! {
194191

195192
const CHUNK_SIZE: usize = 16;
196193

197-
let src_bytes = src.as_bytes();
198-
199-
let chunk_count = src.len() / CHUNK_SIZE;
194+
let mut chunks = src.as_bytes().chunks_exact(CHUNK_SIZE);
200195

201196
// This variable keeps track of where we should start decoding a
202197
// chunk. If a multi-byte character spans across chunk boundaries,
203198
// we need to skip that part in the next chunk because we already
204199
// handled it.
205200
let mut intra_chunk_offset = 0;
206201

207-
for chunk_index in 0..chunk_count {
208-
let ptr = src_bytes.as_ptr() as *const __m128i;
202+
for (chunk_index, chunk) in chunks.by_ref().enumerate() {
209203
// We don't know if the pointer is aligned to 16 bytes, so we
210204
// use `loadu`, which supports unaligned loading.
211-
let chunk = unsafe { _mm_loadu_si128(ptr.add(chunk_index)) };
205+
let chunk = unsafe { _mm_loadu_si128(chunk.as_ptr() as *const __m128i) };
212206

213207
// For each character in the chunk, see if its byte value is < 0, which
214208
// indicates that it's part of a UTF-8 char.
@@ -249,7 +243,7 @@ cfg_match! {
249243
}
250244

251245
// There might still be a tail left to analyze
252-
let tail_start = chunk_count * CHUNK_SIZE + intra_chunk_offset;
246+
let tail_start = src.len() - chunks.remainder().len() + intra_chunk_offset;
253247
if tail_start < src.len() {
254248
analyze_source_file_generic(
255249
&src[tail_start..],

0 commit comments

Comments
 (0)