Skip to content

Commit 69e98c6

Browse files
authored
Unrolled build for rust-lang#133057
Rollup merge of rust-lang#133057 - tisonkun:into-chars, r=Amanieu. Impl String::into_chars. Tracking issue: rust-lang#133125. r? `@programmerjake` `@kennytm` `@Amanieu` This refers to rust-lang/libs-team#268. Before adding tests and creating a tracking issue, I'd like to reach a consensus on the implementation direction and on two questions: 1. Should we also add a `String::into_char_indices` method? 2. See the inline comment.
2 parents 1f81f90 + 7218fd1 commit 69e98c6

File tree

1 file changed

+185
-2
lines changed

1 file changed

+185
-2
lines changed

library/alloc/src/string.rs

+185-2
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,10 @@ use crate::alloc::Allocator;
6262
use crate::borrow::{Cow, ToOwned};
6363
use crate::boxed::Box;
6464
use crate::collections::TryReserveError;
65-
use crate::str::{self, Chars, Utf8Error, from_utf8_unchecked_mut};
65+
use crate::str::{self, CharIndices, Chars, Utf8Error, from_utf8_unchecked_mut};
6666
#[cfg(not(no_global_oom_handling))]
6767
use crate::str::{FromStr, from_boxed_utf8_unchecked};
68-
use crate::vec::Vec;
68+
use crate::vec::{self, Vec};
6969

7070
/// A UTF-8–encoded, growable string.
7171
///
@@ -1952,6 +1952,61 @@ impl String {
19521952
Drain { start, end, iter: chars_iter, string: self_ptr }
19531953
}
19541954

1955+
/// Converts a `String` into an iterator over the [`char`]s of the string.
1956+
///
1957+
/// As a string consists of valid UTF-8, we can iterate through a string
1958+
/// by [`char`]. This method returns such an iterator.
1959+
///
1960+
/// It's important to remember that [`char`] represents a Unicode Scalar
1961+
/// Value, and might not match your idea of what a 'character' is. Iteration
1962+
/// over grapheme clusters may be what you actually want. That functionality
1963+
/// is not provided by Rust's standard library; check crates.io instead.
1964+
///
1965+
/// # Examples
1966+
///
1967+
/// Basic usage:
1968+
///
1969+
/// ```
1970+
/// #![feature(string_into_chars)]
1971+
///
1972+
/// let word = String::from("goodbye");
1973+
///
1974+
/// let mut chars = word.into_chars();
1975+
///
1976+
/// assert_eq!(Some('g'), chars.next());
1977+
/// assert_eq!(Some('o'), chars.next());
1978+
/// assert_eq!(Some('o'), chars.next());
1979+
/// assert_eq!(Some('d'), chars.next());
1980+
/// assert_eq!(Some('b'), chars.next());
1981+
/// assert_eq!(Some('y'), chars.next());
1982+
/// assert_eq!(Some('e'), chars.next());
1983+
///
1984+
/// assert_eq!(None, chars.next());
1985+
/// ```
1986+
///
1987+
/// Remember, [`char`]s might not match your intuition about characters:
1988+
///
1989+
/// ```
1990+
/// #![feature(string_into_chars)]
1991+
///
1992+
/// let y = String::from("y̆");
1993+
///
1994+
/// let mut chars = y.into_chars();
1995+
///
1996+
/// assert_eq!(Some('y'), chars.next()); // not 'y̆'
1997+
/// assert_eq!(Some('\u{0306}'), chars.next());
1998+
///
1999+
/// assert_eq!(None, chars.next());
2000+
/// ```
2001+
///
2002+
/// [`char`]: prim@char
2003+
#[inline]
2004+
#[must_use = "`self` will be dropped if the result is not used"]
2005+
#[unstable(feature = "string_into_chars", issue = "133125")]
2006+
pub fn into_chars(self) -> IntoChars {
2007+
IntoChars { bytes: self.into_bytes().into_iter() } // the string's own bytes become the iterator state
2008+
}
2009+
19552010
/// Removes the specified range in the string,
19562011
/// and replaces it with the given string.
19572012
/// The given string doesn't need to be the same length as the range.
@@ -3090,6 +3145,134 @@ impl fmt::Write for String {
30903145
}
30913146
}
30923147

3148+
/// An owning iterator over the [`char`]s of a string.
3149+
///
3150+
/// This struct is created by the [`into_chars`] method on [`String`].
3151+
/// See its documentation for more.
3152+
///
3153+
/// [`char`]: prim@char
3154+
/// [`into_chars`]: String::into_chars
3155+
#[cfg_attr(not(no_global_oom_handling), derive(Clone))]
3156+
#[must_use = "iterators are lazy and do nothing unless consumed"]
3157+
#[unstable(feature = "string_into_chars", issue = "133125")]
3158+
pub struct IntoChars {
3159+
bytes: vec::IntoIter<u8>, // not-yet-yielded bytes; only ever advanced by whole `char`s, so always valid UTF-8
3160+
}
3161+
3162+
#[unstable(feature = "string_into_chars", issue = "133125")]
3163+
impl fmt::Debug for IntoChars {
3164+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
3165+
f.debug_tuple("IntoChars").field(&self.as_str()).finish() // shows the remaining text as a `str`, not the raw bytes
3166+
}
3167+
}
3168+
3169+
impl IntoChars {
3170+
/// Views the underlying data as a subslice of the original data.
3171+
///
3172+
/// The returned slice covers only the characters that have not been
3173+
/// yielded yet.
3174+
///
3175+
/// # Examples
3173+
///
3174+
/// ```
3175+
/// #![feature(string_into_chars)]
3176+
///
3177+
/// let mut chars = String::from("abc").into_chars();
3178+
///
3179+
/// assert_eq!(chars.as_str(), "abc");
3180+
/// chars.next();
3181+
/// assert_eq!(chars.as_str(), "bc");
3182+
/// chars.next();
3183+
/// chars.next();
3184+
/// assert_eq!(chars.as_str(), "");
3185+
/// ```
3186+
#[unstable(feature = "string_into_chars", issue = "133125")]
3187+
#[must_use]
3188+
#[inline]
3189+
pub fn as_str(&self) -> &str {
3190+
// SAFETY: `bytes` came from a `String` and is only advanced by whole `char`s, so it is valid UTF-8.
3191+
unsafe { str::from_utf8_unchecked(self.bytes.as_slice()) }
3192+
}
3193+
3194+
/// Consumes the `IntoChars`, returning the remaining string.
3195+
///
3196+
/// # Examples
3197+
///
3198+
/// ```
3199+
/// #![feature(string_into_chars)]
3200+
///
3201+
/// let chars = String::from("abc").into_chars();
3202+
/// assert_eq!(chars.into_string(), "abc");
3203+
///
3204+
/// let mut chars = String::from("def").into_chars();
3205+
/// chars.next();
3206+
/// assert_eq!(chars.into_string(), "ef");
3207+
/// ```
3208+
#[cfg(not(no_global_oom_handling))]
3209+
#[unstable(feature = "string_into_chars", issue = "133125")]
3210+
#[inline]
3211+
pub fn into_string(self) -> String {
3212+
// SAFETY: `bytes` are kept in UTF-8 form, only removing whole `char`s at a time.
3213+
unsafe { String::from_utf8_unchecked(self.bytes.collect()) }
3214+
}
3215+
3216+
#[inline]
3217+
fn iter(&self) -> CharIndices<'_> {
3218+
self.as_str().char_indices() // private helper: borrowed (byte index, char) view of the remaining string
3219+
}
3220+
}
3221+
3222+
#[unstable(feature = "string_into_chars", issue = "133125")]
3223+
impl Iterator for IntoChars { // yields the remaining `char`s front to back
3224+
type Item = char;
3225+
3226+
#[inline]
3227+
fn next(&mut self) -> Option<char> {
3228+
let mut iter = self.iter(); // temporary decoder over the remaining string
3229+
match iter.next() {
3230+
None => None,
3231+
Some((_, ch)) => {
3232+
let offset = iter.offset(); // byte position just past `ch`, i.e. how many bytes it occupied
3233+
// `offset` is a valid index, so `advance_by` cannot fall short and its `Result` is deliberately discarded.
3234+
let _ = self.bytes.advance_by(offset);
3235+
Some(ch)
3236+
}
3237+
}
3238+
}
3239+
3240+
#[inline]
3241+
fn count(self) -> usize {
3242+
self.iter().count() // counts decoded `char`s of the remaining string, not bytes
3243+
}
3244+
3245+
#[inline]
3246+
fn size_hint(&self) -> (usize, Option<usize>) {
3247+
self.iter().size_hint() // defers to the bounds reported by `CharIndices` for the remaining string
3248+
}
3249+
3250+
#[inline]
3251+
fn last(mut self) -> Option<char> {
3252+
self.next_back() // the last char is read directly from the back instead of walking forward
3253+
}
3254+
}
3255+
3256+
#[unstable(feature = "string_into_chars", issue = "133125")]
3257+
impl DoubleEndedIterator for IntoChars { // yields the remaining `char`s from the back
3258+
#[inline]
3259+
fn next_back(&mut self) -> Option<char> {
3260+
let len = self.as_str().len(); // byte length of the remaining string
3261+
let mut iter = self.iter();
3262+
match iter.next_back() {
3263+
None => None,
3264+
Some((idx, ch)) => {
3265+
// `idx` is a valid index (the byte offset where `ch` starts), so `len - idx` trailing bytes are dropped; the `Result` is deliberately discarded.
3266+
let _ = self.bytes.advance_back_by(len - idx);
3267+
Some(ch)
3268+
}
3269+
}
3270+
}
3271+
}
3272+
3273+
#[unstable(feature = "string_into_chars", issue = "133125")]
3274+
impl FusedIterator for IntoChars {} // marker only: `next` returns `None` whenever the remaining string is empty, and nothing refills it
3275+
30933276
/// A draining iterator for `String`.
30943277
///
30953278
/// This struct is created by the [`drain`] method on [`String`]. See its

0 commit comments

Comments
 (0)