From 2fd2670ea0a55b7f7481c5ce2a95c5c5033941d5 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Wed, 30 Dec 2015 21:45:24 +0100 Subject: [PATCH 1/4] Unify computation of length in `EscapeUnicode` The `offset` value was computed both in `next` and in `size_hint`; computing it in a single place ensures consistency and makes it easier to apply improvements. The value is now computed as soon as the iterator is constructed. This means that the time to compute it is spent immediately and cannot be avoided, but it also guarantees that it is only spent once. --- src/libcore/char.rs | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 0c3807d8ca0b5..df6044fa83928 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -299,7 +299,15 @@ impl CharExt for char { #[inline] fn escape_unicode(self) -> EscapeUnicode { - EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash } + let mut n = 0; + while (self as u32) >> (4 * (n + 1)) != 0 { + n += 1; + } + EscapeUnicode { + c: self, + state: EscapeUnicodeState::Backslash, + offset: n, + } } #[inline] @@ -420,7 +428,8 @@ pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option { #[stable(feature = "rust1", since = "1.0.0")] pub struct EscapeUnicode { c: char, - state: EscapeUnicodeState + state: EscapeUnicodeState, + offset: usize, } #[derive(Clone)] @@ -428,7 +437,7 @@ enum EscapeUnicodeState { Backslash, Type, LeftBrace, - Value(usize), + Value, RightBrace, Done, } @@ -448,19 +457,15 @@ impl Iterator for EscapeUnicode { Some('u') } EscapeUnicodeState::LeftBrace => { - let mut n = 0; - while (self.c as u32) >> (4 * (n + 1)) != 0 { - n += 1; - } - self.state = EscapeUnicodeState::Value(n); + self.state = EscapeUnicodeState::Value; Some('{') } - EscapeUnicodeState::Value(offset) => { - let c = from_digit(((self.c as u32) >> (offset * 4)) & 0xf, 16).unwrap(); - if offset == 0 { + EscapeUnicodeState::Value => { 
+ let c = from_digit(((self.c as u32) >> (self.offset * 4)) & 0xf, 16).unwrap(); + if self.offset == 0 { self.state = EscapeUnicodeState::RightBrace; } else { - self.state = EscapeUnicodeState::Value(offset - 1); + self.offset -= 1; } Some(c) } @@ -473,18 +478,15 @@ impl Iterator for EscapeUnicode { } fn size_hint(&self) -> (usize, Option<usize>) { - let mut n = 0; - while (self.c as usize) >> (4 * (n + 1)) != 0 { - n += 1; - } let n = match self.state { - EscapeUnicodeState::Backslash => n + 5, - EscapeUnicodeState::Type => n + 4, - EscapeUnicodeState::LeftBrace => n + 3, - EscapeUnicodeState::Value(offset) => offset + 2, + EscapeUnicodeState::Backslash => 5, + EscapeUnicodeState::Type => 4, + EscapeUnicodeState::LeftBrace => 3, + EscapeUnicodeState::Value => 2, EscapeUnicodeState::RightBrace => 1, EscapeUnicodeState::Done => 0, }; + let n = n + self.offset; (n, Some(n)) } } From 7b33d39da93a9873fa002c6875c934fd13ec7d4a Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Wed, 30 Dec 2015 22:03:35 +0100 Subject: [PATCH 2/4] Improve computation of `EscapeUnicode` offset field Instead of iteratively scanning the bits, use `leading_zeros`. 
--- src/libcore/char.rs | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index df6044fa83928..1df2d0d3bc8cc 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -299,14 +299,16 @@ impl CharExt for char { #[inline] fn escape_unicode(self) -> EscapeUnicode { - let mut n = 0; - while (self as u32) >> (4 * (n + 1)) != 0 { - n += 1; - } + let c = self as u32; + // or-ing 1 ensures that for c==0 the code computes that one + // digit should be printed and (which is the same) avoids the + // (31 - 32) underflow + let msb = 31 - (c | 1).leading_zeros(); + let msdigit = msb / 4; EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash, - offset: n, + offset: msdigit as usize, } } From 79dfa2590006e50b7c7ba2788a317b56a162175a Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Thu, 28 Jan 2016 15:12:16 +0100 Subject: [PATCH 3/4] Improve naming and explanations --- src/libcore/char.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index 1df2d0d3bc8cc..c97d7b086f4de 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -300,15 +300,18 @@ impl CharExt for char { #[inline] fn escape_unicode(self) -> EscapeUnicode { let c = self as u32; + // or-ing 1 ensures that for c==0 the code computes that one // digit should be printed and (which is the same) avoids the // (31 - 32) underflow let msb = 31 - (c | 1).leading_zeros(); - let msdigit = msb / 4; + + // the index of the most significant hex digit + let ms_hex_digit = msb / 4; EscapeUnicode { c: self, state: EscapeUnicodeState::Backslash, - offset: msdigit as usize, + hex_digit_idx: ms_hex_digit as usize, } } @@ -431,7 +434,11 @@ pub fn encode_utf16_raw(mut ch: u32, dst: &mut [u16]) -> Option { pub struct EscapeUnicode { c: char, state: EscapeUnicodeState, - offset: usize, + + // The index of the next hex digit to be printed (0 if none), + // i.e. 
the number of remaining hex digits to be printed; + // increasing from the least significant digit: 0x543210 + hex_digit_idx: usize, } #[derive(Clone)] @@ -463,11 +470,11 @@ impl Iterator for EscapeUnicode { Some('{') } EscapeUnicodeState::Value => { - let c = from_digit(((self.c as u32) >> (self.offset * 4)) & 0xf, 16).unwrap(); - if self.offset == 0 { + let c = from_digit(((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf, 16).unwrap(); + if self.hex_digit_idx == 0 { self.state = EscapeUnicodeState::RightBrace; } else { - self.offset -= 1; + self.hex_digit_idx -= 1; } Some(c) } @@ -488,7 +495,7 @@ impl Iterator for EscapeUnicode { EscapeUnicodeState::RightBrace => 1, EscapeUnicodeState::Done => 0, }; - let n = n + self.offset; + let n = n + self.hex_digit_idx; (n, Some(n)) } } From 8984242373d1317494f32e8f7326ac7d8fd52006 Mon Sep 17 00:00:00 2001 From: Andrea Canciani Date: Thu, 28 Jan 2016 15:18:42 +0100 Subject: [PATCH 4/4] Fix `make tidy` and name what is being computed --- src/libcore/char.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/libcore/char.rs b/src/libcore/char.rs index c97d7b086f4de..1b2083929cebc 100644 --- a/src/libcore/char.rs +++ b/src/libcore/char.rs @@ -470,7 +470,8 @@ impl Iterator for EscapeUnicode { Some('{') } EscapeUnicodeState::Value => { - let c = from_digit(((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf, 16).unwrap(); + let hex_digit = ((self.c as u32) >> (self.hex_digit_idx * 4)) & 0xf; + let c = from_digit(hex_digit, 16).unwrap(); if self.hex_digit_idx == 0 { self.state = EscapeUnicodeState::RightBrace; } else {