From 0031807e5ad8e2e2613f94dc9fa241b0c495115d Mon Sep 17 00:00:00 2001 From: Tapan Prakash Date: Fri, 4 Apr 2025 17:12:58 +0530 Subject: [PATCH 1/3] Improve unescape error reporting for multibyte characters in byte literals --- .../src/lexer/unescape_error_reporting.rs | 11 +----- tests/ui/suggestions/multibyte-escapes.rs | 12 +++++- tests/ui/suggestions/multibyte-escapes.stderr | 37 +++++++++++++++---- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs index 2e066f0179c3f..ad22e310b13c5 100644 --- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs +++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs @@ -199,16 +199,7 @@ pub(crate) fn emit_unescape_error( err.span_label(span, format!("must be ASCII{postfix}")); // Note: the \\xHH suggestions are not given for raw byte string // literals, because they are araw and so cannot use any escapes. - if (c as u32) <= 0xFF && mode != Mode::RawByteStr { - err.span_suggestion( - span, - format!( - "if you meant to use the unicode code point for {c:?}, use a \\xHH escape" - ), - format!("\\x{:X}", c as u32), - Applicability::MaybeIncorrect, - ); - } else if mode == Mode::Byte { + if mode == Mode::Byte { err.span_label(span, "this multibyte character does not fit into a single byte"); } else if mode != Mode::RawByteStr { let mut utf8 = String::new(); diff --git a/tests/ui/suggestions/multibyte-escapes.rs b/tests/ui/suggestions/multibyte-escapes.rs index c4105186244db..5cec7c7881124 100644 --- a/tests/ui/suggestions/multibyte-escapes.rs +++ b/tests/ui/suggestions/multibyte-escapes.rs @@ -3,7 +3,7 @@ fn main() { b'µ'; //~^ ERROR: non-ASCII character in byte literal - //~| HELP: if you meant to use the unicode code point for 'µ', use a \xHH escape + //~| NOTE: this multibyte character does not fit into a single byte //~| NOTE: must be ASCII b'字'; @@ -15,4 +15,14 @@ fn main() { //~^ ERROR: non-ASCII character in byte string literal //~| HELP: if you meant to use the UTF-8 encoding of '字', use \xHH escapes //~| NOTE: must be ASCII + + b"µ"; + //~^ ERROR: non-ASCII character in byte string literal + //~| HELP: if you meant to use the UTF-8 encoding of 'µ', use \xHH escapes + //~| NOTE: must be ASCII + + b"ñ"; + //~^ ERROR: non-ASCII character in byte string literal + //~| HELP: if you meant to use the UTF-8 encoding of 'ñ', use \xHH escapes + //~| NOTE: must be ASCII } diff --git a/tests/ui/suggestions/multibyte-escapes.stderr b/tests/ui/suggestions/multibyte-escapes.stderr index 7208d2f5be891..bfe66870b4d3b 100644 --- a/tests/ui/suggestions/multibyte-escapes.stderr +++ b/tests/ui/suggestions/multibyte-escapes.stderr @@ -2,13 +2,10 @@ error: non-ASCII character in byte literal --> $DIR/multibyte-escapes.rs:4:7 | LL | b'µ'; - | ^ must be ASCII - | -help: if you meant to use the unicode code point for 'µ', use a \xHH escape - | -LL - b'µ'; -LL + b'\xB5'; - | + | ^ + | | + | must be ASCII + | this multibyte character does not fit into a single byte error: non-ASCII character in byte literal --> $DIR/multibyte-escapes.rs:9:7 @@ -31,5 +28,29 @@ LL - b"字"; LL + b"\xE5\xAD\x97"; | -error: aborting due to 3 previous errors +error: non-ASCII character in byte string literal + --> $DIR/multibyte-escapes.rs:19:7 + | +LL | b"µ"; + | ^ must be ASCII + | +help: if you meant to use the UTF-8 encoding of 'µ', use \xHH escapes + | +LL - b"µ"; +LL + b"\xC2\xB5"; + | + +error: non-ASCII character in byte string literal + --> $DIR/multibyte-escapes.rs:24:7 + | +LL | b"ñ"; + | ^ must be ASCII + | +help: if you meant to use the UTF-8 encoding of 'ñ', use \xHH escapes + | +LL - b"ñ"; +LL + b"\xC3\xB1"; + | + +error: aborting due to 5 previous errors From 7896e940cd842769ad0473f9547cbe9c0b4378a1 Mon Sep 17 00:00:00 2001 From: Tapan Prakash Date: Fri, 4 Apr 2025 20:15:17 +0530 Subject: [PATCH 2/3] Fix failing test case --- tests/ui/parser/byte-literals.stderr | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/tests/ui/parser/byte-literals.stderr b/tests/ui/parser/byte-literals.stderr index 1c89e8e2864b6..b998cf55e8273 100644 --- a/tests/ui/parser/byte-literals.stderr +++ b/tests/ui/parser/byte-literals.stderr @@ -46,13 +46,10 @@ error: non-ASCII character in byte literal --> $DIR/byte-literals.rs:10:7 | LL | b'é'; - | ^ must be ASCII - | -help: if you meant to use the unicode code point for 'é', use a \xHH escape - | -LL - b'é'; -LL + b'\xE9'; - | + | ^ + | | + | must be ASCII + | this multibyte character does not fit into a single byte error[E0763]: unterminated byte constant --> $DIR/byte-literals.rs:11:6 From 8aa9676233435ef0d5875f58fc972c51f7221d72 Mon Sep 17 00:00:00 2001 From: Tapan Prakash Date: Fri, 4 Apr 2025 21:06:24 +0530 Subject: [PATCH 3/3] Fix test case --- tests/ui/parser/byte-string-literals.stderr | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ui/parser/byte-string-literals.stderr b/tests/ui/parser/byte-string-literals.stderr index 3e589258d4132..6bf70479f1c40 100644 --- a/tests/ui/parser/byte-string-literals.stderr +++ b/tests/ui/parser/byte-string-literals.stderr @@ -26,10 +26,10 @@ error: non-ASCII character in byte string literal LL | b"é"; | ^ must be ASCII | -help: if you meant to use the unicode code point for 'é', use a \xHH escape +help: if you meant to use the UTF-8 encoding of 'é', use \xHH escapes | LL - b"é"; -LL + b"\xE9"; +LL + b"\xC3\xA9"; | error: non-ASCII character in raw byte string literal