From 0031807e5ad8e2e2613f94dc9fa241b0c495115d Mon Sep 17 00:00:00 2001
From: Tapan Prakash <tapanprakasht@gmail.com>
Date: Fri, 4 Apr 2025 17:12:58 +0530
Subject: [PATCH 1/3] Improve unescape error reporting for multibyte characters
 in byte literals

---
 .../src/lexer/unescape_error_reporting.rs     | 11 +-----
 tests/ui/suggestions/multibyte-escapes.rs     | 12 +++++-
 tests/ui/suggestions/multibyte-escapes.stderr | 37 +++++++++++++++----
 3 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
index 2e066f0179c3f..ad22e310b13c5 100644
--- a/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
+++ b/compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
@@ -199,16 +199,7 @@ pub(crate) fn emit_unescape_error(
             err.span_label(span, format!("must be ASCII{postfix}"));
             // Note: the \\xHH suggestions are not given for raw byte string
             // literals, because they are araw and so cannot use any escapes.
-            if (c as u32) <= 0xFF && mode != Mode::RawByteStr {
-                err.span_suggestion(
-                    span,
-                    format!(
-                        "if you meant to use the unicode code point for {c:?}, use a \\xHH escape"
-                    ),
-                    format!("\\x{:X}", c as u32),
-                    Applicability::MaybeIncorrect,
-                );
-            } else if mode == Mode::Byte {
+            if mode == Mode::Byte {
                 err.span_label(span, "this multibyte character does not fit into a single byte");
             } else if mode != Mode::RawByteStr {
                 let mut utf8 = String::new();
diff --git a/tests/ui/suggestions/multibyte-escapes.rs b/tests/ui/suggestions/multibyte-escapes.rs
index c4105186244db..5cec7c7881124 100644
--- a/tests/ui/suggestions/multibyte-escapes.rs
+++ b/tests/ui/suggestions/multibyte-escapes.rs
@@ -3,7 +3,7 @@
 fn main() {
     b'µ';
     //~^ ERROR: non-ASCII character in byte literal
-    //~| HELP: if you meant to use the unicode code point for 'µ', use a \xHH escape
+    //~| NOTE: this multibyte character does not fit into a single byte
     //~| NOTE: must be ASCII
 
     b'字';
@@ -15,4 +15,14 @@ fn main() {
     //~^ ERROR: non-ASCII character in byte string literal
     //~| HELP: if you meant to use the UTF-8 encoding of '字', use \xHH escapes
     //~| NOTE: must be ASCII
+
+    b"µ";
+    //~^ ERROR: non-ASCII character in byte string literal
+    //~| HELP: if you meant to use the UTF-8 encoding of 'µ', use \xHH escapes
+    //~| NOTE: must be ASCII
+
+    b"ñ";
+    //~^ ERROR: non-ASCII character in byte string literal
+    //~| HELP: if you meant to use the UTF-8 encoding of 'ñ', use \xHH escapes
+    //~| NOTE: must be ASCII
 }
diff --git a/tests/ui/suggestions/multibyte-escapes.stderr b/tests/ui/suggestions/multibyte-escapes.stderr
index 7208d2f5be891..bfe66870b4d3b 100644
--- a/tests/ui/suggestions/multibyte-escapes.stderr
+++ b/tests/ui/suggestions/multibyte-escapes.stderr
@@ -2,13 +2,10 @@ error: non-ASCII character in byte literal
   --> $DIR/multibyte-escapes.rs:4:7
    |
 LL |     b'µ';
-   |       ^ must be ASCII
-   |
-help: if you meant to use the unicode code point for 'µ', use a \xHH escape
-   |
-LL -     b'µ';
-LL +     b'\xB5';
-   |
+   |       ^
+   |       |
+   |       must be ASCII
+   |       this multibyte character does not fit into a single byte
 
 error: non-ASCII character in byte literal
   --> $DIR/multibyte-escapes.rs:9:7
@@ -31,5 +28,29 @@ LL -     b"字";
 LL +     b"\xE5\xAD\x97";
    |
 
-error: aborting due to 3 previous errors
+error: non-ASCII character in byte string literal
+  --> $DIR/multibyte-escapes.rs:19:7
+   |
+LL |     b"µ";
+   |       ^ must be ASCII
+   |
+help: if you meant to use the UTF-8 encoding of 'µ', use \xHH escapes
+   |
+LL -     b"µ";
+LL +     b"\xC2\xB5";
+   |
+
+error: non-ASCII character in byte string literal
+  --> $DIR/multibyte-escapes.rs:24:7
+   |
+LL |     b"ñ";
+   |       ^ must be ASCII
+   |
+help: if you meant to use the UTF-8 encoding of 'ñ', use \xHH escapes
+   |
+LL -     b"ñ";
+LL +     b"\xC3\xB1";
+   |
+
+error: aborting due to 5 previous errors
 

From 7896e940cd842769ad0473f9547cbe9c0b4378a1 Mon Sep 17 00:00:00 2001
From: Tapan Prakash <tapanprakasht@gmail.com>
Date: Fri, 4 Apr 2025 20:15:17 +0530
Subject: [PATCH 2/3] Update expected output in byte-literals.stderr

---
 tests/ui/parser/byte-literals.stderr | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/tests/ui/parser/byte-literals.stderr b/tests/ui/parser/byte-literals.stderr
index 1c89e8e2864b6..b998cf55e8273 100644
--- a/tests/ui/parser/byte-literals.stderr
+++ b/tests/ui/parser/byte-literals.stderr
@@ -46,13 +46,10 @@ error: non-ASCII character in byte literal
   --> $DIR/byte-literals.rs:10:7
    |
 LL |     b'é';
-   |       ^ must be ASCII
-   |
-help: if you meant to use the unicode code point for 'é', use a \xHH escape
-   |
-LL -     b'é';
-LL +     b'\xE9';
-   |
+   |       ^
+   |       |
+   |       must be ASCII
+   |       this multibyte character does not fit into a single byte
 
 error[E0763]: unterminated byte constant
   --> $DIR/byte-literals.rs:11:6

From 8aa9676233435ef0d5875f58fc972c51f7221d72 Mon Sep 17 00:00:00 2001
From: Tapan Prakash <tapanprakasht@gmail.com>
Date: Fri, 4 Apr 2025 21:06:24 +0530
Subject: [PATCH 3/3] Update expected output in byte-string-literals.stderr

---
 tests/ui/parser/byte-string-literals.stderr | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/ui/parser/byte-string-literals.stderr b/tests/ui/parser/byte-string-literals.stderr
index 3e589258d4132..6bf70479f1c40 100644
--- a/tests/ui/parser/byte-string-literals.stderr
+++ b/tests/ui/parser/byte-string-literals.stderr
@@ -26,10 +26,10 @@ error: non-ASCII character in byte string literal
 LL |     b"é";
    |       ^ must be ASCII
    |
-help: if you meant to use the unicode code point for 'é', use a \xHH escape
+help: if you meant to use the UTF-8 encoding of 'é', use \xHH escapes
    |
 LL -     b"é";
-LL +     b"\xE9";
+LL +     b"\xC3\xA9";
    |
 
 error: non-ASCII character in raw byte string literal