From 54e33bbdeca62508a71c0e445f1d1c82eb0b48c3 Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Tue, 31 Dec 2024 04:53:00 +0000 Subject: [PATCH 1/3] Account for C string literals in HiddenUnicodeCodepoints lint --- .../src/hidden_unicode_codepoints.rs | 24 +++++-- tests/ui/parser/unicode-control-codepoints.rs | 7 ++ .../parser/unicode-control-codepoints.stderr | 68 ++++++++++++++----- 3 files changed, 76 insertions(+), 23 deletions(-) diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs index 28368e1ab462b..4a7e4bf75cf3c 100644 --- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs +++ b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs @@ -101,14 +101,28 @@ impl EarlyLintPass for HiddenUnicodeCodepoints { if !contains_text_flow_control_chars(text.as_str()) { return; } - let padding = match token_lit.kind { + let (padding, point_at_inner_spans) = match token_lit.kind { // account for `"` or `'` - ast::token::LitKind::Str | ast::token::LitKind::Char => 1, + ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true), + // account for `c"` + ast::token::LitKind::CStr => (2, true), // account for `r###"` - ast::token::LitKind::StrRaw(n) => n as u32 + 2, - _ => return, + ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true), + // account for `cr###"` + ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true), + // suppress bad literals. + ast::token::LitKind::Err(_) => return, + // Be conservative just in case new literals do support these. + _ => (0, false), }; - self.lint_text_direction_codepoint(cx, text, expr.span, padding, true, "literal"); + self.lint_text_direction_codepoint( + cx, + text, + expr.span, + padding, + point_at_inner_spans, + "literal", + ); } _ => {} }; diff --git a/tests/ui/parser/unicode-control-codepoints.rs b/tests/ui/parser/unicode-control-codepoints.rs index df099bb62ad1e..c2b9a9911ac5d 100644 --- a/tests/ui/parser/unicode-control-codepoints.rs +++ b/tests/ui/parser/unicode-control-codepoints.rs @@ -1,3 +1,5 @@ +//@ edition: 2021 + fn main() { // if access_level != "us‫e‪r" { // Check if admin //~^ ERROR unicode codepoint changing visible direction of text present in comment @@ -25,6 +27,11 @@ fn main() { //~| ERROR non-ASCII character in raw byte string literal println!("{:?}", '‮'); //~^ ERROR unicode codepoint changing visible direction of text present in literal + + let _ = c"‮"; + //~^ ERROR unicode codepoint changing visible direction of text present in literal + let _ = cr#"‮"#; + //~^ ERROR unicode codepoint changing visible direction of text present in literal } //"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */" diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index 28de4ae72abbd..fa75df6a443ad 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -1,5 +1,5 @@ error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:26 + --> $DIR/unicode-control-codepoints.rs:8:26 | LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); | ^^^^^^^^ unicode escape in byte string @@ -7,7 +7,7 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); = help: unicode escape sequences cannot be used as a byte or in a byte string error: unicode escape in byte string - --> $DIR/unicode-control-codepoints.rs:6:35 + --> $DIR/unicode-control-codepoints.rs:8:35 | LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); | ^^^^^^^^ unicode escape in byte string @@ -15,7 +15,7 @@ LL | println!("{:?}", b"us\u{202B}e\u{202A}r"); = help: unicode escape sequences cannot be used as a byte or in a byte string error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:26 + --> $DIR/unicode-control-codepoints.rs:18:26 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{202e}' @@ -26,7 +26,7 @@ LL | println!("{:?}", b"/*\xE2\x80\xAE } �if isAdmin� � begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:30 + --> $DIR/unicode-control-codepoints.rs:18:30 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{2066}' @@ -37,7 +37,7 @@ LL | println!("{:?}", b"/*� } \xE2\x81\xA6if isAdmin� � begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:41 + --> $DIR/unicode-control-codepoints.rs:18:41 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{2069}' @@ -48,7 +48,7 @@ LL | println!("{:?}", b"/*� } �if isAdmin\xE2\x81\xA9 � begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in byte string literal - --> $DIR/unicode-control-codepoints.rs:16:43 + --> $DIR/unicode-control-codepoints.rs:18:43 | LL | println!("{:?}", b"/*� } �if isAdmin� � begin admins only "); | ^ must be ASCII but is '\u{2066}' @@ -59,31 +59,31 @@ LL | println!("{:?}", b"/*� } �if isAdmin� \xE2\x81\xA6 begin admins o | ~~~~~~~~~~~~ error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:29 + --> $DIR/unicode-control-codepoints.rs:23:29 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{202e}' error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:33 + --> $DIR/unicode-control-codepoints.rs:23:33 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{2066}' error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:44 + --> $DIR/unicode-control-codepoints.rs:23:44 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{2069}' error: non-ASCII character in raw byte string literal - --> $DIR/unicode-control-codepoints.rs:21:46 + --> $DIR/unicode-control-codepoints.rs:23:46 | LL | println!("{:?}", br##"/*� } �if isAdmin� � begin admins only "##); | ^ must be ASCII but is '\u{2066}' error: unicode codepoint changing visible direction of text present in comment - --> $DIR/unicode-control-codepoints.rs:2:5 + --> $DIR/unicode-control-codepoints.rs:4:5 | LL | // if access_level != "us�e�r" { // Check if admin | ^^^^^^^^^^^^^^^^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -97,7 +97,7 @@ LL | // if access_level != "us�e�r" { // Check if admin = help: if their presence wasn't intentional, you can remove them error: unicode codepoint changing visible direction of text present in comment - --> $DIR/unicode-control-codepoints.rs:30:1 + --> $DIR/unicode-control-codepoints.rs:37:1 | LL | //"/*� } �if isAdmin� � begin admins only */" | ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -112,7 +112,7 @@ LL | //"/*� } �if isAdmin� � begin admins only */" = help: if their presence wasn't intentional, you can remove them error: unicode codepoint changing visible direction of text present in literal - --> $DIR/unicode-control-codepoints.rs:11:22 + --> $DIR/unicode-control-codepoints.rs:13:22 | LL | println!("{:?}", "/*� } �if isAdmin� � begin admins only "); | ^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^ @@ -132,7 +132,7 @@ LL | println!("{:?}", "/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} begi | ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ error: unicode codepoint changing visible direction of text present in literal - --> $DIR/unicode-control-codepoints.rs:14:22 + --> $DIR/unicode-control-codepoints.rs:16:22 | LL | println!("{:?}", r##"/*� } �if isAdmin� � begin admins only "##); | ^^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -151,7 +151,7 @@ LL | println!("{:?}", r##"/*\u{202e} } \u{2066}if isAdmin\u{2069} \u{2066} b | ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ error: unicode codepoint changing visible direction of text present in literal - --> $DIR/unicode-control-codepoints.rs:26:22 + --> $DIR/unicode-control-codepoints.rs:28:22 | LL | println!("{:?}", '�'); | ^-^ @@ -166,8 +166,40 @@ help: if you want to keep them but make them visible in your source code, you ca LL | println!("{:?}", '\u{202e}'); | ~~~~~~~~ +error: unicode codepoint changing visible direction of text present in literal + --> $DIR/unicode-control-codepoints.rs:31:13 + | +LL | let _ = c"�"; + | ^^-^ + | | | + | | '\u{202e}' + | this literal contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | let _ = c"\u{202e}"; + | ~~~~~~~~ + +error: unicode codepoint changing visible direction of text present in literal + --> $DIR/unicode-control-codepoints.rs:33:13 + | +LL | let _ = cr#"�"#; + | ^^^^-^^ + | | | + | | '\u{202e}' + | this literal contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | let _ = cr#"\u{202e}"#; + | ~~~~~~~~ + error: unicode codepoint changing visible direction of text present in doc comment - --> $DIR/unicode-control-codepoints.rs:33:1 + --> $DIR/unicode-control-codepoints.rs:40:1 | LL | /** '�'); */fn foo() {} | ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint @@ -177,7 +209,7 @@ LL | /** '�'); */fn foo() {} = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' error: unicode codepoint changing visible direction of text present in doc comment - --> $DIR/unicode-control-codepoints.rs:36:1 + --> $DIR/unicode-control-codepoints.rs:43:1 | LL | / /** LL | | * @@ -188,5 +220,5 @@ LL | | * '�'); */fn bar() {} = note: if their presence wasn't intentional, you can remove them = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' -error: aborting due to 17 previous errors +error: aborting due to 19 previous errors From c6afe82b8a3255145ba0eeeb49f8c590e38f38e2 Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Tue, 31 Dec 2024 04:15:40 +0000 Subject: [PATCH 2/3] Make parsed string literal fields named --- compiler/rustc_builtin_macros/src/asm.rs | 8 ++++++-- compiler/rustc_builtin_macros/src/format.rs | 6 +++--- compiler/rustc_builtin_macros/src/util.rs | 18 ++++++++++++++---- 3 files changed, 23 insertions(+), 9 deletions(-) diff --git a/compiler/rustc_builtin_macros/src/asm.rs b/compiler/rustc_builtin_macros/src/asm.rs index 6ae697d4030db..238cc14ff0b0a 100644 --- a/compiler/rustc_builtin_macros/src/asm.rs +++ b/compiler/rustc_builtin_macros/src/asm.rs @@ -16,7 +16,7 @@ use smallvec::smallvec; use {rustc_ast as ast, rustc_parse_format as parse}; use crate::errors; -use crate::util::expr_to_spanned_string; +use crate::util::{ExprToSpannedString, expr_to_spanned_string}; pub struct AsmArgs { pub templates: Vec>, @@ -527,7 +527,11 @@ fn expand_preparsed_asm( let msg = "asm template must be a string literal"; let template_sp = template_expr.span; let template_is_mac_call = matches!(template_expr.kind, ast::ExprKind::MacCall(_)); - let (template_str, template_style, template_span) = { + let ExprToSpannedString { + symbol: template_str, + style: template_style, + span: template_span, + } = { let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, template_expr, msg) else { return ExpandResult::Retry(()); }; diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs index 528eb7725f5c8..5b3f08948a92d 100644 --- a/compiler/rustc_builtin_macros/src/format.rs +++ b/compiler/rustc_builtin_macros/src/format.rs @@ -17,7 +17,7 @@ use rustc_parse_format as parse; use rustc_span::{BytePos, ErrorGuaranteed, Ident, InnerSpan, Span, Symbol}; use crate::errors; -use crate::util::expr_to_spanned_string; +use crate::util::{ExprToSpannedString, expr_to_spanned_string}; // The format_args!() macro is expanded in three steps: // 1. First, `parse_args` will parse the `(literal, arg, arg, name=arg, name=arg)` syntax, @@ -166,13 +166,13 @@ fn make_format_args( let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input; - let (fmt_str, fmt_style, fmt_span) = { + let ExprToSpannedString { symbol: fmt_str, span: fmt_span, style: fmt_style } = { let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, efmt.clone(), msg) else { return ExpandResult::Retry(()); }; match mac { Ok(mut fmt) if append_newline => { - fmt.0 = Symbol::intern(&format!("{}\n", fmt.0)); + fmt.symbol = Symbol::intern(&format!("{}\n", fmt.symbol)); fmt } Ok(fmt) => fmt, diff --git a/compiler/rustc_builtin_macros/src/util.rs b/compiler/rustc_builtin_macros/src/util.rs index be12d21a80000..9162e94eddb85 100644 --- a/compiler/rustc_builtin_macros/src/util.rs +++ b/compiler/rustc_builtin_macros/src/util.rs @@ -57,7 +57,13 @@ pub(crate) fn warn_on_duplicate_attribute(ecx: &ExtCtxt<'_>, item: &Annotatable, /// `Ok` represents successfully retrieving the string literal at the correct /// position, e.g., `println("abc")`. -type ExprToSpannedStringResult<'a> = Result<(Symbol, ast::StrStyle, Span), UnexpectedExprKind<'a>>; +pub(crate) type ExprToSpannedStringResult<'a> = Result>; + +pub(crate) struct ExprToSpannedString { + pub symbol: Symbol, + pub style: ast::StrStyle, + pub span: Span, +} /// - `Ok` is returned when the conversion to a string literal is unsuccessful, /// but another type of expression is obtained instead. @@ -90,7 +96,11 @@ pub(crate) fn expr_to_spanned_string<'a>( ExpandResult::Ready(Err(match expr.kind { ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) { Ok(ast::LitKind::Str(s, style)) => { - return ExpandResult::Ready(Ok((s, style, expr.span))); + return ExpandResult::Ready(Ok(ExprToSpannedString { + symbol: s, + style, + span: expr.span, + })); } Ok(ast::LitKind::ByteStr(..)) => { let mut err = cx.dcx().struct_span_err(expr.span, err_msg); @@ -128,7 +138,7 @@ pub(crate) fn expr_to_string( Ok((err, _)) => err.emit(), Err(guar) => guar, }) - .map(|(symbol, style, _)| (symbol, style)) + .map(|ExprToSpannedString { symbol, style, .. }| (symbol, style)) }) } @@ -183,7 +193,7 @@ pub(crate) fn get_single_str_spanned_from_tts( Ok((err, _)) => err.emit(), Err(guar) => guar, }) - .map(|(symbol, _style, span)| (symbol, span)) + .map(|ExprToSpannedString { symbol, span, .. }| (symbol, span)) }) } From ea291e5b5f5c2562fec89a11444e0dc4388565cf Mon Sep 17 00:00:00 2001 From: Michael Goulet Date: Tue, 31 Dec 2024 05:03:22 +0000 Subject: [PATCH 3/3] Account for format_args in HiddenUnicodeCodepoints lint --- compiler/rustc_ast/src/format.rs | 5 ++ compiler/rustc_ast/src/mut_visit.rs | 2 +- compiler/rustc_ast/src/visit.rs | 2 +- compiler/rustc_builtin_macros/src/asm.rs | 1 + compiler/rustc_builtin_macros/src/format.rs | 14 ++++- compiler/rustc_builtin_macros/src/util.rs | 5 ++ .../src/hidden_unicode_codepoints.rs | 60 +++++++++++-------- tests/ui/parser/unicode-control-codepoints.rs | 3 + .../parser/unicode-control-codepoints.stderr | 24 ++++++-- 9 files changed, 82 insertions(+), 34 deletions(-) diff --git a/compiler/rustc_ast/src/format.rs b/compiler/rustc_ast/src/format.rs index de628f098532f..b93846c1fe6f3 100644 --- a/compiler/rustc_ast/src/format.rs +++ b/compiler/rustc_ast/src/format.rs @@ -4,6 +4,7 @@ use rustc_span::{Ident, Span, Symbol}; use crate::Expr; use crate::ptr::P; +use crate::token::LitKind; // Definitions: // @@ -45,6 +46,10 @@ pub struct FormatArgs { pub span: Span, pub template: Vec, pub arguments: FormatArguments, + /// The raw, un-split format string literal, with no escaping or processing. + /// + /// Generally only useful for lints that care about the raw bytes the user wrote. + pub uncooked_fmt_str: (LitKind, Symbol), } /// A piece of a format template string. diff --git a/compiler/rustc_ast/src/mut_visit.rs b/compiler/rustc_ast/src/mut_visit.rs index 995924c2a2949..04cdfc93dcb15 100644 --- a/compiler/rustc_ast/src/mut_visit.rs +++ b/compiler/rustc_ast/src/mut_visit.rs @@ -1596,7 +1596,7 @@ fn walk_inline_asm_sym( fn walk_format_args(vis: &mut T, fmt: &mut FormatArgs) { // FIXME: visit the template exhaustively. - let FormatArgs { span, template: _, arguments } = fmt; + let FormatArgs { span, template: _, arguments, uncooked_fmt_str: _ } = fmt; for FormatArgument { kind, expr } in arguments.all_args_mut() { match kind { FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => { diff --git a/compiler/rustc_ast/src/visit.rs b/compiler/rustc_ast/src/visit.rs index c7cc772dabb7e..e99fc7b604e31 100644 --- a/compiler/rustc_ast/src/visit.rs +++ b/compiler/rustc_ast/src/visit.rs @@ -1061,7 +1061,7 @@ pub fn walk_inline_asm_sym<'a, V: Visitor<'a>>( } pub fn walk_format_args<'a, V: Visitor<'a>>(visitor: &mut V, fmt: &'a FormatArgs) -> V::Result { - let FormatArgs { span: _, template: _, arguments } = fmt; + let FormatArgs { span: _, template: _, arguments, uncooked_fmt_str: _ } = fmt; for FormatArgument { kind, expr } in arguments.all_args() { match kind { FormatArgumentKind::Named(ident) | FormatArgumentKind::Captured(ident) => { diff --git a/compiler/rustc_builtin_macros/src/asm.rs b/compiler/rustc_builtin_macros/src/asm.rs index 238cc14ff0b0a..5062cf55bb9ad 100644 --- a/compiler/rustc_builtin_macros/src/asm.rs +++ b/compiler/rustc_builtin_macros/src/asm.rs @@ -531,6 +531,7 @@ fn expand_preparsed_asm( symbol: template_str, style: template_style, span: template_span, + .. } = { let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, template_expr, msg) else { return ExpandResult::Retry(()); diff --git a/compiler/rustc_builtin_macros/src/format.rs b/compiler/rustc_builtin_macros/src/format.rs index 5b3f08948a92d..0112499c50949 100644 --- a/compiler/rustc_builtin_macros/src/format.rs +++ b/compiler/rustc_builtin_macros/src/format.rs @@ -166,7 +166,12 @@ fn make_format_args( let MacroInput { fmtstr: efmt, mut args, is_direct_literal } = input; - let ExprToSpannedString { symbol: fmt_str, span: fmt_span, style: fmt_style } = { + let ExprToSpannedString { + symbol: fmt_str, + span: fmt_span, + style: fmt_style, + uncooked_symbol: uncooked_fmt_str, + } = { let ExpandResult::Ready(mac) = expr_to_spanned_string(ecx, efmt.clone(), msg) else { return ExpandResult::Retry(()); }; @@ -584,7 +589,12 @@ fn make_format_args( } } - ExpandResult::Ready(Ok(FormatArgs { span: fmt_span, template, arguments: args })) + ExpandResult::Ready(Ok(FormatArgs { + span: fmt_span, + template, + arguments: args, + uncooked_fmt_str, + })) } fn invalid_placeholder_type_error( diff --git a/compiler/rustc_builtin_macros/src/util.rs b/compiler/rustc_builtin_macros/src/util.rs index 9162e94eddb85..38fec2bff14c8 100644 --- a/compiler/rustc_builtin_macros/src/util.rs +++ b/compiler/rustc_builtin_macros/src/util.rs @@ -63,6 +63,10 @@ pub(crate) struct ExprToSpannedString { pub symbol: Symbol, pub style: ast::StrStyle, pub span: Span, + /// The raw string literal, with no escaping or processing. + /// + /// Generally only useful for lints that care about the raw bytes the user wrote. + pub uncooked_symbol: (ast::token::LitKind, Symbol), } /// - `Ok` is returned when the conversion to a string literal is unsuccessful, @@ -100,6 +104,7 @@ pub(crate) fn expr_to_spanned_string<'a>( symbol: s, style, span: expr.span, + uncooked_symbol: (token_lit.kind, token_lit.symbol), })); } Ok(ast::LitKind::ByteStr(..)) => { diff --git a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs index 4a7e4bf75cf3c..406aa1005dfba 100644 --- a/compiler/rustc_lint/src/hidden_unicode_codepoints.rs +++ b/compiler/rustc_lint/src/hidden_unicode_codepoints.rs @@ -82,7 +82,36 @@ impl HiddenUnicodeCodepoints { sub, }); } + + fn check_literal( + &mut self, + cx: &EarlyContext<'_>, + text: Symbol, + lit_kind: ast::token::LitKind, + span: Span, + label: &'static str, + ) { + if !contains_text_flow_control_chars(text.as_str()) { + return; + } + let (padding, point_at_inner_spans) = match lit_kind { + // account for `"` or `'` + ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true), + // account for `c"` + ast::token::LitKind::CStr => (2, true), + // account for `r###"` + ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true), + // account for `cr###"` + ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true), + // suppress bad literals. + ast::token::LitKind::Err(_) => return, + // Be conservative just in case new literals do support these. + _ => (0, false), + }; + self.lint_text_direction_codepoint(cx, text, span, padding, point_at_inner_spans, label); + } } + impl EarlyLintPass for HiddenUnicodeCodepoints { fn check_attribute(&mut self, cx: &EarlyContext<'_>, attr: &ast::Attribute) { if let ast::AttrKind::DocComment(_, comment) = attr.kind { @@ -97,32 +126,11 @@ impl EarlyLintPass for HiddenUnicodeCodepoints { // byte strings are already handled well enough by `EscapeError::NonAsciiCharInByteString` match &expr.kind { ast::ExprKind::Lit(token_lit) => { - let text = token_lit.symbol; - if !contains_text_flow_control_chars(text.as_str()) { - return; - } - let (padding, point_at_inner_spans) = match token_lit.kind { - // account for `"` or `'` - ast::token::LitKind::Str | ast::token::LitKind::Char => (1, true), - // account for `c"` - ast::token::LitKind::CStr => (2, true), - // account for `r###"` - ast::token::LitKind::StrRaw(n) => (n as u32 + 2, true), - // account for `cr###"` - ast::token::LitKind::CStrRaw(n) => (n as u32 + 3, true), - // suppress bad literals. - ast::token::LitKind::Err(_) => return, - // Be conservative just in case new literals do support these. - _ => (0, false), - }; - self.lint_text_direction_codepoint( - cx, - text, - expr.span, - padding, - point_at_inner_spans, - "literal", - ); + self.check_literal(cx, token_lit.symbol, token_lit.kind, expr.span, "literal"); + } + ast::ExprKind::FormatArgs(args) => { + let (lit_kind, text) = args.uncooked_fmt_str; + self.check_literal(cx, text, lit_kind, args.span, "format string"); } _ => {} }; diff --git a/tests/ui/parser/unicode-control-codepoints.rs b/tests/ui/parser/unicode-control-codepoints.rs index c2b9a9911ac5d..14e1cfe59d39a 100644 --- a/tests/ui/parser/unicode-control-codepoints.rs +++ b/tests/ui/parser/unicode-control-codepoints.rs @@ -32,6 +32,9 @@ fn main() { //~^ ERROR unicode codepoint changing visible direction of text present in literal let _ = cr#"‮"#; //~^ ERROR unicode codepoint changing visible direction of text present in literal + + println!("{{‮}}"); + //~^ ERROR unicode codepoint changing visible direction of text present in format string } //"/*‮ } ⁦if isAdmin⁩ ⁦ begin admins only */" diff --git a/tests/ui/parser/unicode-control-codepoints.stderr b/tests/ui/parser/unicode-control-codepoints.stderr index fa75df6a443ad..2893194308ed9 100644 --- a/tests/ui/parser/unicode-control-codepoints.stderr +++ b/tests/ui/parser/unicode-control-codepoints.stderr @@ -97,7 +97,7 @@ LL | // if access_level != "us�e�r" { // Check if admin = help: if their presence wasn't intentional, you can remove them error: unicode codepoint changing visible direction of text present in comment - --> $DIR/unicode-control-codepoints.rs:37:1 + --> $DIR/unicode-control-codepoints.rs:40:1 | LL | //"/*� } �if isAdmin� � begin admins only */" | ^^^^^-^^^-^^^^^^^^^^-^-^^^^^^^^^^^^^^^^^^^^^^ @@ -198,8 +198,24 @@ help: if you want to keep them but make them visible in your source code, you ca LL | let _ = cr#"\u{202e}"#; | ~~~~~~~~ +error: unicode codepoint changing visible direction of text present in format string + --> $DIR/unicode-control-codepoints.rs:36:14 + | +LL | println!("{{�}}"); + | ^^^-^^^ + | | | + | | '\u{202e}' + | this format string contains an invisible unicode text flow control codepoint + | + = note: these kind of unicode codepoints change the way text flows on applications that support them, but can cause confusion because they change the order of characters on the screen + = help: if their presence wasn't intentional, you can remove them +help: if you want to keep them but make them visible in your source code, you can escape them + | +LL | println!("{{\u{202e}}}"); + | ~~~~~~~~ + error: unicode codepoint changing visible direction of text present in doc comment - --> $DIR/unicode-control-codepoints.rs:40:1 + --> $DIR/unicode-control-codepoints.rs:43:1 | LL | /** '�'); */fn foo() {} | ^^^^^^^^^^^^^ this doc comment contains an invisible unicode text flow control codepoint @@ -209,7 +225,7 @@ LL | /** '�'); */fn foo() {} = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' error: unicode codepoint changing visible direction of text present in doc comment - --> $DIR/unicode-control-codepoints.rs:43:1 + --> $DIR/unicode-control-codepoints.rs:46:1 | LL | / /** LL | | * @@ -220,5 +236,5 @@ LL | | * '�'); */fn bar() {} = note: if their presence wasn't intentional, you can remove them = note: if you want to keep them but make them visible in your source code, you can escape them: '\u{202e}' -error: aborting due to 19 previous errors +error: aborting due to 20 previous errors