From 478f8287c0e2c35cda511fd3ac01b7ac78ee7cfe Mon Sep 17 00:00:00 2001 From: Nicholas Nethercote Date: Mon, 2 Jun 2025 08:59:29 +1000 Subject: [PATCH] Introduce `ByteSymbol`. It's like `Symbol` but for byte strings. The interner is now used for both `Symbol` and `ByteSymbol`. E.g. if you intern `"dog"` and `b"dog"` you'll get a `Symbol` and a `ByteSymbol` with the same index and the characters will only be stored once. The motivation for this is to eliminate the `Arc`s in `ast::LitKind`, to make `ast::LitKind` impl `Copy`, and to avoid the need to arena-allocate `ast::LitKind` in HIR. The last of these reduces peak memory by a non-trivial amount on literal-heavy benchmarks such as `deep-vector` and `tuple-stress`. `Encoder`, `Decoder`, `SpanEncoder`, and `SpanDecoder` all get some changes so that they can handle normal strings and byte strings. This change does slow down compilation of programs that use `include_bytes!` on large files, because the contents of those files are now interned (hashed). This makes `include_bytes!` more similar to `include_str!`, though `include_bytes!` contents still aren't escaped, and hashing is still much cheaper than escaping. 
--- compiler/rustc_ast/src/ast.rs | 28 ++-- compiler/rustc_ast/src/util/literal.rs | 25 ++-- compiler/rustc_ast_lowering/src/expr.rs | 24 ++-- compiler/rustc_ast_lowering/src/pat.rs | 12 +- .../rustc_ast_pretty/src/pprust/state/expr.rs | 8 +- .../rustc_builtin_macros/src/concat_bytes.rs | 8 +- .../rustc_builtin_macros/src/source_util.rs | 4 +- .../rustc_expand/src/proc_macro_server.rs | 8 +- compiler/rustc_hir/src/arena.rs | 1 - compiler/rustc_hir/src/hir.rs | 4 +- compiler/rustc_hir/src/intravisit.rs | 4 +- .../src/hir_ty_lowering/mod.rs | 4 +- compiler/rustc_hir_pretty/src/lib.rs | 2 +- .../rustc_hir_typeck/src/fn_ctxt/checks.rs | 2 +- .../src/fn_ctxt/suggestions.rs | 4 +- compiler/rustc_lint/src/invalid_from_utf8.rs | 4 +- compiler/rustc_lint/src/late.rs | 2 +- compiler/rustc_lint/src/passes.rs | 2 +- compiler/rustc_lint/src/types.rs | 10 +- compiler/rustc_metadata/src/rmeta/decoder.rs | 60 ++++---- compiler/rustc_metadata/src/rmeta/encoder.rs | 65 +++++---- .../rustc_middle/src/mir/interpret/mod.rs | 2 +- .../rustc_middle/src/query/on_disk_cache.rs | 129 +++++++++++------- compiler/rustc_middle/src/thir.rs | 2 +- .../src/builder/expr/as_constant.rs | 23 ++-- compiler/rustc_mir_build/src/thir/constant.rs | 26 ++-- .../rustc_mir_build/src/thir/pattern/mod.rs | 2 +- compiler/rustc_serialize/src/serialize.rs | 24 +++- compiler/rustc_span/src/lib.rs | 36 ++++- compiler/rustc_span/src/symbol.rs | 123 ++++++++++++++--- compiler/rustc_span/src/symbol/tests.rs | 12 +- compiler/rustc_ty_utils/src/consts.rs | 2 +- .../clippy/clippy_lints/src/approx_const.rs | 2 +- .../src/bool_assert_comparison.rs | 2 +- .../src/casts/manual_dangling_ptr.rs | 2 +- .../src/casts/unnecessary_cast.rs | 2 +- .../src/default_numeric_fallback.rs | 4 +- .../clippy_lints/src/large_include_file.rs | 2 +- .../src/manual_ignore_case_cmp.rs | 8 +- .../clippy/clippy_lints/src/manual_strip.rs | 2 +- .../src/matches/match_like_matches.rs | 2 +- .../src/matches/match_same_arms.rs | 8 +- 
.../clippy_lints/src/methods/open_options.rs | 2 +- .../src/missing_asserts_for_indexing.rs | 2 +- .../clippy/clippy_lints/src/utils/author.rs | 2 +- src/tools/clippy/clippy_utils/src/consts.rs | 12 +- 46 files changed, 447 insertions(+), 267 deletions(-) diff --git a/compiler/rustc_ast/src/ast.rs b/compiler/rustc_ast/src/ast.rs index b2d8881e3f6f2..f541840237742 100644 --- a/compiler/rustc_ast/src/ast.rs +++ b/compiler/rustc_ast/src/ast.rs @@ -19,7 +19,6 @@ //! - [`UnOp`], [`BinOp`], and [`BinOpKind`]: Unary and binary operators. use std::borrow::Cow; -use std::sync::Arc; use std::{cmp, fmt}; pub use GenericArgs::*; @@ -32,7 +31,7 @@ use rustc_data_structures::tagged_ptr::Tag; use rustc_macros::{Decodable, Encodable, HashStable_Generic}; pub use rustc_span::AttrId; use rustc_span::source_map::{Spanned, respan}; -use rustc_span::{DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym}; +use rustc_span::{ByteSymbol, DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym}; use thin_vec::{ThinVec, thin_vec}; pub use crate::format::*; @@ -1805,10 +1804,17 @@ pub enum ExprKind { Become(P), /// Bytes included via `include_bytes!` + /// /// Added for optimization purposes to avoid the need to escape /// large binary blobs - should always behave like [`ExprKind::Lit`] /// with a `ByteStr` literal. - IncludedBytes(Arc<[u8]>), + /// + /// The value is stored as a `ByteSymbol`. It's unfortunate that we need to + /// intern (hash) the bytes because they're likely to be large and unique. + /// But it's necessary because this will eventually be lowered to + /// `LitKind::ByteStr`, which needs a `ByteSymbol` to impl `Copy` and avoid + /// arena allocation. + IncludedBytes(ByteSymbol), /// A `format_args!()` expression. FormatArgs(P), @@ -2066,7 +2072,7 @@ impl YieldKind { } /// A literal in a meta item. 
-#[derive(Clone, Encodable, Decodable, Debug, HashStable_Generic)] +#[derive(Clone, Copy, Encodable, Decodable, Debug, HashStable_Generic)] pub struct MetaItemLit { /// The original literal as written in the source code. pub symbol: Symbol, @@ -2129,16 +2135,18 @@ pub enum LitFloatType { /// deciding the `LitKind`. This means that float literals like `1f32` are /// classified by this type as `Float`. This is different to `token::LitKind` /// which does *not* consider the suffix. -#[derive(Clone, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)] +#[derive(Clone, Copy, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)] pub enum LitKind { /// A string literal (`"foo"`). The symbol is unescaped, and so may differ /// from the original token's symbol. Str(Symbol, StrStyle), - /// A byte string (`b"foo"`). Not stored as a symbol because it might be - /// non-utf8, and symbols only allow utf8 strings. - ByteStr(Arc<[u8]>, StrStyle), - /// A C String (`c"foo"`). Guaranteed to only have `\0` at the end. - CStr(Arc<[u8]>, StrStyle), + /// A byte string (`b"foo"`). The symbol is unescaped, and so may differ + /// from the original token's symbol. + ByteStr(ByteSymbol, StrStyle), + /// A C String (`c"foo"`). Guaranteed to only have `\0` at the end. The + /// symbol is unescaped, and so may differ from the original token's + /// symbol. + CStr(ByteSymbol, StrStyle), /// A byte char (`b'f'`). Byte(u8), /// A character literal (`'a'`). 
diff --git a/compiler/rustc_ast/src/util/literal.rs b/compiler/rustc_ast/src/util/literal.rs index ad9e5d1468b09..fa7878873e56f 100644 --- a/compiler/rustc_ast/src/util/literal.rs +++ b/compiler/rustc_ast/src/util/literal.rs @@ -5,7 +5,7 @@ use std::{ascii, fmt, str}; use rustc_literal_escaper::{ MixedUnit, unescape_byte, unescape_byte_str, unescape_c_str, unescape_char, unescape_str, }; -use rustc_span::{Span, Symbol, kw, sym}; +use rustc_span::{ByteSymbol, Span, Symbol, kw, sym}; use tracing::debug; use crate::ast::{self, LitKind, MetaItemLit, StrStyle}; @@ -116,13 +116,12 @@ impl LitKind { assert!(!err.is_fatal(), "failed to unescape string literal") } }); - LitKind::ByteStr(buf.into(), StrStyle::Cooked) + LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Cooked) } token::ByteStrRaw(n) => { - // Raw strings have no escapes so we can convert the symbol - // directly to a `Arc`. + // Raw byte strings have no escapes so no work is needed here. let buf = symbol.as_str().to_owned().into_bytes(); - LitKind::ByteStr(buf.into(), StrStyle::Raw(n)) + LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Raw(n)) } token::CStr => { let s = symbol.as_str(); @@ -137,7 +136,7 @@ impl LitKind { } }); buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Cooked) + LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Cooked) } token::CStrRaw(n) => { // Raw strings have no escapes so we can convert the symbol @@ -145,7 +144,7 @@ impl LitKind { // char. let mut buf = symbol.as_str().to_owned().into_bytes(); buf.push(0); - LitKind::CStr(buf.into(), StrStyle::Raw(n)) + LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Raw(n)) } token::Err(guar) => LitKind::Err(guar), }) @@ -167,12 +166,12 @@ impl fmt::Display for LitKind { delim = "#".repeat(n as usize), string = sym )?, - LitKind::ByteStr(ref bytes, StrStyle::Cooked) => { - write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))? 
+ LitKind::ByteStr(ref byte_sym, StrStyle::Cooked) => { + write!(f, "b\"{}\"", escape_byte_str_symbol(byte_sym.as_byte_str()))? } - LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => { + LitKind::ByteStr(ref byte_sym, StrStyle::Raw(n)) => { // Unwrap because raw byte string literals can only contain ASCII. - let symbol = str::from_utf8(bytes).unwrap(); + let symbol = str::from_utf8(byte_sym.as_byte_str()).unwrap(); write!( f, "br{delim}\"{string}\"{delim}", @@ -181,11 +180,11 @@ impl fmt::Display for LitKind { )?; } LitKind::CStr(ref bytes, StrStyle::Cooked) => { - write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))? + write!(f, "c\"{}\"", escape_byte_str_symbol(bytes.as_byte_str()))? } LitKind::CStr(ref bytes, StrStyle::Raw(n)) => { // This can only be valid UTF-8. - let symbol = str::from_utf8(bytes).unwrap(); + let symbol = str::from_utf8(bytes.as_byte_str()).unwrap(); write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?; } LitKind::Int(n, ty) => { diff --git a/compiler/rustc_ast_lowering/src/expr.rs b/compiler/rustc_ast_lowering/src/expr.rs index c2140514e3117..8747e624a4a9e 100644 --- a/compiler/rustc_ast_lowering/src/expr.rs +++ b/compiler/rustc_ast_lowering/src/expr.rs @@ -144,11 +144,11 @@ impl<'hir> LoweringContext<'_, 'hir> { hir::ExprKind::Unary(op, ohs) } ExprKind::Lit(token_lit) => hir::ExprKind::Lit(self.lower_lit(token_lit, e.span)), - ExprKind::IncludedBytes(bytes) => { - let lit = self.arena.alloc(respan( + ExprKind::IncludedBytes(byte_sym) => { + let lit = respan( self.lower_span(e.span), - LitKind::ByteStr(Arc::clone(bytes), StrStyle::Cooked), - )); + LitKind::ByteStr(*byte_sym, StrStyle::Cooked), + ); hir::ExprKind::Lit(lit) } ExprKind::Cast(expr, ty) => { @@ -421,11 +421,7 @@ impl<'hir> LoweringContext<'_, 'hir> { }) } - pub(crate) fn lower_lit( - &mut self, - token_lit: &token::Lit, - span: Span, - ) -> &'hir Spanned { + pub(crate) fn lower_lit(&mut self, token_lit: &token::Lit, span: Span) -> hir::Lit { let lit_kind = 
match LitKind::from_token_lit(*token_lit) { Ok(lit_kind) => lit_kind, Err(err) => { @@ -433,7 +429,7 @@ impl<'hir> LoweringContext<'_, 'hir> { LitKind::Err(guar) } }; - self.arena.alloc(respan(self.lower_span(span), lit_kind)) + respan(self.lower_span(span), lit_kind) } fn lower_unop(&mut self, u: UnOp) -> hir::UnOp { @@ -2141,10 +2137,10 @@ impl<'hir> LoweringContext<'_, 'hir> { } fn expr_uint(&mut self, sp: Span, ty: ast::UintTy, value: u128) -> hir::Expr<'hir> { - let lit = self.arena.alloc(hir::Lit { + let lit = hir::Lit { span: sp, node: ast::LitKind::Int(value.into(), ast::LitIntType::Unsigned(ty)), - }); + }; self.expr(sp, hir::ExprKind::Lit(lit)) } @@ -2161,9 +2157,7 @@ impl<'hir> LoweringContext<'_, 'hir> { } pub(super) fn expr_str(&mut self, sp: Span, value: Symbol) -> hir::Expr<'hir> { - let lit = self - .arena - .alloc(hir::Lit { span: sp, node: ast::LitKind::Str(value, ast::StrStyle::Cooked) }); + let lit = hir::Lit { span: sp, node: ast::LitKind::Str(value, ast::StrStyle::Cooked) }; self.expr(sp, hir::ExprKind::Lit(lit)) } diff --git a/compiler/rustc_ast_lowering/src/pat.rs b/compiler/rustc_ast_lowering/src/pat.rs index 58dea472f1d3b..e444062104813 100644 --- a/compiler/rustc_ast_lowering/src/pat.rs +++ b/compiler/rustc_ast_lowering/src/pat.rs @@ -390,19 +390,15 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> { allow_paths: bool, ) -> &'hir hir::PatExpr<'hir> { let span = self.lower_span(expr.span); - let err = |guar| hir::PatExprKind::Lit { - lit: self.arena.alloc(respan(span, LitKind::Err(guar))), - negated: false, - }; + let err = + |guar| hir::PatExprKind::Lit { lit: respan(span, LitKind::Err(guar)), negated: false }; let kind = match &expr.kind { ExprKind::Lit(lit) => { hir::PatExprKind::Lit { lit: self.lower_lit(lit, span), negated: false } } ExprKind::ConstBlock(c) => hir::PatExprKind::ConstBlock(self.lower_const_block(c)), - ExprKind::IncludedBytes(bytes) => hir::PatExprKind::Lit { - lit: self - .arena - .alloc(respan(span, 
LitKind::ByteStr(Arc::clone(bytes), StrStyle::Cooked))), + ExprKind::IncludedBytes(byte_sym) => hir::PatExprKind::Lit { + lit: respan(span, LitKind::ByteStr(*byte_sym, StrStyle::Cooked)), negated: false, }, ExprKind::Err(guar) => err(*guar), diff --git a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs index 7651e8365a293..8a2cb64b2a084 100644 --- a/compiler/rustc_ast_pretty/src/pprust/state/expr.rs +++ b/compiler/rustc_ast_pretty/src/pprust/state/expr.rs @@ -469,8 +469,12 @@ impl<'a> State<'a> { ast::ExprKind::Lit(token_lit) => { self.print_token_literal(*token_lit, expr.span); } - ast::ExprKind::IncludedBytes(bytes) => { - let lit = token::Lit::new(token::ByteStr, escape_byte_str_symbol(bytes), None); + ast::ExprKind::IncludedBytes(byte_sym) => { + let lit = token::Lit::new( + token::ByteStr, + escape_byte_str_symbol(byte_sym.as_byte_str()), + None, + ); self.print_token_literal(lit, expr.span) } ast::ExprKind::Cast(expr, ty) => { diff --git a/compiler/rustc_builtin_macros/src/concat_bytes.rs b/compiler/rustc_builtin_macros/src/concat_bytes.rs index 92d011fb9d1f0..fd2d740c02030 100644 --- a/compiler/rustc_builtin_macros/src/concat_bytes.rs +++ b/compiler/rustc_builtin_macros/src/concat_bytes.rs @@ -177,15 +177,15 @@ pub(crate) fn expand_concat_bytes( Ok(LitKind::Byte(val)) => { accumulator.push(val); } - Ok(LitKind::ByteStr(ref bytes, _)) => { - accumulator.extend_from_slice(bytes); + Ok(LitKind::ByteStr(ref byte_sym, _)) => { + accumulator.extend_from_slice(byte_sym.as_byte_str()); } _ => { guar.get_or_insert_with(|| invalid_type_err(cx, token_lit, e.span, false)); } }, - ExprKind::IncludedBytes(bytes) => { - accumulator.extend_from_slice(bytes); + ExprKind::IncludedBytes(byte_sym) => { + accumulator.extend_from_slice(byte_sym.as_byte_str()); } ExprKind::Err(guarantee) => { guar = Some(*guarantee); diff --git a/compiler/rustc_builtin_macros/src/source_util.rs 
b/compiler/rustc_builtin_macros/src/source_util.rs index 8142f1518dd8d..cebfffa1e16cb 100644 --- a/compiler/rustc_builtin_macros/src/source_util.rs +++ b/compiler/rustc_builtin_macros/src/source_util.rs @@ -16,7 +16,7 @@ use rustc_parse::parser::{ForceCollect, Parser}; use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error}; use rustc_session::lint::builtin::INCOMPLETE_INCLUDE; use rustc_span::source_map::SourceMap; -use rustc_span::{Pos, Span, Symbol}; +use rustc_span::{ByteSymbol, Pos, Span, Symbol}; use smallvec::SmallVec; use crate::errors; @@ -237,7 +237,7 @@ pub(crate) fn expand_include_bytes( Ok((bytes, _bsp)) => { // Don't care about getting the span for the raw bytes, // because the console can't really show them anyway. - let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(bytes)); + let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(ByteSymbol::intern(&bytes))); MacEager::expr(expr) } Err(dummy) => dummy, diff --git a/compiler/rustc_expand/src/proc_macro_server.rs b/compiler/rustc_expand/src/proc_macro_server.rs index fb5abaefb570b..af91c8b8f0077 100644 --- a/compiler/rustc_expand/src/proc_macro_server.rs +++ b/compiler/rustc_expand/src/proc_macro_server.rs @@ -599,8 +599,12 @@ impl server::TokenStream for Rustc<'_, '_> { ast::ExprKind::Lit(token_lit) => { Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span)) } - ast::ExprKind::IncludedBytes(bytes) => { - let lit = token::Lit::new(token::ByteStr, escape_byte_str_symbol(bytes), None); + ast::ExprKind::IncludedBytes(byte_sym) => { + let lit = token::Lit::new( + token::ByteStr, + escape_byte_str_symbol(byte_sym.as_byte_str()), + None, + ); Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span)) } ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind { diff --git a/compiler/rustc_hir/src/arena.rs b/compiler/rustc_hir/src/arena.rs index b0dff635a9bdb..180cb6497e7ab 100644 --- a/compiler/rustc_hir/src/arena.rs +++ 
b/compiler/rustc_hir/src/arena.rs @@ -8,7 +8,6 @@ macro_rules! arena_types { [] asm_template: rustc_ast::InlineAsmTemplatePiece, [] attribute: rustc_hir::Attribute, [] owner_info: rustc_hir::OwnerInfo<'tcx>, - [] lit: rustc_hir::Lit, [] macro_def: rustc_ast::MacroDef, ]); ) diff --git a/compiler/rustc_hir/src/hir.rs b/compiler/rustc_hir/src/hir.rs index 75dff588669ab..464ff1487da94 100644 --- a/compiler/rustc_hir/src/hir.rs +++ b/compiler/rustc_hir/src/hir.rs @@ -1807,7 +1807,7 @@ pub struct PatExpr<'hir> { #[derive(Debug, Clone, Copy, HashStable_Generic)] pub enum PatExprKind<'hir> { Lit { - lit: &'hir Lit, + lit: Lit, // FIXME: move this into `Lit` and handle negated literal expressions // once instead of matching on unop neg expressions everywhere. negated: bool, @@ -2734,7 +2734,7 @@ pub enum ExprKind<'hir> { /// A unary operation (e.g., `!x`, `*x`). Unary(UnOp, &'hir Expr<'hir>), /// A literal (e.g., `1`, `"foo"`). - Lit(&'hir Lit), + Lit(Lit), /// A cast (e.g., `foo as f64`). Cast(&'hir Expr<'hir>, &'hir Ty<'hir>), /// A type ascription (e.g., `x: Foo`). See RFC 3307. 
diff --git a/compiler/rustc_hir/src/intravisit.rs b/compiler/rustc_hir/src/intravisit.rs index 57e49625148c2..a0bc318e2ca66 100644 --- a/compiler/rustc_hir/src/intravisit.rs +++ b/compiler/rustc_hir/src/intravisit.rs @@ -347,7 +347,7 @@ pub trait Visitor<'v>: Sized { fn visit_pat_expr(&mut self, expr: &'v PatExpr<'v>) -> Self::Result { walk_pat_expr(self, expr) } - fn visit_lit(&mut self, _hir_id: HirId, _lit: &'v Lit, _negated: bool) -> Self::Result { + fn visit_lit(&mut self, _hir_id: HirId, _lit: Lit, _negated: bool) -> Self::Result { Self::Result::output() } fn visit_anon_const(&mut self, c: &'v AnonConst) -> Self::Result { @@ -786,7 +786,7 @@ pub fn walk_pat_expr<'v, V: Visitor<'v>>(visitor: &mut V, expr: &'v PatExpr<'v>) let PatExpr { hir_id, span, kind } = expr; try_visit!(visitor.visit_id(*hir_id)); match kind { - PatExprKind::Lit { lit, negated } => visitor.visit_lit(*hir_id, lit, *negated), + PatExprKind::Lit { lit, negated } => visitor.visit_lit(*hir_id, *lit, *negated), PatExprKind::ConstBlock(c) => visitor.visit_inline_const(c), PatExprKind::Path(qpath) => visitor.visit_qpath(qpath, *hir_id, *span), } diff --git a/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs b/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs index 74739355e1fcb..55bf2ab6b50e1 100644 --- a/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs +++ b/compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs @@ -2364,9 +2364,9 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ { }; let lit_input = match expr.kind { - hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: &lit.node, ty, neg: false }), + hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: lit.node, ty, neg: false }), hir::ExprKind::Unary(hir::UnOp::Neg, expr) => match expr.kind { - hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: &lit.node, ty, neg: true }), + hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: lit.node, ty, neg: true }), _ => None, }, _ => None, diff --git 
a/compiler/rustc_hir_pretty/src/lib.rs b/compiler/rustc_hir_pretty/src/lib.rs index 087167dfd90bb..e00c22c47aa7d 100644 --- a/compiler/rustc_hir_pretty/src/lib.rs +++ b/compiler/rustc_hir_pretty/src/lib.rs @@ -1480,7 +1480,7 @@ impl<'a> State<'a> { self.print_expr_addr_of(k, m, expr); } hir::ExprKind::Lit(lit) => { - self.print_literal(lit); + self.print_literal(&lit); } hir::ExprKind::Cast(expr, ty) => { self.print_expr_cond_paren(expr, self.precedence(expr) < ExprPrecedence::Cast); diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs index c7b9cb470913c..3c53a060f7f84 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs @@ -1637,7 +1637,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { ast::LitKind::ByteStr(ref v, _) => Ty::new_imm_ref( tcx, tcx.lifetimes.re_static, - Ty::new_array(tcx, tcx.types.u8, v.len() as u64), + Ty::new_array(tcx, tcx.types.u8, v.as_byte_str().len() as u64), ), ast::LitKind::Byte(_) => tcx.types.u8, ast::LitKind::Char(_) => tcx.types.char, diff --git a/compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs b/compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs index 7e5f1d97a8bf4..dd6eb73a3a0aa 100644 --- a/compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs +++ b/compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs @@ -1624,7 +1624,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { node: rustc_ast::LitKind::Int(lit, rustc_ast::LitIntType::Unsuffixed), span, }) => { - let Ok(snippet) = self.tcx.sess.source_map().span_to_snippet(*span) else { + let Ok(snippet) = self.tcx.sess.source_map().span_to_snippet(span) else { return false; }; if !(snippet.starts_with("0x") || snippet.starts_with("0X")) { @@ -1683,7 +1683,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> { // We have satisfied all requirements to provide a suggestion. Emit it. 
err.span_suggestion( - *span, + span, format!("if you meant to create a null pointer, use `{null_path_str}()`"), null_path_str + "()", Applicability::MachineApplicable, diff --git a/compiler/rustc_lint/src/invalid_from_utf8.rs b/compiler/rustc_lint/src/invalid_from_utf8.rs index 11eb079ddc09d..41b670c92c4c5 100644 --- a/compiler/rustc_lint/src/invalid_from_utf8.rs +++ b/compiler/rustc_lint/src/invalid_from_utf8.rs @@ -108,8 +108,8 @@ impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 { } match init.kind { ExprKind::Lit(Spanned { node: lit, .. }) => { - if let LitKind::ByteStr(bytes, _) = &lit - && let Err(utf8_error) = std::str::from_utf8(bytes) + if let LitKind::ByteStr(byte_sym, _) = &lit + && let Err(utf8_error) = std::str::from_utf8(byte_sym.as_byte_str()) { lint(init.span, utf8_error); } diff --git a/compiler/rustc_lint/src/late.rs b/compiler/rustc_lint/src/late.rs index 852bb01c09655..c681deea779df 100644 --- a/compiler/rustc_lint/src/late.rs +++ b/compiler/rustc_lint/src/late.rs @@ -152,7 +152,7 @@ impl<'tcx, T: LateLintPass<'tcx>> hir_visit::Visitor<'tcx> for LateContextAndPas hir_visit::walk_pat(self, p); } - fn visit_lit(&mut self, hir_id: HirId, lit: &'tcx hir::Lit, negated: bool) { + fn visit_lit(&mut self, hir_id: HirId, lit: hir::Lit, negated: bool) { lint_callback!(self, check_lit, hir_id, lit, negated); } diff --git a/compiler/rustc_lint/src/passes.rs b/compiler/rustc_lint/src/passes.rs index 409a23d1da039..affea1b80ec54 100644 --- a/compiler/rustc_lint/src/passes.rs +++ b/compiler/rustc_lint/src/passes.rs @@ -23,7 +23,7 @@ macro_rules! 
late_lint_methods { fn check_stmt(a: &'tcx rustc_hir::Stmt<'tcx>); fn check_arm(a: &'tcx rustc_hir::Arm<'tcx>); fn check_pat(a: &'tcx rustc_hir::Pat<'tcx>); - fn check_lit(hir_id: rustc_hir::HirId, a: &'tcx rustc_hir::Lit, negated: bool); + fn check_lit(hir_id: rustc_hir::HirId, a: rustc_hir::Lit, negated: bool); fn check_expr(a: &'tcx rustc_hir::Expr<'tcx>); fn check_expr_post(a: &'tcx rustc_hir::Expr<'tcx>); fn check_ty(a: &'tcx rustc_hir::Ty<'tcx, rustc_hir::AmbigArg>); diff --git a/compiler/rustc_lint/src/types.rs b/compiler/rustc_lint/src/types.rs index aaba0c14b1ce4..ea5485d8e5da7 100644 --- a/compiler/rustc_lint/src/types.rs +++ b/compiler/rustc_lint/src/types.rs @@ -547,18 +547,12 @@ fn lint_fn_pointer<'tcx>( } impl<'tcx> LateLintPass<'tcx> for TypeLimits { - fn check_lit( - &mut self, - cx: &LateContext<'tcx>, - hir_id: HirId, - lit: &'tcx hir::Lit, - negated: bool, - ) { + fn check_lit(&mut self, cx: &LateContext<'tcx>, hir_id: HirId, lit: hir::Lit, negated: bool) { if negated { self.negated_expr_id = Some(hir_id); self.negated_expr_span = Some(lit.span); } - lint_literal(cx, self, hir_id, lit.span, lit, negated); + lint_literal(cx, self, hir_id, lit.span, &lit, negated); } fn check_expr(&mut self, cx: &LateContext<'tcx>, e: &'tcx hir::Expr<'tcx>) { diff --git a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 2696c47c62c9a..065c261c19473 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -32,7 +32,9 @@ use rustc_session::Session; use rustc_session::config::TargetModifier; use rustc_session::cstore::{CrateSource, ExternCrate}; use rustc_span::hygiene::HygieneDecodeContext; -use rustc_span::{BytePos, DUMMY_SP, Pos, SpanData, SpanDecoder, SyntaxContext, kw}; +use rustc_span::{ + BytePos, ByteSymbol, DUMMY_SP, Pos, SpanData, SpanDecoder, Symbol, SyntaxContext, kw, +}; use tracing::debug; use crate::creader::CStore; @@ -384,6 +386,28 @@ impl<'a, 'tcx> 
DecodeContext<'a, 'tcx> { fn read_raw_bytes(&mut self, len: usize) -> &[u8] { self.opaque.read_raw_bytes(len) } + + fn decode_symbol_or_byte_symbol( + &mut self, + new_from_index: impl Fn(u32) -> S, + read_and_intern_str_or_byte_str_this: impl Fn(&mut Self) -> S, + read_and_intern_str_or_byte_str_opaque: impl Fn(&mut MemDecoder<'a>) -> S, + ) -> S { + let tag = self.read_u8(); + + match tag { + SYMBOL_STR => read_and_intern_str_or_byte_str_this(self), + SYMBOL_OFFSET => { + // read str offset + let pos = self.read_usize(); + + // move to str offset and read + self.opaque.with_position(pos, |d| read_and_intern_str_or_byte_str_opaque(d)) + } + SYMBOL_PREDEFINED => new_from_index(self.read_u32()), + _ => unreachable!(), + } + } } impl<'a, 'tcx> TyDecoder<'tcx> for DecodeContext<'a, 'tcx> { @@ -545,29 +569,19 @@ impl<'a, 'tcx> SpanDecoder for DecodeContext<'a, 'tcx> { } fn decode_symbol(&mut self) -> Symbol { - let tag = self.read_u8(); - - match tag { - SYMBOL_STR => { - let s = self.read_str(); - Symbol::intern(s) - } - SYMBOL_OFFSET => { - // read str offset - let pos = self.read_usize(); + self.decode_symbol_or_byte_symbol( + Symbol::new, + |this| Symbol::intern(this.read_str()), + |opaque| Symbol::intern(opaque.read_str()), + ) + } - // move to str offset and read - self.opaque.with_position(pos, |d| { - let s = d.read_str(); - Symbol::intern(s) - }) - } - SYMBOL_PREDEFINED => { - let symbol_index = self.read_u32(); - Symbol::new(symbol_index) - } - _ => unreachable!(), - } + fn decode_byte_symbol(&mut self) -> ByteSymbol { + self.decode_symbol_or_byte_symbol( + ByteSymbol::new, + |this| ByteSymbol::intern(this.read_byte_str()), + |opaque| ByteSymbol::intern(opaque.read_byte_str()), + ) } } diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index d74918235b6c3..b2696ddc902cd 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -29,8 +29,8 @@ use 
rustc_serialize::{Decodable, Decoder, Encodable, Encoder, opaque}; use rustc_session::config::{CrateType, OptLevel, TargetModifier}; use rustc_span::hygiene::HygieneEncodeContext; use rustc_span::{ - ExternalSource, FileName, SourceFile, SpanData, SpanEncoder, StableSourceFileId, SyntaxContext, - sym, + ByteSymbol, ExternalSource, FileName, SourceFile, SpanData, SpanEncoder, StableSourceFileId, + Symbol, SyntaxContext, sym, }; use tracing::{debug, instrument, trace}; @@ -63,7 +63,8 @@ pub(super) struct EncodeContext<'a, 'tcx> { required_source_files: Option>, is_proc_macro: bool, hygiene_ctxt: &'a HygieneEncodeContext, - symbol_table: FxHashMap, + // Used for both `Symbol`s and `ByteSymbol`s. + symbol_index_table: FxHashMap, } /// If the current crate is a proc-macro, returns early with `LazyArray::default()`. @@ -200,27 +201,14 @@ impl<'a, 'tcx> SpanEncoder for EncodeContext<'a, 'tcx> { } } - fn encode_symbol(&mut self, symbol: Symbol) { - // if symbol predefined, emit tag and symbol index - if symbol.is_predefined() { - self.opaque.emit_u8(SYMBOL_PREDEFINED); - self.opaque.emit_u32(symbol.as_u32()); - } else { - // otherwise write it as string or as offset to it - match self.symbol_table.entry(symbol) { - Entry::Vacant(o) => { - self.opaque.emit_u8(SYMBOL_STR); - let pos = self.opaque.position(); - o.insert(pos); - self.emit_str(symbol.as_str()); - } - Entry::Occupied(o) => { - let x = *o.get(); - self.emit_u8(SYMBOL_OFFSET); - self.emit_usize(x); - } - } - } + fn encode_symbol(&mut self, sym: Symbol) { + self.encode_symbol_or_byte_symbol(sym.as_u32(), |this| this.emit_str(sym.as_str())); + } + + fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol) { + self.encode_symbol_or_byte_symbol(byte_sym.as_u32(), |this| { + this.emit_byte_str(byte_sym.as_byte_str()) + }); } } @@ -492,6 +480,33 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> { LazyArray::from_position_and_num_elems(pos, len) } + fn encode_symbol_or_byte_symbol( + &mut self, + index: u32, + 
emit_str_or_byte_str: impl Fn(&mut Self), + ) { + // if symbol/byte symbol is predefined, emit tag and symbol index + if Symbol::is_predefined(index) { + self.opaque.emit_u8(SYMBOL_PREDEFINED); + self.opaque.emit_u32(index); + } else { + // otherwise write it as string or as offset to it + match self.symbol_index_table.entry(index) { + Entry::Vacant(o) => { + self.opaque.emit_u8(SYMBOL_STR); + let pos = self.opaque.position(); + o.insert(pos); + emit_str_or_byte_str(self); + } + Entry::Occupied(o) => { + let x = *o.get(); + self.emit_u8(SYMBOL_OFFSET); + self.emit_usize(x); + } + } + } + } + fn encode_def_path_table(&mut self) { let table = self.tcx.def_path_table(); if self.is_proc_macro { @@ -2427,7 +2442,7 @@ fn with_encode_metadata_header( required_source_files, is_proc_macro: tcx.crate_types().contains(&CrateType::ProcMacro), hygiene_ctxt: &hygiene_ctxt, - symbol_table: Default::default(), + symbol_index_table: Default::default(), }; // Encode the rustc version string in a predictable location. diff --git a/compiler/rustc_middle/src/mir/interpret/mod.rs b/compiler/rustc_middle/src/mir/interpret/mod.rs index ea2f84d46d7f7..da9e5bdbadd4e 100644 --- a/compiler/rustc_middle/src/mir/interpret/mod.rs +++ b/compiler/rustc_middle/src/mir/interpret/mod.rs @@ -77,7 +77,7 @@ impl<'tcx> GlobalId<'tcx> { #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, HashStable)] pub struct LitToConstInput<'tcx> { /// The absolute value of the resultant constant. - pub lit: &'tcx LitKind, + pub lit: LitKind, /// The type of the constant. pub ty: Ty<'tcx>, /// If the constant is negative. 
diff --git a/compiler/rustc_middle/src/query/on_disk_cache.rs b/compiler/rustc_middle/src/query/on_disk_cache.rs index e1876f8f0f9b6..a7ac34428986e 100644 --- a/compiler/rustc_middle/src/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/query/on_disk_cache.rs @@ -20,8 +20,8 @@ use rustc_span::hygiene::{ }; use rustc_span::source_map::Spanned; use rustc_span::{ - BytePos, CachingSourceMapView, ExpnData, ExpnHash, Pos, RelativeBytePos, SourceFile, Span, - SpanDecoder, SpanEncoder, StableSourceFileId, Symbol, + BytePos, ByteSymbol, CachingSourceMapView, ExpnData, ExpnHash, Pos, RelativeBytePos, + SourceFile, Span, SpanDecoder, SpanEncoder, StableSourceFileId, Symbol, }; use crate::dep_graph::{DepNodeIndex, SerializedDepNodeIndex}; @@ -42,7 +42,7 @@ const TAG_RELATIVE_SPAN: u8 = 2; const TAG_SYNTAX_CONTEXT: u8 = 0; const TAG_EXPN_DATA: u8 = 1; -// Tags for encoding Symbol's +// Tags for encoding Symbols and ByteSymbols const SYMBOL_STR: u8 = 0; const SYMBOL_OFFSET: u8 = 1; const SYMBOL_PREDEFINED: u8 = 2; @@ -253,7 +253,7 @@ impl OnDiskCache { source_map: CachingSourceMapView::new(tcx.sess.source_map()), file_to_file_index, hygiene_context: &hygiene_encode_context, - symbol_table: Default::default(), + symbol_index_table: Default::default(), }; // Encode query results. 
@@ -479,6 +479,30 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> { .expect("failed to lookup `SourceFile` in new context") })) } + + // copy&paste impl from rustc_metadata + #[inline] + fn decode_symbol_or_byte_symbol( + &mut self, + new_from_index: impl Fn(u32) -> S, + read_and_intern_str_or_byte_str_this: impl Fn(&mut Self) -> S, + read_and_intern_str_or_byte_str_opaque: impl Fn(&mut MemDecoder<'a>) -> S, + ) -> S { + let tag = self.read_u8(); + + match tag { + SYMBOL_STR => read_and_intern_str_or_byte_str_this(self), + SYMBOL_OFFSET => { + // read str offset + let pos = self.read_usize(); + + // move to str offset and read + self.opaque.with_position(pos, |d| read_and_intern_str_or_byte_str_opaque(d)) + } + SYMBOL_PREDEFINED => new_from_index(self.read_u32()), + _ => unreachable!(), + } + } } // Decodes something that was encoded with `encode_tagged()` and verify that the @@ -653,32 +677,20 @@ impl<'a, 'tcx> SpanDecoder for CacheDecoder<'a, 'tcx> { Span::new(lo, hi, ctxt, parent) } - // copy&paste impl from rustc_metadata - #[inline] fn decode_symbol(&mut self) -> Symbol { - let tag = self.read_u8(); - - match tag { - SYMBOL_STR => { - let s = self.read_str(); - Symbol::intern(s) - } - SYMBOL_OFFSET => { - // read str offset - let pos = self.read_usize(); + self.decode_symbol_or_byte_symbol( + Symbol::new, + |this| Symbol::intern(this.read_str()), + |opaque| Symbol::intern(opaque.read_str()), + ) + } - // move to str offset and read - self.opaque.with_position(pos, |d| { - let s = d.read_str(); - Symbol::intern(s) - }) - } - SYMBOL_PREDEFINED => { - let symbol_index = self.read_u32(); - Symbol::new(symbol_index) - } - _ => unreachable!(), - } + fn decode_byte_symbol(&mut self) -> ByteSymbol { + self.decode_symbol_or_byte_symbol( + ByteSymbol::new, + |this| ByteSymbol::intern(this.read_byte_str()), + |opaque| ByteSymbol::intern(opaque.read_byte_str()), + ) } fn decode_crate_num(&mut self) -> CrateNum { @@ -807,7 +819,8 @@ pub struct CacheEncoder<'a, 'tcx> { source_map: 
CachingSourceMapView<'tcx>, file_to_file_index: FxHashMap<*const SourceFile, SourceFileIndex>, hygiene_context: &'a HygieneEncodeContext, - symbol_table: FxHashMap, + // Used for both `Symbol`s and `ByteSymbol`s. + symbol_index_table: FxHashMap, } impl<'a, 'tcx> CacheEncoder<'a, 'tcx> { @@ -831,6 +844,34 @@ impl<'a, 'tcx> CacheEncoder<'a, 'tcx> { ((end_pos - start_pos) as u64).encode(self); } + // copy&paste impl from rustc_metadata + fn encode_symbol_or_byte_symbol( + &mut self, + index: u32, + emit_str_or_byte_str: impl Fn(&mut Self), + ) { + // if symbol/byte symbol is predefined, emit tag and symbol index + if Symbol::is_predefined(index) { + self.encoder.emit_u8(SYMBOL_PREDEFINED); + self.encoder.emit_u32(index); + } else { + // otherwise write it as string or as offset to it + match self.symbol_index_table.entry(index) { + Entry::Vacant(o) => { + self.encoder.emit_u8(SYMBOL_STR); + let pos = self.encoder.position(); + o.insert(pos); + emit_str_or_byte_str(self); + } + Entry::Occupied(o) => { + let x = *o.get(); + self.emit_u8(SYMBOL_OFFSET); + self.emit_usize(x); + } + } + } + } + #[inline] fn finish(mut self) -> FileEncodeResult { self.encoder.finish() @@ -889,28 +930,14 @@ impl<'a, 'tcx> SpanEncoder for CacheEncoder<'a, 'tcx> { len.encode(self); } - // copy&paste impl from rustc_metadata - fn encode_symbol(&mut self, symbol: Symbol) { - // if symbol predefined, emit tag and symbol index - if symbol.is_predefined() { - self.encoder.emit_u8(SYMBOL_PREDEFINED); - self.encoder.emit_u32(symbol.as_u32()); - } else { - // otherwise write it as string or as offset to it - match self.symbol_table.entry(symbol) { - Entry::Vacant(o) => { - self.encoder.emit_u8(SYMBOL_STR); - let pos = self.encoder.position(); - o.insert(pos); - self.emit_str(symbol.as_str()); - } - Entry::Occupied(o) => { - let x = *o.get(); - self.emit_u8(SYMBOL_OFFSET); - self.emit_usize(x); - } - } - } + fn encode_symbol(&mut self, sym: Symbol) { + self.encode_symbol_or_byte_symbol(sym.as_u32(), 
|this| this.emit_str(sym.as_str())); + } + + fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol) { + self.encode_symbol_or_byte_symbol(byte_sym.as_u32(), |this| { + this.emit_byte_str(byte_sym.as_byte_str()) + }); } fn encode_crate_num(&mut self, crate_num: CrateNum) { diff --git a/compiler/rustc_middle/src/thir.rs b/compiler/rustc_middle/src/thir.rs index d0e72a86d8ac0..bda8dcadbced8 100644 --- a/compiler/rustc_middle/src/thir.rs +++ b/compiler/rustc_middle/src/thir.rs @@ -526,7 +526,7 @@ pub enum ExprKind<'tcx> { Closure(Box>), /// A literal. Literal { - lit: &'tcx hir::Lit, + lit: hir::Lit, neg: bool, }, /// For literals that don't correspond to anything in the HIR diff --git a/compiler/rustc_mir_build/src/builder/expr/as_constant.rs b/compiler/rustc_mir_build/src/builder/expr/as_constant.rs index eb8e98ec3644d..d0d0c21463f85 100644 --- a/compiler/rustc_mir_build/src/builder/expr/as_constant.rs +++ b/compiler/rustc_mir_build/src/builder/expr/as_constant.rs @@ -49,7 +49,7 @@ pub(crate) fn as_constant_inner<'tcx>( let Expr { ty, temp_lifetime: _, span, ref kind } = *expr; match *kind { ExprKind::Literal { lit, neg } => { - let const_ = lit_to_mir_constant(tcx, LitToConstInput { lit: &lit.node, ty, neg }); + let const_ = lit_to_mir_constant(tcx, LitToConstInput { lit: lit.node, ty, neg }); ConstOperand { span, user_ty: None, const_ } } @@ -128,34 +128,35 @@ fn lit_to_mir_constant<'tcx>(tcx: TyCtxt<'tcx>, lit_input: LitToConstInput<'tcx> (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Slice(_)) => { - let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8], ()); + let allocation = Allocation::from_bytes_byte_aligned_immutable(data.as_byte_str(), ()); let allocation = tcx.mk_const_alloc(allocation); ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() } } - (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { - let id = 
tcx.allocate_bytes_dedup(data, CTFE_ALLOC_SALT); + (ast::LitKind::ByteStr(byte_sym, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => { + let id = tcx.allocate_bytes_dedup(byte_sym.as_byte_str(), CTFE_ALLOC_SALT); ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx)) } - (ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) => + (ast::LitKind::CStr(byte_sym, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) => { - let allocation = Allocation::from_bytes_byte_aligned_immutable(data as &[u8], ()); + let allocation = + Allocation::from_bytes_byte_aligned_immutable(byte_sym.as_byte_str(), ()); let allocation = tcx.mk_const_alloc(allocation); ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() } } (ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => { - ConstValue::Scalar(Scalar::from_uint(*n, Size::from_bytes(1))) + ConstValue::Scalar(Scalar::from_uint(n, Size::from_bytes(1))) } (ast::LitKind::Int(n, _), ty::Uint(_)) if !neg => trunc(n.get()), (ast::LitKind::Int(n, _), ty::Int(_)) => { trunc(if neg { (n.get() as i128).overflowing_neg().0 as u128 } else { n.get() }) } (ast::LitKind::Float(n, _), ty::Float(fty)) => { - parse_float_into_constval(*n, *fty, neg).unwrap() + parse_float_into_constval(n, *fty, neg).unwrap() } - (ast::LitKind::Bool(b), ty::Bool) => ConstValue::Scalar(Scalar::from_bool(*b)), - (ast::LitKind::Char(c), ty::Char) => ConstValue::Scalar(Scalar::from_char(*c)), + (ast::LitKind::Bool(b), ty::Bool) => ConstValue::Scalar(Scalar::from_bool(b)), + (ast::LitKind::Char(c), ty::Char) => ConstValue::Scalar(Scalar::from_char(c)), (ast::LitKind::Err(guar), _) => { - return Const::Ty(Ty::new_error(tcx, *guar), ty::Const::new_error(tcx, *guar)); + return Const::Ty(Ty::new_error(tcx, guar), ty::Const::new_error(tcx, guar)); } _ => bug!("invalid lit/ty combination in 
`lit_to_mir_constant`: {lit:?}: {ty:?}"), }; diff --git a/compiler/rustc_mir_build/src/thir/constant.rs b/compiler/rustc_mir_build/src/thir/constant.rs index b4fa55e1c1fdb..8e218a380e9ea 100644 --- a/compiler/rustc_mir_build/src/thir/constant.rs +++ b/compiler/rustc_mir_build/src/thir/constant.rs @@ -43,27 +43,23 @@ pub(crate) fn lit_to_const<'tcx>( let str_bytes = s.as_str().as_bytes(); ty::ValTree::from_raw_bytes(tcx, str_bytes) } - (ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) + (ast::LitKind::ByteStr(byte_sym, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Slice(_) | ty::Array(..)) => { - let bytes = data as &[u8]; - ty::ValTree::from_raw_bytes(tcx, bytes) + ty::ValTree::from_raw_bytes(tcx, byte_sym.as_byte_str()) } - (ast::LitKind::ByteStr(data, _), ty::Slice(_) | ty::Array(..)) + (ast::LitKind::ByteStr(byte_sym, _), ty::Slice(_) | ty::Array(..)) if tcx.features().deref_patterns() => { // Byte string literal patterns may have type `[u8]` or `[u8; N]` if `deref_patterns` is // enabled, in order to allow, e.g., `deref!(b"..."): Vec`. 
- let bytes = data as &[u8]; - ty::ValTree::from_raw_bytes(tcx, bytes) + ty::ValTree::from_raw_bytes(tcx, byte_sym.as_byte_str()) } (ast::LitKind::Byte(n), ty::Uint(ty::UintTy::U8)) => { - ty::ValTree::from_scalar_int(tcx, (*n).into()) + ty::ValTree::from_scalar_int(tcx, n.into()) } - (ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) => - { - let bytes = data as &[u8]; - ty::ValTree::from_raw_bytes(tcx, bytes) + (ast::LitKind::CStr(byte_sym, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) => { + ty::ValTree::from_raw_bytes(tcx, byte_sym.as_byte_str()) } (ast::LitKind::Int(n, _), ty::Uint(ui)) if !neg => { let scalar_int = trunc(n.get(), *ui); @@ -76,15 +72,15 @@ pub(crate) fn lit_to_const<'tcx>( ); ty::ValTree::from_scalar_int(tcx, scalar_int) } - (ast::LitKind::Bool(b), ty::Bool) => ty::ValTree::from_scalar_int(tcx, (*b).into()), + (ast::LitKind::Bool(b), ty::Bool) => ty::ValTree::from_scalar_int(tcx, b.into()), (ast::LitKind::Float(n, _), ty::Float(fty)) => { - let bits = parse_float_into_scalar(*n, *fty, neg).unwrap_or_else(|| { + let bits = parse_float_into_scalar(n, *fty, neg).unwrap_or_else(|| { tcx.dcx().bug(format!("couldn't parse float literal: {:?}", lit_input.lit)) }); ty::ValTree::from_scalar_int(tcx, bits) } - (ast::LitKind::Char(c), ty::Char) => ty::ValTree::from_scalar_int(tcx, (*c).into()), - (ast::LitKind::Err(guar), _) => return ty::Const::new_error(tcx, *guar), + (ast::LitKind::Char(c), ty::Char) => ty::ValTree::from_scalar_int(tcx, c.into()), + (ast::LitKind::Err(guar), _) => return ty::Const::new_error(tcx, guar), _ => return ty::Const::new_misc_error(tcx), }; diff --git a/compiler/rustc_mir_build/src/thir/pattern/mod.rs b/compiler/rustc_mir_build/src/thir/pattern/mod.rs index fcd106d78e253..e44a440b5c13c 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/mod.rs +++ 
b/compiler/rustc_mir_build/src/thir/pattern/mod.rs @@ -680,7 +680,7 @@ impl<'a, 'tcx> PatCtxt<'a, 'tcx> { Some(pat_ty) => pat_ty, None => self.typeck_results.node_type(expr.hir_id), }; - let lit_input = LitToConstInput { lit: &lit.node, ty: ct_ty, neg: *negated }; + let lit_input = LitToConstInput { lit: lit.node, ty: ct_ty, neg: *negated }; let constant = self.tcx.at(expr.span).lit_to_const(lit_input); self.const_to_pat(constant, ct_ty, expr.hir_id, lit.span).kind } diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs index 8940d10696d16..846710c339878 100644 --- a/compiler/rustc_serialize/src/serialize.rs +++ b/compiler/rustc_serialize/src/serialize.rs @@ -21,6 +21,11 @@ use thin_vec::ThinVec; /// [utf8]: https://en.wikipedia.org/w/index.php?title=UTF-8&oldid=1058865525#Codepage_layout const STR_SENTINEL: u8 = 0xC1; +/// For byte strings there are no bytes that cannot occur. Just use this value +/// as a best-effort sentinel. There is no validation skipped so the potential +/// for badness is lower than in the `STR_SENTINEL` case. +const BYTE_STR_SENTINEL: u8 = 0xC2; + /// A note about error handling. /// /// Encoders may be fallible, but in practice failure is rare and there are so @@ -72,6 +77,13 @@ pub trait Encoder { self.emit_u8(STR_SENTINEL); } + #[inline] + fn emit_byte_str(&mut self, v: &[u8]) { + self.emit_usize(v.len()); + self.emit_raw_bytes(v); + self.emit_u8(BYTE_STR_SENTINEL); + } + fn emit_raw_bytes(&mut self, s: &[u8]); } @@ -122,9 +134,19 @@ pub trait Decoder { let len = self.read_usize(); let bytes = self.read_raw_bytes(len + 1); assert!(bytes[len] == STR_SENTINEL); + // SAFETY: the presence of `STR_SENTINEL` gives us high (but not + // perfect) confidence that the bytes we just read truly are UTF-8. 
unsafe { std::str::from_utf8_unchecked(&bytes[..len]) } } + #[inline] + fn read_byte_str(&mut self) -> &[u8] { + let len = self.read_usize(); + let bytes = self.read_raw_bytes(len + 1); + assert!(bytes[len] == BYTE_STR_SENTINEL); + &bytes[..len] + } + fn read_raw_bytes(&mut self, len: usize) -> &[u8]; fn peek_byte(&self) -> u8; @@ -239,7 +261,7 @@ impl Encodable for str { impl Encodable for String { fn encode(&self, s: &mut S) { - s.emit_str(&self[..]); + s.emit_str(&self); } } diff --git a/compiler/rustc_span/src/lib.rs b/compiler/rustc_span/src/lib.rs index c8a29a2f68fd9..3d3a681c798e9 100644 --- a/compiler/rustc_span/src/lib.rs +++ b/compiler/rustc_span/src/lib.rs @@ -66,7 +66,9 @@ mod span_encoding; pub use span_encoding::{DUMMY_SP, Span}; pub mod symbol; -pub use symbol::{Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym}; +pub use symbol::{ + ByteSymbol, Ident, MacroRulesNormalizedIdent, STDLIB_STABLE_CRATES, Symbol, kw, sym, +}; mod analyze_source_file; pub mod fatal_error; @@ -1184,11 +1186,12 @@ rustc_index::newtype_index! { /// It is similar to rustc_type_ir's TyEncoder. pub trait SpanEncoder: Encoder { fn encode_span(&mut self, span: Span); - fn encode_symbol(&mut self, symbol: Symbol); + fn encode_symbol(&mut self, sym: Symbol); + fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol); fn encode_expn_id(&mut self, expn_id: ExpnId); fn encode_syntax_context(&mut self, syntax_context: SyntaxContext); - /// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a tcx. - /// Therefore, make sure to include the context when encode a `CrateNum`. + /// As a local identifier, a `CrateNum` is only meaningful within its context, e.g. within a + /// tcx. Therefore, make sure to include the context when encoding a `CrateNum`. 
fn encode_crate_num(&mut self, crate_num: CrateNum); fn encode_def_index(&mut self, def_index: DefIndex); fn encode_def_id(&mut self, def_id: DefId); @@ -1201,8 +1204,12 @@ impl SpanEncoder for FileEncoder { span.hi.encode(self); } - fn encode_symbol(&mut self, symbol: Symbol) { - self.emit_str(symbol.as_str()); + fn encode_symbol(&mut self, sym: Symbol) { + self.emit_str(sym.as_str()); + } + + fn encode_byte_symbol(&mut self, byte_sym: ByteSymbol) { + self.emit_byte_str(byte_sym.as_byte_str()); } fn encode_expn_id(&mut self, _expn_id: ExpnId) { @@ -1239,6 +1246,12 @@ impl Encodable for Symbol { } } +impl Encodable for ByteSymbol { + fn encode(&self, s: &mut E) { + s.encode_byte_symbol(*self); + } +} + impl Encodable for ExpnId { fn encode(&self, s: &mut E) { s.encode_expn_id(*self) @@ -1280,6 +1293,7 @@ impl Encodable for AttrId { pub trait SpanDecoder: Decoder { fn decode_span(&mut self) -> Span; fn decode_symbol(&mut self) -> Symbol; + fn decode_byte_symbol(&mut self) -> ByteSymbol; fn decode_expn_id(&mut self) -> ExpnId; fn decode_syntax_context(&mut self) -> SyntaxContext; fn decode_crate_num(&mut self) -> CrateNum; @@ -1300,6 +1314,10 @@ impl SpanDecoder for MemDecoder<'_> { Symbol::intern(self.read_str()) } + fn decode_byte_symbol(&mut self) -> ByteSymbol { + ByteSymbol::intern(self.read_byte_str()) + } + fn decode_expn_id(&mut self) -> ExpnId { panic!("cannot decode `ExpnId` with `MemDecoder`"); } @@ -1337,6 +1355,12 @@ impl Decodable for Symbol { } } +impl Decodable for ByteSymbol { + fn decode(s: &mut D) -> ByteSymbol { + s.decode_byte_symbol() + } +} + impl Decodable for ExpnId { fn decode(s: &mut D) -> ExpnId { s.decode_expn_id() diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 4b8762d0dd1c2..34869a38bb470 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -2583,7 +2583,7 @@ impl fmt::Display for MacroRulesNormalizedIdent { } } -/// An interned string. 
+/// An interned UTF-8 string. /// /// Internally, a `Symbol` is implemented as an index, and all operations /// (including hashing, equality, and ordering) operate on that index. The use @@ -2595,20 +2595,23 @@ impl fmt::Display for MacroRulesNormalizedIdent { #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct Symbol(SymbolIndex); +// Used within both `Symbol` and `ByteSymbol`. rustc_index::newtype_index! { #[orderable] struct SymbolIndex {} } impl Symbol { + /// Avoid this except for things like deserialization of previously + /// serialized symbols, and testing. Use `intern` instead. pub const fn new(n: u32) -> Self { Symbol(SymbolIndex::from_u32(n)) } /// Maps a string to its interned representation. #[rustc_diagnostic_item = "SymbolIntern"] - pub fn intern(string: &str) -> Self { - with_session_globals(|session_globals| session_globals.symbol_interner.intern(string)) + pub fn intern(str: &str) -> Self { + with_session_globals(|session_globals| session_globals.symbol_interner.intern_str(str)) } /// Access the underlying string. This is a slowish operation because it @@ -2621,7 +2624,7 @@ impl Symbol { /// it works out ok. pub fn as_str(&self) -> &str { with_session_globals(|session_globals| unsafe { - std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get(*self)) + std::mem::transmute::<&str, &str>(session_globals.symbol_interner.get_str(*self)) }) } @@ -2678,56 +2681,130 @@ impl StableCompare for Symbol { } } +/// Like `Symbol`, but for byte strings. `ByteSymbol` is used less widely, so +/// it has fewer operations defined than `Symbol`. +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ByteSymbol(SymbolIndex); + +impl ByteSymbol { + /// Avoid this except for things like deserialization of previously + /// serialized symbols, and testing. Use `intern` instead. + pub const fn new(n: u32) -> Self { + ByteSymbol(SymbolIndex::from_u32(n)) + } + + /// Maps a byte string to its interned representation. 
+ pub fn intern(byte_str: &[u8]) -> Self { + with_session_globals(|session_globals| { + session_globals.symbol_interner.intern_byte_str(byte_str) + }) + } + + /// Like `Symbol::as_str`. + pub fn as_byte_str(&self) -> &[u8] { + with_session_globals(|session_globals| unsafe { + std::mem::transmute::<&[u8], &[u8]>(session_globals.symbol_interner.get_byte_str(*self)) + }) + } + + pub fn as_u32(self) -> u32 { + self.0.as_u32() + } +} + +impl fmt::Debug for ByteSymbol { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(self.as_byte_str(), f) + } +} + +impl HashStable for ByteSymbol { + #[inline] + fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) { + self.as_byte_str().hash_stable(hcx, hasher); + } +} + +// Interner used for both `Symbol`s and `ByteSymbol`s. If a string and a byte +// string with identical contents (e.g. "foo" and b"foo") are both interned, +// only one copy will be stored and the resulting `Symbol` and `ByteSymbol` +// will have the same index. pub(crate) struct Interner(Lock); -// The `&'static str`s in this type actually point into the arena. +// The `&'static [u8]`s in this type actually point into the arena. // // This type is private to prevent accidentally constructing more than one // `Interner` on the same thread, which makes it easy to mix up `Symbol`s // between `Interner`s. struct InternerInner { arena: DroplessArena, - strings: FxIndexSet<&'static str>, + byte_strs: FxIndexSet<&'static [u8]>, } impl Interner { + // These arguments are `&str`, but because of the sharing, we are + // effectively pre-interning all these strings for both `Symbol` and + // `ByteSymbol`. 
fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self { - let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied())); + let byte_strs = FxIndexSet::from_iter( + init.iter().copied().chain(extra.iter().copied()).map(|str| str.as_bytes()), + ); assert_eq!( - strings.len(), + byte_strs.len(), init.len() + extra.len(), - "there are duplicate symbols in the rustc symbol list and the extra symbols added by the driver", + "duplicate symbols in the rustc symbol list and the extra symbols added by the driver", ); - Interner(Lock::new(InternerInner { arena: Default::default(), strings })) + Interner(Lock::new(InternerInner { arena: Default::default(), byte_strs })) + } + + fn intern_str(&self, str: &str) -> Symbol { + Symbol::new(self.intern_inner(str.as_bytes())) + } + + fn intern_byte_str(&self, byte_str: &[u8]) -> ByteSymbol { + ByteSymbol::new(self.intern_inner(byte_str)) } #[inline] - fn intern(&self, string: &str) -> Symbol { + fn intern_inner(&self, byte_str: &[u8]) -> u32 { let mut inner = self.0.lock(); - if let Some(idx) = inner.strings.get_index_of(string) { - return Symbol::new(idx as u32); + if let Some(idx) = inner.byte_strs.get_index_of(byte_str) { + return idx as u32; } - let string: &str = inner.arena.alloc_str(string); + let byte_str: &[u8] = inner.arena.alloc_slice(byte_str); // SAFETY: we can extend the arena allocation to `'static` because we // only access these while the arena is still alive. - let string: &'static str = unsafe { &*(string as *const str) }; + let byte_str: &'static [u8] = unsafe { &*(byte_str as *const [u8]) }; // This second hash table lookup can be avoided by using `RawEntryMut`, // but this code path isn't hot enough for it to be worth it. See // #91445 for details. 
+ let (idx, is_new) = inner.byte_strs.insert_full(byte_str); debug_assert!(is_new); // due to the get_index_of check above - Symbol::new(idx as u32) + idx as u32 } /// Get the symbol as a string. /// /// [`Symbol::as_str()`] should be used in preference to this function. - fn get(&self, symbol: Symbol) -> &str { - self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap() + fn get_str(&self, symbol: Symbol) -> &str { + let byte_str = self.get_inner(symbol.0.as_usize()); + // SAFETY: known to be a UTF8 string because it's a `Symbol`. + unsafe { str::from_utf8_unchecked(byte_str) } + } + + /// Get the byte symbol as a byte string. + /// + /// [`ByteSymbol::as_byte_str()`] should be used in preference to this function. + fn get_byte_str(&self, symbol: ByteSymbol) -> &[u8] { + self.get_inner(symbol.0.as_usize()) + } + + fn get_inner(&self, index: usize) -> &[u8] { + self.0.lock().byte_strs.get_index(index).unwrap() } } @@ -2822,9 +2899,11 @@ impl Symbol { self != sym::empty && self != kw::Underscore && !self.is_path_segment_keyword() } - /// Was this symbol predefined in the compiler's `symbols!` macro - pub fn is_predefined(self) -> bool { - self.as_u32() < PREDEFINED_SYMBOLS_COUNT + /// Was this symbol index predefined in the compiler's `symbols!` macro? + /// Note: this applies to both `Symbol`s and `ByteSymbol`s, which is why it + /// takes a `u32` argument instead of a `&self` argument. Use with care. 
+ pub fn is_predefined(index: u32) -> bool { + index < PREDEFINED_SYMBOLS_COUNT } } diff --git a/compiler/rustc_span/src/symbol/tests.rs b/compiler/rustc_span/src/symbol/tests.rs index 660d0d7179afa..bf0660aa51085 100644 --- a/compiler/rustc_span/src/symbol/tests.rs +++ b/compiler/rustc_span/src/symbol/tests.rs @@ -5,14 +5,14 @@ use crate::create_default_session_globals_then; fn interner_tests() { let i = Interner::prefill(&[], &[]); // first one is zero: - assert_eq!(i.intern("dog"), Symbol::new(0)); - // re-use gets the same entry: - assert_eq!(i.intern("dog"), Symbol::new(0)); + assert_eq!(i.intern_str("dog"), Symbol::new(0)); + // re-use gets the same entry, even with a `ByteSymbol` + assert_eq!(i.intern_byte_str(b"dog"), ByteSymbol::new(0)); // different string gets a different #: - assert_eq!(i.intern("cat"), Symbol::new(1)); - assert_eq!(i.intern("cat"), Symbol::new(1)); + assert_eq!(i.intern_byte_str(b"cat"), ByteSymbol::new(1)); + assert_eq!(i.intern_str("cat"), Symbol::new(1)); // dog is still at zero - assert_eq!(i.intern("dog"), Symbol::new(0)); + assert_eq!(i.intern_str("dog"), Symbol::new(0)); } #[test] diff --git a/compiler/rustc_ty_utils/src/consts.rs b/compiler/rustc_ty_utils/src/consts.rs index 60f8bd9d83ad3..eb751da7c7363 100644 --- a/compiler/rustc_ty_utils/src/consts.rs +++ b/compiler/rustc_ty_utils/src/consts.rs @@ -120,7 +120,7 @@ fn recurse_build<'tcx>( } &ExprKind::Literal { lit, neg } => { let sp = node.span; - tcx.at(sp).lit_to_const(LitToConstInput { lit: &lit.node, ty: node.ty, neg }) + tcx.at(sp).lit_to_const(LitToConstInput { lit: lit.node, ty: node.ty, neg }) } &ExprKind::NonHirLiteral { lit, user_ty: _ } => { let val = ty::ValTree::from_scalar_int(tcx, lit); diff --git a/src/tools/clippy/clippy_lints/src/approx_const.rs b/src/tools/clippy/clippy_lints/src/approx_const.rs index 852e48cbcaeec..5ed4c82634aa8 100644 --- a/src/tools/clippy/clippy_lints/src/approx_const.rs +++ b/src/tools/clippy/clippy_lints/src/approx_const.rs @@ -74,7 
+74,7 @@ impl ApproxConstant { } impl LateLintPass<'_> for ApproxConstant { - fn check_lit(&mut self, cx: &LateContext<'_>, _hir_id: HirId, lit: &Lit, _negated: bool) { + fn check_lit(&mut self, cx: &LateContext<'_>, _hir_id: HirId, lit: Lit, _negated: bool) { match lit.node { LitKind::Float(s, LitFloatType::Suffixed(fty)) => match fty { FloatTy::F16 => self.check_known_consts(cx, lit.span, s, "f16"), diff --git a/src/tools/clippy/clippy_lints/src/bool_assert_comparison.rs b/src/tools/clippy/clippy_lints/src/bool_assert_comparison.rs index 8f95e44bf8531..581fe33ea0b38 100644 --- a/src/tools/clippy/clippy_lints/src/bool_assert_comparison.rs +++ b/src/tools/clippy/clippy_lints/src/bool_assert_comparison.rs @@ -42,7 +42,7 @@ fn extract_bool_lit(e: &Expr<'_>) -> Option { }) = e.kind && !e.span.from_expansion() { - Some(*b) + Some(b) } else { None } diff --git a/src/tools/clippy/clippy_lints/src/casts/manual_dangling_ptr.rs b/src/tools/clippy/clippy_lints/src/casts/manual_dangling_ptr.rs index d9e88d6a401ca..92910cf8adf5d 100644 --- a/src/tools/clippy/clippy_lints/src/casts/manual_dangling_ptr.rs +++ b/src/tools/clippy/clippy_lints/src/casts/manual_dangling_ptr.rs @@ -46,7 +46,7 @@ pub(super) fn check(cx: &LateContext<'_>, expr: &Expr<'_>, from: &Expr<'_>, to: fn is_expr_const_aligned(cx: &LateContext<'_>, expr: &Expr<'_>, to: &Ty<'_>) -> bool { match expr.kind { ExprKind::Call(fun, _) => is_align_of_call(cx, fun, to), - ExprKind::Lit(lit) => is_literal_aligned(cx, lit, to), + ExprKind::Lit(lit) => is_literal_aligned(cx, &lit, to), _ => false, } } diff --git a/src/tools/clippy/clippy_lints/src/casts/unnecessary_cast.rs b/src/tools/clippy/clippy_lints/src/casts/unnecessary_cast.rs index 010f09d4c1d31..c88a0539d70e1 100644 --- a/src/tools/clippy/clippy_lints/src/casts/unnecessary_cast.rs +++ b/src/tools/clippy/clippy_lints/src/casts/unnecessary_cast.rs @@ -243,7 +243,7 @@ fn lint_unnecessary_cast( ); } -fn get_numeric_literal<'e>(expr: &'e Expr<'e>) -> Option<&'e Lit> { 
+fn get_numeric_literal<'e>(expr: &'e Expr<'e>) -> Option { match expr.kind { ExprKind::Lit(lit) => Some(lit), ExprKind::Unary(UnOp::Neg, e) => { diff --git a/src/tools/clippy/clippy_lints/src/default_numeric_fallback.rs b/src/tools/clippy/clippy_lints/src/default_numeric_fallback.rs index 784214c29af9f..1507f1ed30539 100644 --- a/src/tools/clippy/clippy_lints/src/default_numeric_fallback.rs +++ b/src/tools/clippy/clippy_lints/src/default_numeric_fallback.rs @@ -83,7 +83,7 @@ impl<'a, 'tcx> NumericFallbackVisitor<'a, 'tcx> { } /// Check whether a passed literal has potential to cause fallback or not. - fn check_lit(&self, lit: &Lit, lit_ty: Ty<'tcx>, emit_hir_id: HirId) { + fn check_lit(&self, lit: Lit, lit_ty: Ty<'tcx>, emit_hir_id: HirId) { if !lit.span.in_external_macro(self.cx.sess().source_map()) && matches!(self.ty_bounds.last(), Some(ExplicitTyBound(false))) && matches!( @@ -210,7 +210,7 @@ impl<'tcx> Visitor<'tcx> for NumericFallbackVisitor<'_, 'tcx> { ExprKind::Lit(lit) => { let ty = self.cx.typeck_results().expr_ty(expr); - self.check_lit(lit, ty, expr.hir_id); + self.check_lit(*lit, ty, expr.hir_id); return; }, diff --git a/src/tools/clippy/clippy_lints/src/large_include_file.rs b/src/tools/clippy/clippy_lints/src/large_include_file.rs index 621a2af1d322b..8707612fbdd0a 100644 --- a/src/tools/clippy/clippy_lints/src/large_include_file.rs +++ b/src/tools/clippy/clippy_lints/src/large_include_file.rs @@ -57,7 +57,7 @@ impl LateLintPass<'_> for LargeIncludeFile { if let ExprKind::Lit(lit) = &expr.kind && let len = match &lit.node { // include_bytes - LitKind::ByteStr(bstr, _) => bstr.len(), + LitKind::ByteStr(bstr, _) => bstr.as_byte_str().len(), // include_str LitKind::Str(sym, _) => sym.as_str().len(), _ => return, diff --git a/src/tools/clippy/clippy_lints/src/manual_ignore_case_cmp.rs b/src/tools/clippy/clippy_lints/src/manual_ignore_case_cmp.rs index 57c03fbb2ed2b..f7d9ec1fae8e4 100644 --- a/src/tools/clippy/clippy_lints/src/manual_ignore_case_cmp.rs 
+++ b/src/tools/clippy/clippy_lints/src/manual_ignore_case_cmp.rs @@ -41,12 +41,12 @@ declare_clippy_lint! { declare_lint_pass!(ManualIgnoreCaseCmp => [MANUAL_IGNORE_CASE_CMP]); -enum MatchType<'a, 'b> { +enum MatchType<'a> { ToAscii(bool, Ty<'a>), - Literal(&'b LitKind), + Literal(LitKind), } -fn get_ascii_type<'a, 'b>(cx: &LateContext<'a>, kind: rustc_hir::ExprKind<'b>) -> Option<(Span, MatchType<'a, 'b>)> { +fn get_ascii_type<'a>(cx: &LateContext<'a>, kind: rustc_hir::ExprKind<'_>) -> Option<(Span, MatchType<'a>)> { if let MethodCall(path, expr, _, _) = kind { let is_lower = match path.ident.name { sym::to_ascii_lowercase => true, @@ -63,7 +63,7 @@ fn get_ascii_type<'a, 'b>(cx: &LateContext<'a>, kind: rustc_hir::ExprKind<'b>) - return Some((expr.span, ToAscii(is_lower, ty_raw))); } } else if let Lit(expr) = kind { - return Some((expr.span, Literal(&expr.node))); + return Some((expr.span, Literal(expr.node))); } None } diff --git a/src/tools/clippy/clippy_lints/src/manual_strip.rs b/src/tools/clippy/clippy_lints/src/manual_strip.rs index 9e911e61f1968..6bf43a1c6d47c 100644 --- a/src/tools/clippy/clippy_lints/src/manual_strip.rs +++ b/src/tools/clippy/clippy_lints/src/manual_strip.rs @@ -184,7 +184,7 @@ fn eq_pattern_length<'tcx>(cx: &LateContext<'tcx>, pattern: &Expr<'_>, expr: &'t .. }) = expr.kind { - constant_length(cx, pattern).is_some_and(|length| *n == length) + constant_length(cx, pattern).is_some_and(|length| n == length) } else { len_arg(cx, expr).is_some_and(|arg| eq_expr_value(cx, pattern, arg)) } diff --git a/src/tools/clippy/clippy_lints/src/matches/match_like_matches.rs b/src/tools/clippy/clippy_lints/src/matches/match_like_matches.rs index f14b69d91ce4b..5816da5695eb6 100644 --- a/src/tools/clippy/clippy_lints/src/matches/match_like_matches.rs +++ b/src/tools/clippy/clippy_lints/src/matches/match_like_matches.rs @@ -159,7 +159,7 @@ fn find_bool_lit(ex: &ExprKind<'_>) -> Option { node: LitKind::Bool(b), .. 
}) = exp.kind { - Some(*b) + Some(b) } else { None } diff --git a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs index dbb29ee776b18..ede68f3094138 100644 --- a/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs +++ b/src/tools/clippy/clippy_lints/src/matches/match_same_arms.rs @@ -12,7 +12,7 @@ use rustc_hir::{Arm, Expr, HirId, HirIdMap, HirIdMapEntry, HirIdSet, Pat, PatExp use rustc_lint::builtin::NON_EXHAUSTIVE_OMITTED_PATTERNS; use rustc_lint::{LateContext, LintContext}; use rustc_middle::ty; -use rustc_span::{ErrorGuaranteed, Span, Symbol}; +use rustc_span::{ByteSymbol, ErrorGuaranteed, Span, Symbol}; use super::MATCH_SAME_ARMS; @@ -193,7 +193,7 @@ enum NormalizedPat<'a> { Or(&'a [Self]), Path(Option), LitStr(Symbol), - LitBytes(&'a [u8]), + LitBytes(ByteSymbol), LitInt(u128), LitBool(bool), Range(PatRange), @@ -332,7 +332,9 @@ impl<'a> NormalizedPat<'a> { // TODO: Handle negative integers. They're currently treated as a wild match. PatExprKind::Lit { lit, negated: false } => match lit.node { LitKind::Str(sym, _) => Self::LitStr(sym), - LitKind::ByteStr(ref bytes, _) | LitKind::CStr(ref bytes, _) => Self::LitBytes(bytes), + LitKind::ByteStr(byte_sym, _) | LitKind::CStr(byte_sym, _) => { + Self::LitBytes(byte_sym) + } LitKind::Byte(val) => Self::LitInt(val.into()), LitKind::Char(val) => Self::LitInt(val.into()), LitKind::Int(val, _) => Self::LitInt(val.get()), diff --git a/src/tools/clippy/clippy_lints/src/methods/open_options.rs b/src/tools/clippy/clippy_lints/src/methods/open_options.rs index fd368024177ae..9b5f138295c31 100644 --- a/src/tools/clippy/clippy_lints/src/methods/open_options.rs +++ b/src/tools/clippy/clippy_lints/src/methods/open_options.rs @@ -76,7 +76,7 @@ fn get_open_options( .. } = span { - Argument::Set(*lit) + Argument::Set(lit) } else { // The function is called with a literal which is not a boolean literal. 
// This is theoretically possible, but not very likely. diff --git a/src/tools/clippy/clippy_lints/src/missing_asserts_for_indexing.rs b/src/tools/clippy/clippy_lints/src/missing_asserts_for_indexing.rs index c8e3462b24ef4..cf0c85990b150 100644 --- a/src/tools/clippy/clippy_lints/src/missing_asserts_for_indexing.rs +++ b/src/tools/clippy/clippy_lints/src/missing_asserts_for_indexing.rs @@ -104,7 +104,7 @@ fn len_comparison<'hir>( ) -> Option<(LengthComparison, usize, &'hir Expr<'hir>)> { macro_rules! int_lit_pat { ($id:ident) => { - ExprKind::Lit(&Spanned { + ExprKind::Lit(Spanned { node: LitKind::Int(Pu128($id), _), .. }) diff --git a/src/tools/clippy/clippy_lints/src/utils/author.rs b/src/tools/clippy/clippy_lints/src/utils/author.rs index 3a08531cf1c9c..ac92ab5a245cc 100644 --- a/src/tools/clippy/clippy_lints/src/utils/author.rs +++ b/src/tools/clippy/clippy_lints/src/utils/author.rs @@ -324,7 +324,7 @@ impl<'a, 'tcx> PrintVisitor<'a, 'tcx> { } } - fn lit(&self, lit: &Binding<&Lit>) { + fn lit(&self, lit: &Binding<Lit>) { let kind = |kind| chain!(self, "let LitKind::{kind} = {lit}.node"); macro_rules! kind { ($($t:tt)*) => (kind(format_args!($($t)*))); diff --git a/src/tools/clippy/clippy_utils/src/consts.rs b/src/tools/clippy/clippy_utils/src/consts.rs index aaa071fd5c931..09299c869dcf2 100644 --- a/src/tools/clippy/clippy_utils/src/consts.rs +++ b/src/tools/clippy/clippy_utils/src/consts.rs @@ -4,8 +4,6 @@ //! executable MIR bodies, so we have to do this instead. #![allow(clippy::float_cmp)] -use std::sync::Arc; - use crate::source::{SpanRangeExt, walk_span_to_context}; use crate::{clip, is_direct_expn_of, sext, unsext}; @@ -38,7 +36,7 @@ pub enum Constant<'tcx> { /// A `String` (e.g., "abc"). Str(String), /// A binary string (e.g., `b"abc"`). - Binary(Arc<[u8]>), + Binary(Vec<u8>), /// A single `char` (e.g., `'a'`). Char(char), /// An integer's bit representation.
@@ -306,7 +304,9 @@ pub fn lit_to_mir_constant<'tcx>(lit: &LitKind, ty: Option<Ty<'tcx>>) -> Constan match *lit { LitKind::Str(ref is, _) => Constant::Str(is.to_string()), LitKind::Byte(b) => Constant::Int(u128::from(b)), - LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => Constant::Binary(Arc::clone(s)), + LitKind::ByteStr(ref s, _) | LitKind::CStr(ref s, _) => { + Constant::Binary(s.as_byte_str().to_vec()) + } LitKind::Char(c) => Constant::Char(c), LitKind::Int(n, _) => Constant::Int(n.get()), LitKind::Float(ref is, LitFloatType::Suffixed(fty)) => match fty { @@ -568,7 +568,9 @@ impl<'tcx> ConstEvalCtxt<'tcx> { } else { match &lit.node { LitKind::Str(is, _) => Some(is.is_empty()), - LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => Some(s.is_empty()), + LitKind::ByteStr(s, _) | LitKind::CStr(s, _) => { + Some(s.as_byte_str().is_empty()) + } _ => None, } }