Skip to content

Introduce ByteSymbol #141875

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions compiler/rustc_ast/src/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
//! - [`UnOp`], [`BinOp`], and [`BinOpKind`]: Unary and binary operators.

use std::borrow::Cow;
use std::sync::Arc;
use std::{cmp, fmt};

pub use GenericArgs::*;
Expand All @@ -32,7 +31,7 @@ use rustc_data_structures::tagged_ptr::Tag;
use rustc_macros::{Decodable, Encodable, HashStable_Generic};
pub use rustc_span::AttrId;
use rustc_span::source_map::{Spanned, respan};
use rustc_span::{DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym};
use rustc_span::{ByteSymbol, DUMMY_SP, ErrorGuaranteed, Ident, Span, Symbol, kw, sym};
use thin_vec::{ThinVec, thin_vec};

pub use crate::format::*;
Expand Down Expand Up @@ -1801,10 +1800,17 @@ pub enum ExprKind {
Become(P<Expr>),

/// Bytes included via `include_bytes!`
///
/// Added for optimization purposes to avoid the need to escape
/// large binary blobs - should always behave like [`ExprKind::Lit`]
/// with a `ByteStr` literal.
IncludedBytes(Arc<[u8]>),
///
/// The value is stored as a `ByteSymbol`. It's unfortunate that we need to
/// intern (hash) the bytes because they're likely to be large and unique.
/// But it's necessary because this will eventually be lowered to
/// `LitKind::ByteStr`, which needs a `ByteSymbol` to impl `Copy` and avoid
/// arena allocation.
IncludedBytes(ByteSymbol),

/// A `format_args!()` expression.
FormatArgs(P<FormatArgs>),
Expand Down Expand Up @@ -2062,7 +2068,7 @@ impl YieldKind {
}

/// A literal in a meta item.
#[derive(Clone, Encodable, Decodable, Debug, HashStable_Generic)]
#[derive(Clone, Copy, Encodable, Decodable, Debug, HashStable_Generic)]
pub struct MetaItemLit {
/// The original literal as written in the source code.
pub symbol: Symbol,
Expand Down Expand Up @@ -2125,16 +2131,18 @@ pub enum LitFloatType {
/// deciding the `LitKind`. This means that float literals like `1f32` are
/// classified by this type as `Float`. This is different to `token::LitKind`
/// which does *not* consider the suffix.
#[derive(Clone, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
#[derive(Clone, Copy, Encodable, Decodable, Debug, Hash, Eq, PartialEq, HashStable_Generic)]
pub enum LitKind {
/// A string literal (`"foo"`). The symbol is unescaped, and so may differ
/// from the original token's symbol.
Str(Symbol, StrStyle),
/// A byte string (`b"foo"`). Not stored as a symbol because it might be
/// non-utf8, and symbols only allow utf8 strings.
ByteStr(Arc<[u8]>, StrStyle),
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end.
CStr(Arc<[u8]>, StrStyle),
/// A byte string (`b"foo"`). The symbol is unescaped, and so may differ
/// from the original token's symbol.
ByteStr(ByteSymbol, StrStyle),
/// A C String (`c"foo"`). Guaranteed to only have `\0` at the end. The
/// symbol is unescaped, and so may differ from the original token's
/// symbol.
CStr(ByteSymbol, StrStyle),
/// A byte char (`b'f'`).
Byte(u8),
/// A character literal (`'a'`).
Expand Down
25 changes: 12 additions & 13 deletions compiler/rustc_ast/src/util/literal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use std::{ascii, fmt, str};
use rustc_literal_escaper::{
MixedUnit, Mode, byte_from_char, unescape_byte, unescape_char, unescape_mixed, unescape_unicode,
};
use rustc_span::{Span, Symbol, kw, sym};
use rustc_span::{ByteSymbol, Span, Symbol, kw, sym};
use tracing::debug;

use crate::ast::{self, LitKind, MetaItemLit, StrStyle};
Expand Down Expand Up @@ -117,13 +117,12 @@ impl LitKind {
assert!(!err.is_fatal(), "failed to unescape string literal")
}
});
LitKind::ByteStr(buf.into(), StrStyle::Cooked)
LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Cooked)
}
token::ByteStrRaw(n) => {
// Raw strings have no escapes so we can convert the symbol
// directly to a `Arc<u8>`.
// Raw byte strings have no escapes so no work is needed here.
let buf = symbol.as_str().to_owned().into_bytes();
LitKind::ByteStr(buf.into(), StrStyle::Raw(n))
LitKind::ByteStr(ByteSymbol::intern(&buf), StrStyle::Raw(n))
}
token::CStr => {
let s = symbol.as_str();
Expand All @@ -138,15 +137,15 @@ impl LitKind {
}
});
buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Cooked)
LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Cooked)
}
token::CStrRaw(n) => {
// Raw C strings have no escapes so the symbol's bytes can be used
// directly; a terminating NUL is appended before the buffer is
// interned as a `ByteSymbol`.
let mut buf = symbol.as_str().to_owned().into_bytes();
buf.push(0);
LitKind::CStr(buf.into(), StrStyle::Raw(n))
LitKind::CStr(ByteSymbol::intern(&buf), StrStyle::Raw(n))
}
token::Err(guar) => LitKind::Err(guar),
})
Expand All @@ -168,12 +167,12 @@ impl fmt::Display for LitKind {
delim = "#".repeat(n as usize),
string = sym
)?,
LitKind::ByteStr(ref bytes, StrStyle::Cooked) => {
write!(f, "b\"{}\"", escape_byte_str_symbol(bytes))?
LitKind::ByteStr(ref byte_sym, StrStyle::Cooked) => {
write!(f, "b\"{}\"", escape_byte_str_symbol(byte_sym.as_byte_str()))?
}
LitKind::ByteStr(ref bytes, StrStyle::Raw(n)) => {
LitKind::ByteStr(ref byte_sym, StrStyle::Raw(n)) => {
// Unwrap because raw byte string literals can only contain ASCII.
let symbol = str::from_utf8(bytes).unwrap();
let symbol = str::from_utf8(byte_sym.as_byte_str()).unwrap();
write!(
f,
"br{delim}\"{string}\"{delim}",
Expand All @@ -182,11 +181,11 @@ impl fmt::Display for LitKind {
)?;
}
LitKind::CStr(ref bytes, StrStyle::Cooked) => {
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes))?
write!(f, "c\"{}\"", escape_byte_str_symbol(bytes.as_byte_str()))?
}
LitKind::CStr(ref bytes, StrStyle::Raw(n)) => {
// This can only be valid UTF-8.
let symbol = str::from_utf8(bytes).unwrap();
let symbol = str::from_utf8(bytes.as_byte_str()).unwrap();
write!(f, "cr{delim}\"{symbol}\"{delim}", delim = "#".repeat(n as usize),)?;
}
LitKind::Int(n, ty) => {
Expand Down
24 changes: 9 additions & 15 deletions compiler/rustc_ast_lowering/src/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,11 @@ impl<'hir> LoweringContext<'_, 'hir> {
hir::ExprKind::Unary(op, ohs)
}
ExprKind::Lit(token_lit) => hir::ExprKind::Lit(self.lower_lit(token_lit, e.span)),
ExprKind::IncludedBytes(bytes) => {
let lit = self.arena.alloc(respan(
ExprKind::IncludedBytes(byte_sym) => {
let lit = respan(
self.lower_span(e.span),
LitKind::ByteStr(Arc::clone(bytes), StrStyle::Cooked),
));
LitKind::ByteStr(*byte_sym, StrStyle::Cooked),
);
hir::ExprKind::Lit(lit)
}
ExprKind::Cast(expr, ty) => {
Expand Down Expand Up @@ -420,19 +420,15 @@ impl<'hir> LoweringContext<'_, 'hir> {
})
}

pub(crate) fn lower_lit(
&mut self,
token_lit: &token::Lit,
span: Span,
) -> &'hir Spanned<LitKind> {
pub(crate) fn lower_lit(&mut self, token_lit: &token::Lit, span: Span) -> hir::Lit {
let lit_kind = match LitKind::from_token_lit(*token_lit) {
Ok(lit_kind) => lit_kind,
Err(err) => {
let guar = report_lit_error(&self.tcx.sess.psess, err, *token_lit, span);
LitKind::Err(guar)
}
};
self.arena.alloc(respan(self.lower_span(span), lit_kind))
respan(self.lower_span(span), lit_kind)
}

fn lower_unop(&mut self, u: UnOp) -> hir::UnOp {
Expand Down Expand Up @@ -2140,10 +2136,10 @@ impl<'hir> LoweringContext<'_, 'hir> {
}

fn expr_uint(&mut self, sp: Span, ty: ast::UintTy, value: u128) -> hir::Expr<'hir> {
let lit = self.arena.alloc(hir::Lit {
let lit = hir::Lit {
span: sp,
node: ast::LitKind::Int(value.into(), ast::LitIntType::Unsigned(ty)),
});
};
self.expr(sp, hir::ExprKind::Lit(lit))
}

Expand All @@ -2160,9 +2156,7 @@ impl<'hir> LoweringContext<'_, 'hir> {
}

pub(super) fn expr_str(&mut self, sp: Span, value: Symbol) -> hir::Expr<'hir> {
let lit = self
.arena
.alloc(hir::Lit { span: sp, node: ast::LitKind::Str(value, ast::StrStyle::Cooked) });
let lit = hir::Lit { span: sp, node: ast::LitKind::Str(value, ast::StrStyle::Cooked) };
self.expr(sp, hir::ExprKind::Lit(lit))
}

Expand Down
12 changes: 4 additions & 8 deletions compiler/rustc_ast_lowering/src/pat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -390,19 +390,15 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
allow_paths: bool,
) -> &'hir hir::PatExpr<'hir> {
let span = self.lower_span(expr.span);
let err = |guar| hir::PatExprKind::Lit {
lit: self.arena.alloc(respan(span, LitKind::Err(guar))),
negated: false,
};
let err =
|guar| hir::PatExprKind::Lit { lit: respan(span, LitKind::Err(guar)), negated: false };
let kind = match &expr.kind {
ExprKind::Lit(lit) => {
hir::PatExprKind::Lit { lit: self.lower_lit(lit, span), negated: false }
}
ExprKind::ConstBlock(c) => hir::PatExprKind::ConstBlock(self.lower_const_block(c)),
ExprKind::IncludedBytes(bytes) => hir::PatExprKind::Lit {
lit: self
.arena
.alloc(respan(span, LitKind::ByteStr(Arc::clone(bytes), StrStyle::Cooked))),
ExprKind::IncludedBytes(byte_sym) => hir::PatExprKind::Lit {
lit: respan(span, LitKind::ByteStr(*byte_sym, StrStyle::Cooked)),
negated: false,
},
ExprKind::Err(guar) => err(*guar),
Expand Down
8 changes: 6 additions & 2 deletions compiler/rustc_ast_pretty/src/pprust/state/expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -465,8 +465,12 @@ impl<'a> State<'a> {
ast::ExprKind::Lit(token_lit) => {
self.print_token_literal(*token_lit, expr.span);
}
ast::ExprKind::IncludedBytes(bytes) => {
let lit = token::Lit::new(token::ByteStr, escape_byte_str_symbol(bytes), None);
ast::ExprKind::IncludedBytes(byte_sym) => {
let lit = token::Lit::new(
token::ByteStr,
escape_byte_str_symbol(byte_sym.as_byte_str()),
None,
);
self.print_token_literal(lit, expr.span)
}
ast::ExprKind::Cast(expr, ty) => {
Expand Down
8 changes: 4 additions & 4 deletions compiler/rustc_builtin_macros/src/concat_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,15 +177,15 @@ pub(crate) fn expand_concat_bytes(
Ok(LitKind::Byte(val)) => {
accumulator.push(val);
}
Ok(LitKind::ByteStr(ref bytes, _)) => {
accumulator.extend_from_slice(bytes);
Ok(LitKind::ByteStr(ref byte_sym, _)) => {
accumulator.extend_from_slice(byte_sym.as_byte_str());
}
_ => {
guar.get_or_insert_with(|| invalid_type_err(cx, token_lit, e.span, false));
}
},
ExprKind::IncludedBytes(bytes) => {
accumulator.extend_from_slice(bytes);
ExprKind::IncludedBytes(byte_sym) => {
accumulator.extend_from_slice(byte_sym.as_byte_str());
}
ExprKind::Err(guarantee) => {
guar = Some(*guarantee);
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_builtin_macros/src/source_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use rustc_parse::parser::{ForceCollect, Parser};
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error};
use rustc_session::lint::builtin::INCOMPLETE_INCLUDE;
use rustc_span::source_map::SourceMap;
use rustc_span::{Pos, Span, Symbol};
use rustc_span::{ByteSymbol, Pos, Span, Symbol};
use smallvec::SmallVec;

use crate::errors;
Expand Down Expand Up @@ -237,7 +237,7 @@ pub(crate) fn expand_include_bytes(
Ok((bytes, _bsp)) => {
// Don't care about getting the span for the raw bytes,
// because the console can't really show them anyway.
let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(bytes));
let expr = cx.expr(sp, ast::ExprKind::IncludedBytes(ByteSymbol::intern(&bytes)));
MacEager::expr(expr)
}
Err(dummy) => dummy,
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_ssa/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ impl CodegenResults {
// `emit_raw_bytes` is used to make sure that the version representation does not depend on
// Encoder's inner representation of `u32`.
encoder.emit_raw_bytes(&RLINK_VERSION.to_be_bytes());
encoder.emit_str(sess.cfg_version);
encoder.emit_byte_str(sess.cfg_version.as_bytes());
Encodable::encode(codegen_results, &mut encoder);
Encodable::encode(metadata, &mut encoder);
Encodable::encode(outputs, &mut encoder);
Expand Down
8 changes: 6 additions & 2 deletions compiler/rustc_expand/src/proc_macro_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -599,8 +599,12 @@ impl server::TokenStream for Rustc<'_, '_> {
ast::ExprKind::Lit(token_lit) => {
Ok(tokenstream::TokenStream::token_alone(token::Literal(*token_lit), expr.span))
}
ast::ExprKind::IncludedBytes(bytes) => {
let lit = token::Lit::new(token::ByteStr, escape_byte_str_symbol(bytes), None);
ast::ExprKind::IncludedBytes(byte_sym) => {
let lit = token::Lit::new(
token::ByteStr,
escape_byte_str_symbol(byte_sym.as_byte_str()),
None,
);
Ok(tokenstream::TokenStream::token_alone(token::TokenKind::Literal(lit), expr.span))
}
ast::ExprKind::Unary(ast::UnOp::Neg, e) => match &e.kind {
Expand Down
1 change: 0 additions & 1 deletion compiler/rustc_hir/src/arena.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ macro_rules! arena_types {
[] asm_template: rustc_ast::InlineAsmTemplatePiece,
[] attribute: rustc_hir::Attribute,
[] owner_info: rustc_hir::OwnerInfo<'tcx>,
[] lit: rustc_hir::Lit,
[] macro_def: rustc_ast::MacroDef,
]);
)
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_hir/src/hir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1807,7 +1807,7 @@ pub struct PatExpr<'hir> {
#[derive(Debug, Clone, Copy, HashStable_Generic)]
pub enum PatExprKind<'hir> {
Lit {
lit: &'hir Lit,
lit: Lit,
// FIXME: move this into `Lit` and handle negated literal expressions
// once instead of matching on unop neg expressions everywhere.
negated: bool,
Expand Down Expand Up @@ -2734,7 +2734,7 @@ pub enum ExprKind<'hir> {
/// A unary operation (e.g., `!x`, `*x`).
Unary(UnOp, &'hir Expr<'hir>),
/// A literal (e.g., `1`, `"foo"`).
Lit(&'hir Lit),
Lit(Lit),
/// A cast (e.g., `foo as f64`).
Cast(&'hir Expr<'hir>, &'hir Ty<'hir>),
/// A type ascription (e.g., `x: Foo`). See RFC 3307.
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_hir/src/intravisit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ pub trait Visitor<'v>: Sized {
fn visit_pat_expr(&mut self, expr: &'v PatExpr<'v>) -> Self::Result {
walk_pat_expr(self, expr)
}
fn visit_lit(&mut self, _hir_id: HirId, _lit: &'v Lit, _negated: bool) -> Self::Result {
fn visit_lit(&mut self, _hir_id: HirId, _lit: Lit, _negated: bool) -> Self::Result {
Self::Result::output()
}
fn visit_anon_const(&mut self, c: &'v AnonConst) -> Self::Result {
Expand Down Expand Up @@ -786,7 +786,7 @@ pub fn walk_pat_expr<'v, V: Visitor<'v>>(visitor: &mut V, expr: &'v PatExpr<'v>)
let PatExpr { hir_id, span, kind } = expr;
try_visit!(visitor.visit_id(*hir_id));
match kind {
PatExprKind::Lit { lit, negated } => visitor.visit_lit(*hir_id, lit, *negated),
PatExprKind::Lit { lit, negated } => visitor.visit_lit(*hir_id, *lit, *negated),
PatExprKind::ConstBlock(c) => visitor.visit_inline_const(c),
PatExprKind::Path(qpath) => visitor.visit_qpath(qpath, *hir_id, *span),
}
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_hir_analysis/src/hir_ty_lowering/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2364,9 +2364,9 @@ impl<'tcx> dyn HirTyLowerer<'tcx> + '_ {
};

let lit_input = match expr.kind {
hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: &lit.node, ty, neg: false }),
hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: lit.node, ty, neg: false }),
hir::ExprKind::Unary(hir::UnOp::Neg, expr) => match expr.kind {
hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: &lit.node, ty, neg: true }),
hir::ExprKind::Lit(lit) => Some(LitToConstInput { lit: lit.node, ty, neg: true }),
_ => None,
},
_ => None,
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_hir_pretty/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1476,7 +1476,7 @@ impl<'a> State<'a> {
self.print_expr_addr_of(k, m, expr);
}
hir::ExprKind::Lit(lit) => {
self.print_literal(lit);
self.print_literal(&lit);
}
hir::ExprKind::Cast(expr, ty) => {
self.print_expr_cond_paren(expr, self.precedence(expr) < ExprPrecedence::Cast);
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_hir_typeck/src/fn_ctxt/checks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1634,7 +1634,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
ast::LitKind::ByteStr(ref v, _) => Ty::new_imm_ref(
tcx,
tcx.lifetimes.re_static,
Ty::new_array(tcx, tcx.types.u8, v.len() as u64),
Ty::new_array(tcx, tcx.types.u8, v.as_byte_str().len() as u64),
),
ast::LitKind::Byte(_) => tcx.types.u8,
ast::LitKind::Char(_) => tcx.types.char,
Expand Down
4 changes: 2 additions & 2 deletions compiler/rustc_hir_typeck/src/fn_ctxt/suggestions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1624,7 +1624,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
node: rustc_ast::LitKind::Int(lit, rustc_ast::LitIntType::Unsuffixed),
span,
}) => {
let Ok(snippet) = self.tcx.sess.source_map().span_to_snippet(*span) else {
let Ok(snippet) = self.tcx.sess.source_map().span_to_snippet(span) else {
return false;
};
if !(snippet.starts_with("0x") || snippet.starts_with("0X")) {
Expand Down Expand Up @@ -1683,7 +1683,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {

// We have satisfied all requirements to provide a suggestion. Emit it.
err.span_suggestion(
*span,
span,
format!("if you meant to create a null pointer, use `{null_path_str}()`"),
null_path_str + "()",
Applicability::MachineApplicable,
Expand Down
Loading
Loading