diff --git a/lightning/src/offers/invoice_request.rs b/lightning/src/offers/invoice_request.rs index cdd9cff2177..caf27513635 100644 --- a/lightning/src/offers/invoice_request.rs +++ b/lightning/src/offers/invoice_request.rs @@ -998,15 +998,43 @@ impl VerifiedInvoiceRequest { InvoiceRequestFields { payer_signing_pubkey: *payer_signing_pubkey, quantity: *quantity, - payer_note_truncated: payer_note.clone().map(|mut s| { - s.truncate(PAYER_NOTE_LIMIT); - UntrustedString(s) - }), + payer_note_truncated: payer_note + .clone() + // Truncate the payer note to `PAYER_NOTE_LIMIT` bytes, rounding + // down to the nearest valid UTF-8 code point boundary. + .map(|s| UntrustedString(string_truncate_safe(s, PAYER_NOTE_LIMIT))), human_readable_name: self.offer_from_hrn().clone(), } } } +/// `String::truncate(new_len)` panics if you split inside a UTF-8 code point, +/// which would leave the `String` containing invalid UTF-8. This function will +/// instead truncate the string to the next smaller code point boundary so the +/// truncated string always remains valid UTF-8. +/// +/// This can still split a grapheme cluster, but that's probably fine. +/// We'd otherwise have to pull in the `unicode-segmentation` crate and its big +/// unicode tables to find the next smaller grapheme cluster boundary. +fn string_truncate_safe(mut s: String, new_len: usize) -> String { + // Finds the largest byte index `x` not exceeding byte index `index` where + // `s.is_char_boundary(x)` is true. + // TODO(phlip9): remove when `std::str::floor_char_boundary` stabilizes. + let truncated_len = if new_len >= s.len() { + s.len() + } else { + // UTF-8 code points are 1-4 bytes long, so we can limit our search + // to this range: [new_len - 3, new_len] + let lower_bound_index = new_len.saturating_sub(3); + (lower_bound_index..=new_len) + .rev() + .find(|idx| s.is_char_boundary(*idx)) + .unwrap_or(lower_bound_index) + }; + s.truncate(truncated_len); + s +} + impl InvoiceRequestContents { pub(super) fn metadata(&self) -> &[u8] { self.inner.metadata() @@ -1426,6 +1454,7 @@ mod tests { use crate::ln::inbound_payment::ExpandedKey; use crate::ln::msgs::{DecodeError, MAX_VALUE_MSAT}; use crate::offers::invoice::{Bolt12Invoice, SIGNATURE_TAG as INVOICE_SIGNATURE_TAG}; + use crate::offers::invoice_request::string_truncate_safe; use crate::offers::merkle::{self, SignatureTlvStreamRef, TaggedHash, TlvStream}; use crate::offers::nonce::Nonce; #[cfg(not(c_bindings))] @@ -2947,6 +2976,12 @@ mod tests { .unwrap(); assert_eq!(offer.issuer_signing_pubkey(), Some(node_id)); + // UTF-8 payer note that we can't naively `.truncate(PAYER_NOTE_LIMIT)` + // because it would split a multi-byte UTF-8 code point. + let payer_note = "❤️".repeat(86); + assert_eq!(payer_note.len(), PAYER_NOTE_LIMIT + 4); + let expected_payer_note = "❤️".repeat(85); + let invoice_request = offer .request_invoice(&expanded_key, nonce, &secp_ctx, payment_id) .unwrap() @@ -2954,7 +2989,7 @@ mod tests { .unwrap() .quantity(1) .unwrap() - .payer_note("0".repeat(PAYER_NOTE_LIMIT * 2)) + .payer_note(payer_note) .build_and_sign() .unwrap(); match invoice_request.verify_using_metadata(&expanded_key, &secp_ctx) { @@ -2966,7 +3001,7 @@ mod tests { InvoiceRequestFields { payer_signing_pubkey: invoice_request.payer_signing_pubkey(), quantity: Some(1), - payer_note_truncated: Some(UntrustedString("0".repeat(PAYER_NOTE_LIMIT))), + payer_note_truncated: Some(UntrustedString(expected_payer_note)), human_readable_name: None, } ); @@ -2981,4 +3016,31 @@ mod tests { Err(_) => panic!("unexpected error"), } } + + #[test] + fn test_string_truncate_safe() { + // We'll correctly truncate to the nearest UTF-8 code point boundary: + // ❤ variation-selector + // e29da4 efb88f + let s = String::from("❤️"); + for idx in 0..(s.len() + 5) { + if idx >= s.len() { + assert_eq!(s, string_truncate_safe(s.clone(), idx)); + } else if (3..s.len()).contains(&idx) { + assert_eq!("❤", string_truncate_safe(s.clone(), idx)); + } else { + assert_eq!("", string_truncate_safe(s.clone(), idx)); + } + } + + // Every byte in an ASCII string is also a full UTF-8 code point. + let s = String::from("my ASCII string!"); + for idx in 0..(s.len() + 5) { + if idx >= s.len() { + assert_eq!(s, string_truncate_safe(s.clone(), idx)); + } else { + assert_eq!(s[..idx], string_truncate_safe(s.clone(), idx)); + } + } + } }