
Commit f082ad4

Disallow taking two instances of the same mutex at the same time
Taking two instances of the same mutex may be totally fine, but it requires a total lockorder that we cannot (trivially) check. Thus, it's generally unsafe to do if we can avoid it. To discourage doing this, here we default to panicking on such locks in our lockorder tests, with a separate lock function added that is clearly labeled "unsafe" to allow doing so when we can guarantee a total lockorder. This requires adapting a number of call sites to the new API, including fixing a bug this turned up in `ChannelMonitor`'s `PartialEq`, where no lockorder was guaranteed.
1 parent 9c08fbd commit f082ad4
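
For context, here is a minimal, self-contained sketch of the address-ordering trick the commit applies in `ChannelMonitor`'s `PartialEq`. It is written against plain `std::sync::Mutex` rather than LDK's internal `LockTestExt` trait, and the helper name `lock_both` is illustrative only: when two distinct instances of the same mutex type must be held at once, always acquire the one at the lower memory address first so every caller agrees on a single total lockorder.

use std::sync::{Mutex, MutexGuard};

// Lock two distinct instances of the same Mutex type without risking an
// A->B vs B->A lockorder inversion: order them by memory address.
// (Assumes `a` and `b` are not the same mutex; locking the same mutex
// twice on one thread would still deadlock.)
fn lock_both<'a, T>(a: &'a Mutex<T>, b: &'a Mutex<T>) -> (MutexGuard<'a, T>, MutexGuard<'a, T>) {
    if (a as *const Mutex<T> as usize) < (b as *const Mutex<T> as usize) {
        let ga = a.lock().unwrap();
        let gb = b.lock().unwrap();
        (ga, gb)
    } else {
        let gb = b.lock().unwrap();
        let ga = a.lock().unwrap();
        (ga, gb)
    }
}

fn main() {
    let x = Mutex::new(1);
    let y = Mutex::new(2);
    // Whichever order callers pass the mutexes in, they are locked in address order.
    let (gx, gy) = lock_both(&x, &y);
    assert_eq!(*gx + *gy, 3);
}
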

10 files changed: 118 additions and 52 deletions

lightning/src/chain/channelmonitor.rs

Lines changed: 8 additions & 4 deletions
@@ -60,7 +60,7 @@ use core::{cmp, mem};
 use crate::io::{self, Error};
 use core::convert::TryInto;
 use core::ops::Deref;
-use crate::sync::Mutex;
+use crate::sync::{Mutex, LockTestExt};

 /// An update generated by the underlying channel itself which contains some new information the
 /// [`ChannelMonitor`] should be made aware of.
@@ -851,9 +851,13 @@ pub type TransactionOutputs = (Txid, Vec<(u32, TxOut)>);

 impl<Signer: WriteableEcdsaChannelSigner> PartialEq for ChannelMonitor<Signer> where Signer: PartialEq {
 	fn eq(&self, other: &Self) -> bool {
-		let inner = self.inner.lock().unwrap();
-		let other = other.inner.lock().unwrap();
-		inner.eq(&other)
+		// We need some kind of total lockorder. Absent a better idea, we sort by position in
+		// memory and take locks in that order (assuming that we can't move within memory while a
+		// lock is held).
+		let ord = ((self as *const _) as usize) < ((other as *const _) as usize);
+		let a = if ord { self.inner.unsafe_well_ordered_double_lock_self() } else { other.inner.unsafe_well_ordered_double_lock_self() };
+		let b = if ord { other.inner.unsafe_well_ordered_double_lock_self() } else { self.inner.unsafe_well_ordered_double_lock_self() };
+		a.eq(&b)
 	}
 }

lightning/src/ln/chanmon_update_fail_tests.rs

Lines changed: 7 additions & 6 deletions
@@ -108,12 +108,13 @@ fn test_monitor_and_persister_update_fail() {
 		blocks: Arc::new(Mutex::new(vec![(genesis_block(Network::Testnet), 200); 200])),
 	};
 	let chain_mon = {
-		let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-		let mut w = test_utils::TestVecWriter(Vec::new());
-		monitor.write(&mut w).unwrap();
-		let new_monitor = <(BlockHash, ChannelMonitor<EnforcingSigner>)>::read(
-			&mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-		assert!(new_monitor == *monitor);
+		let new_monitor = {
+			let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+			let new_monitor = <(BlockHash, ChannelMonitor<EnforcingSigner>)>::read(
+				&mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+			assert!(new_monitor == *monitor);
+			new_monitor
+		};
 		let chain_mon = test_utils::TestChainMonitor::new(Some(&chain_source), &tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
 		assert_eq!(chain_mon.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
 		chain_mon

lightning/src/ln/channelmanager.rs

Lines changed: 10 additions & 3 deletions
@@ -6941,7 +6941,10 @@ where
 		let mut monitor_update_blocked_actions_per_peer = None;
 		let mut peer_states = Vec::new();
 		for (_, peer_state_mutex) in per_peer_state.iter() {
-			peer_states.push(peer_state_mutex.lock().unwrap());
+			// Because we're holding the owning `per_peer_state` write lock here there's no chance
+			// of a lockorder violation deadlock - no other thread can be holding any
+			// per_peer_state lock at all.
+			peer_states.push(peer_state_mutex.unsafe_well_ordered_double_lock_self());
 		}

 		(serializable_peer_count).write(writer)?;
@@ -8280,18 +8283,20 @@ mod tests {
 			let nodes_0_lock = nodes[0].node.id_to_peer.lock().unwrap();
 			assert_eq!(nodes_0_lock.len(), 1);
 			assert!(nodes_0_lock.contains_key(channel_id));
-
-			assert_eq!(nodes[1].node.id_to_peer.lock().unwrap().len(), 0);
 		}

+		assert_eq!(nodes[1].node.id_to_peer.lock().unwrap().len(), 0);
+
 		let funding_created_msg = get_event_msg!(nodes[0], MessageSendEvent::SendFundingCreated, nodes[1].node.get_our_node_id());

 		nodes[1].node.handle_funding_created(&nodes[0].node.get_our_node_id(), &funding_created_msg);
 		{
 			let nodes_0_lock = nodes[0].node.id_to_peer.lock().unwrap();
 			assert_eq!(nodes_0_lock.len(), 1);
 			assert!(nodes_0_lock.contains_key(channel_id));
+		}

+		{
 			// Assert that `nodes[1]`'s `id_to_peer` map is populated with the channel as soon as
 			// as it has the funding transaction.
 			let nodes_1_lock = nodes[1].node.id_to_peer.lock().unwrap();
@@ -8321,7 +8326,9 @@ mod tests {
 			let nodes_0_lock = nodes[0].node.id_to_peer.lock().unwrap();
 			assert_eq!(nodes_0_lock.len(), 1);
 			assert!(nodes_0_lock.contains_key(channel_id));
+		}

+		{
 			// At this stage, `nodes[1]` has proposed a fee for the closing transaction in the
 			// `handle_closing_signed` call above. As `nodes[1]` has not yet received the signature
 			// from `nodes[0]` for the closing transaction with the proposed fee, the channel is
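
The comment in the `channelmanager.rs` serialization hunk above relies on a useful invariant: while a thread holds the outer map's write lock, no other thread can even reach the inner per-peer mutexes, so locking several of them in arbitrary order cannot deadlock. A rough, self-contained sketch of that pattern with generic types (not LDK's actual `per_peer_state` structure or field names):

use std::collections::HashMap;
use std::sync::{Mutex, RwLock};

// Each peer's state sits behind its own Mutex; the map of peers sits behind an
// outer RwLock. Holding the outer *write* lock gives exclusive access to the
// whole map, so every inner Mutex can be taken safely, in any order.
fn snapshot_all(per_peer: &RwLock<HashMap<u64, Mutex<Vec<u8>>>>) -> usize {
    let peers = per_peer.write().unwrap(); // exclusive access to the whole map
    let mut total = 0;
    for (_, peer_state) in peers.iter() {
        // Safe to take every inner lock here: no other thread can hold one.
        total += peer_state.lock().unwrap().len();
    }
    total
}

fn main() {
    let map = RwLock::new(HashMap::from([(1u64, Mutex::new(vec![1u8, 2, 3]))]));
    assert_eq!(snapshot_all(&map), 3);
}
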

lightning/src/ln/functional_test_utils.rs

Lines changed: 3 additions & 3 deletions
@@ -44,7 +44,7 @@ use crate::io;
 use crate::prelude::*;
 use core::cell::RefCell;
 use alloc::rc::Rc;
-use crate::sync::{Arc, Mutex};
+use crate::sync::{Arc, Mutex, LockTestExt};
 use core::mem;
 use core::iter::repeat;
 use bitcoin::{PackedLockTime, TxMerkleNode};
@@ -466,8 +466,8 @@ impl<'a, 'b, 'c> Drop for Node<'a, 'b, 'c> {
 				panic!();
 			}
 		}
-		assert_eq!(*chain_source.watched_txn.lock().unwrap(), *self.chain_source.watched_txn.lock().unwrap());
-		assert_eq!(*chain_source.watched_outputs.lock().unwrap(), *self.chain_source.watched_outputs.lock().unwrap());
+		assert_eq!(*chain_source.watched_txn.unsafe_well_ordered_double_lock_self(), *self.chain_source.watched_txn.unsafe_well_ordered_double_lock_self());
+		assert_eq!(*chain_source.watched_outputs.unsafe_well_ordered_double_lock_self(), *self.chain_source.watched_outputs.unsafe_well_ordered_double_lock_self());
 		}
 	}
 }

lightning/src/ln/functional_tests.rs

Lines changed: 21 additions & 18 deletions
@@ -8150,12 +8150,13 @@ fn test_update_err_monitor_lockdown() {
 	let logger = test_utils::TestLogger::with_id(format!("node {}", 0));
 	let persister = test_utils::TestPersister::new();
 	let watchtower = {
-		let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-		let mut w = test_utils::TestVecWriter(Vec::new());
-		monitor.write(&mut w).unwrap();
-		let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
-			&mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-		assert!(new_monitor == *monitor);
+		let new_monitor = {
+			let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+			let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
+				&mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+			assert!(new_monitor == *monitor);
+			new_monitor
+		};
 		let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &chanmon_cfgs[0].tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
 		assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
 		watchtower
@@ -8217,12 +8218,13 @@ fn test_concurrent_monitor_claim() {
 	let logger = test_utils::TestLogger::with_id(format!("node {}", "Alice"));
 	let persister = test_utils::TestPersister::new();
 	let watchtower_alice = {
-		let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-		let mut w = test_utils::TestVecWriter(Vec::new());
-		monitor.write(&mut w).unwrap();
-		let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
-			&mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-		assert!(new_monitor == *monitor);
+		let new_monitor = {
+			let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+			let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
+				&mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+			assert!(new_monitor == *monitor);
+			new_monitor
+		};
 		let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &chanmon_cfgs[0].tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
 		assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
 		watchtower
@@ -8246,12 +8248,13 @@ fn test_concurrent_monitor_claim() {
 	let logger = test_utils::TestLogger::with_id(format!("node {}", "Bob"));
 	let persister = test_utils::TestPersister::new();
 	let watchtower_bob = {
-		let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
-		let mut w = test_utils::TestVecWriter(Vec::new());
-		monitor.write(&mut w).unwrap();
-		let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
-			&mut io::Cursor::new(&w.0), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
-		assert!(new_monitor == *monitor);
+		let new_monitor = {
+			let monitor = nodes[0].chain_monitor.chain_monitor.get_monitor(outpoint).unwrap();
+			let new_monitor = <(BlockHash, channelmonitor::ChannelMonitor<EnforcingSigner>)>::read(
+				&mut io::Cursor::new(&monitor.encode()), (nodes[0].keys_manager, nodes[0].keys_manager)).unwrap().1;
+			assert!(new_monitor == *monitor);
+			new_monitor
+		};
 		let watchtower = test_utils::TestChainMonitor::new(Some(&chain_source), &chanmon_cfgs[0].tx_broadcaster, &logger, &chanmon_cfgs[0].fee_estimator, &persister, &node_cfgs[0].keys_manager);
 		assert_eq!(watchtower.watch_channel(outpoint, new_monitor), ChannelMonitorUpdateStatus::Completed);
 		watchtower

lightning/src/routing/utxo.rs

Lines changed: 5 additions & 2 deletions
@@ -26,7 +26,7 @@ use crate::util::ser::Writeable;
 use crate::prelude::*;

 use alloc::sync::{Arc, Weak};
-use crate::sync::Mutex;
+use crate::sync::{Mutex, LockTestExt};
 use core::ops::Deref;

 /// An error when accessing the chain via [`UtxoLookup`].
@@ -404,7 +404,10 @@ impl PendingChecks {
 						// lookup if we haven't gotten that far yet).
 						match Weak::upgrade(&e.get()) {
 							Some(pending_msgs) => {
-								let pending_matches = match &pending_msgs.lock().unwrap().channel_announce {
+								// This may be called with the mutex held on a different UtxoMessages
+								// struct, however in that case we have a global lockorder of new messages
+								// -> old messages, which makes this safe.
+								let pending_matches = match &pending_msgs.unsafe_well_ordered_double_lock_self().channel_announce {
 									Some(ChannelAnnouncement::Full(pending_msg)) => Some(pending_msg) == full_msg,
 									Some(ChannelAnnouncement::Unsigned(pending_msg)) => pending_msg == msg,
 									None => {

lightning/src/sync/debug_sync.rs

Lines changed: 32 additions & 10 deletions
@@ -124,17 +124,26 @@ impl LockMetadata {
 		res
 	}

-	fn pre_lock(this: &Arc<LockMetadata>) {
+	fn pre_lock(this: &Arc<LockMetadata>, _double_lock_self_allowed: bool) {
 		LOCKS_HELD.with(|held| {
 			// For each lock which is currently locked, check that no lock's locked-before
 			// set includes the lock we're about to lock, which would imply a lockorder
 			// inversion.
 			for (locked_idx, locked) in held.borrow().iter() {
 				if *locked_idx == this.lock_idx {
-					// With `feature = "backtrace"` set, we may be looking at different instances
-					// of the same lock.
-					debug_assert!(cfg!(feature = "backtrace"), "Tried to acquire a lock while it was held!");
+					// Note that with `feature = "backtrace"` set, we may be looking at different
+					// instances of the same lock. Still, doing so is quite risky, a total order
+					// must be maintained, and doing so across a set of otherwise-identical mutexes
+					// is fraught with issues.
+					#[cfg(feature = "backtrace")]
+					debug_assert!(_double_lock_self_allowed,
+						"Tried to acquire a lock while it was held!\nLock constructed at {}",
+						get_construction_location(&this._lock_construction_bt));
+					#[cfg(not(feature = "backtrace"))]
+					panic!("Tried to acquire a lock while it was held!");
 				}
+			}
+			for (locked_idx, locked) in held.borrow().iter() {
 				for (locked_dep_idx, _locked_dep) in locked.locked_before.lock().unwrap().iter() {
 					if *locked_dep_idx == this.lock_idx && *locked_dep_idx != locked.lock_idx {
 						#[cfg(feature = "backtrace")]
@@ -236,7 +245,7 @@ impl<T> Mutex<T> {
 	}

 	pub fn lock<'a>(&'a self) -> LockResult<MutexGuard<'a, T>> {
-		LockMetadata::pre_lock(&self.deps);
+		LockMetadata::pre_lock(&self.deps, false);
 		self.inner.lock().map(|lock| MutexGuard { mutex: self, lock }).map_err(|_| ())
 	}

@@ -249,11 +258,17 @@ impl<T> Mutex<T> {
 	}
 }

-impl <T> LockTestExt for Mutex<T> {
+impl<'a, T: 'a> LockTestExt<'a> for Mutex<T> {
 	#[inline]
 	fn held_by_thread(&self) -> LockHeldState {
 		LockMetadata::held_by_thread(&self.deps)
 	}
+	type ExclLock = MutexGuard<'a, T>;
+	#[inline]
+	fn unsafe_well_ordered_double_lock_self(&'a self) -> MutexGuard<T> {
+		LockMetadata::pre_lock(&self.deps, true);
+		self.inner.lock().map(|lock| MutexGuard { mutex: self, lock }).unwrap()
+	}
 }

 pub struct RwLock<T: Sized> {
@@ -317,13 +332,14 @@ impl<T> RwLock<T> {
 	pub fn read<'a>(&'a self) -> LockResult<RwLockReadGuard<'a, T>> {
 		// Note that while we could be taking a recursive read lock here, Rust's `RwLock` may
 		// deadlock trying to take a second read lock if another thread is waiting on the write
-		// lock. Its platform dependent (but our in-tree `FairRwLock` guarantees this behavior).
-		LockMetadata::pre_lock(&self.deps);
+		// lock. This behavior is platform dependent, but our in-tree `FairRwLock` guarantees
+		// such a deadlock.
+		LockMetadata::pre_lock(&self.deps, false);
 		self.inner.read().map(|guard| RwLockReadGuard { lock: self, guard }).map_err(|_| ())
 	}

 	pub fn write<'a>(&'a self) -> LockResult<RwLockWriteGuard<'a, T>> {
-		LockMetadata::pre_lock(&self.deps);
+		LockMetadata::pre_lock(&self.deps, false);
 		self.inner.write().map(|guard| RwLockWriteGuard { lock: self, guard }).map_err(|_| ())
 	}

@@ -336,11 +352,17 @@ impl<T> RwLock<T> {
 	}
 }

-impl <T> LockTestExt for RwLock<T> {
+impl<'a, T: 'a> LockTestExt<'a> for RwLock<T> {
 	#[inline]
 	fn held_by_thread(&self) -> LockHeldState {
 		LockMetadata::held_by_thread(&self.deps)
 	}
+	type ExclLock = RwLockWriteGuard<'a, T>;
+	#[inline]
+	fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<'a, T> {
+		LockMetadata::pre_lock(&self.deps, true);
+		self.inner.write().map(|guard| RwLockWriteGuard { lock: self, guard }).unwrap()
+	}
 }

 pub type FairRwLock<T> = RwLock<T>;
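
The `debug_sync.rs` change above is what enforces the new default: `pre_lock` now fails (a `debug_assert!` under `feature = "backtrace"`, a `panic!` otherwise) when a thread re-acquires a lock identity it already holds, unless the call came through `unsafe_well_ordered_double_lock_self`. The following is only a toy model of that check, using a thread-local set of held-lock identifiers in place of LDK's `LockMetadata`; the `pre_lock` signature and behavior here are simplified for illustration.

use std::cell::RefCell;
use std::collections::HashSet;

thread_local! {
    // Identifiers of the locks currently held by this thread.
    static LOCKS_HELD: RefCell<HashSet<usize>> = RefCell::new(HashSet::new());
}

// Before taking a lock, check whether this thread already holds a lock with
// the same identity and panic unless the caller explicitly opted in.
fn pre_lock(lock_idx: usize, double_lock_self_allowed: bool) {
    LOCKS_HELD.with(|held| {
        let already_held = held.borrow().contains(&lock_idx);
        if already_held && !double_lock_self_allowed {
            panic!("Tried to acquire a lock while it was held!");
        }
        held.borrow_mut().insert(lock_idx);
    });
}

fn main() {
    pre_lock(1, false); // first acquisition of lock 1 on this thread: fine
    pre_lock(2, false); // a different lock: fine
    pre_lock(1, true);  // second instance of lock 1, explicitly allowed
    // pre_lock(1, false); // would panic: lock 1 is already held by this thread
}
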

lightning/src/sync/fairrwlock.rs

Lines changed: 6 additions & 1 deletion
@@ -50,10 +50,15 @@ impl<T> FairRwLock<T> {
 	}
 }

-impl<T> LockTestExt for FairRwLock<T> {
+impl<'a, T: 'a> LockTestExt<'a> for FairRwLock<T> {
 	#[inline]
 	fn held_by_thread(&self) -> LockHeldState {
 		// fairrwlock is only built in non-test modes, so we should never support tests.
 		LockHeldState::Unsupported
 	}
+	type ExclLock = RwLockWriteGuard<'a, T>;
+	#[inline]
+	fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<'a, T> {
+		self.write().unwrap()
+	}
 }

lightning/src/sync/mod.rs

Lines changed: 18 additions & 3 deletions
@@ -7,8 +7,17 @@ pub(crate) enum LockHeldState {
 	Unsupported,
 }

-pub(crate) trait LockTestExt {
+pub(crate) trait LockTestExt<'a> {
 	fn held_by_thread(&self) -> LockHeldState;
+	type ExclLock;
+	/// If two instances of the same mutex are being taken at the same time, it's very easy to have
+	/// a lockorder inversion and risk deadlock. Thus, we default to disabling such locks.
+	///
+	/// However, sometimes they cannot be avoided. In such cases, this method exists to take a
+	/// mutex while avoiding a test failure. It is deliberately verbose and includes the term
+	/// "unsafe" to indicate that special care needs to be taken to ensure no deadlocks are
+	/// possible.
+	fn unsafe_well_ordered_double_lock_self(&'a self) -> Self::ExclLock;
 }

 #[cfg(all(feature = "std", not(feature = "_bench_unstable"), test))]
@@ -27,13 +36,19 @@ pub use {std::sync::{Arc, Mutex, Condvar, MutexGuard, RwLock, RwLockReadGuard, R
 #[cfg(all(feature = "std", any(feature = "_bench_unstable", not(test))))]
 mod ext_impl {
 	use super::*;
-	impl<T> LockTestExt for Mutex<T> {
+	impl<'a, T: 'a> LockTestExt<'a> for Mutex<T> {
 		#[inline]
 		fn held_by_thread(&self) -> LockHeldState { LockHeldState::Unsupported }
+		type ExclLock = MutexGuard<'a, T>;
+		#[inline]
+		fn unsafe_well_ordered_double_lock_self(&'a self) -> MutexGuard<T> { self.lock().unwrap() }
 	}
-	impl<T> LockTestExt for RwLock<T> {
+	impl<'a, T: 'a> LockTestExt<'a> for RwLock<T> {
 		#[inline]
 		fn held_by_thread(&self) -> LockHeldState { LockHeldState::Unsupported }
+		type ExclLock = RwLockWriteGuard<'a, T>;
+		#[inline]
+		fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<T> { self.write().unwrap() }
 	}
 }

lightning/src/sync/nostd_sync.rs

Lines changed: 8 additions & 2 deletions
@@ -62,12 +62,15 @@ impl<T> Mutex<T> {
 	}
 }

-impl<T> LockTestExt for Mutex<T> {
+impl<'a, T: 'a> LockTestExt<'a> for Mutex<T> {
 	#[inline]
 	fn held_by_thread(&self) -> LockHeldState {
 		if self.lock().is_err() { return LockHeldState::HeldByThread; }
 		else { return LockHeldState::NotHeldByThread; }
 	}
+	type ExclLock = MutexGuard<'a, T>;
+	#[inline]
+	fn unsafe_well_ordered_double_lock_self(&'a self) -> MutexGuard<T> { self.lock().unwrap() }
 }

 pub struct RwLock<T: ?Sized> {
@@ -125,12 +128,15 @@ impl<T> RwLock<T> {
 	}
 }

-impl<T> LockTestExt for RwLock<T> {
+impl<'a, T: 'a> LockTestExt<'a> for RwLock<T> {
 	#[inline]
 	fn held_by_thread(&self) -> LockHeldState {
 		if self.write().is_err() { return LockHeldState::HeldByThread; }
 		else { return LockHeldState::NotHeldByThread; }
 	}
+	type ExclLock = RwLockWriteGuard<'a, T>;
+	#[inline]
+	fn unsafe_well_ordered_double_lock_self(&'a self) -> RwLockWriteGuard<T> { self.write().unwrap() }
 }

 pub type FairRwLock<T> = RwLock<T>;
