From 64c2a7412b73768081d35354fd959a8ca028bccb Mon Sep 17 00:00:00 2001 From: Stepan Koltsov Date: Sat, 5 Apr 2014 22:14:46 +0000 Subject: [PATCH] Optimize Once::doit when initialization is already completed * An atomic load is much cheaper than fetch_add, at least on x86_64. * The common path of `doit` can be inlined. Verified with this test: ``` static mut o: one::Once = one::ONCE_INIT; loop { unsafe { let start = time::precise_time_ns(); let iters = 50000000u64; for _ in range(0, iters) { o.doit(|| { println!("once!"); }); } let end = time::precise_time_ns(); let ps_per_iter = 1000 * (end - start) / iters; println!("{} ps per iter", ps_per_iter); // confuse the optimizer o.doit(|| { println!("once!"); }); } } ``` Test executed on a Mac with a 2GHz Intel Core i7. Result is 700ps per iteration with the patch applied, and 17000ps per iteration without it. --- src/libsync/one.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/libsync/one.rs b/src/libsync/one.rs index 7da6f39b840e5..388c63becbfd2 100644 --- a/src/libsync/one.rs +++ b/src/libsync/one.rs @@ -63,7 +63,17 @@ impl Once { /// /// When this function returns, it is guaranteed that some initialization /// has run and completed (it may not be the closure specified). + #[inline(always)] pub fn doit(&self, f: ||) { + // Optimize common path: load is much cheaper than fetch_add. + if self.cnt.load(atomics::SeqCst) < 0 { + return + } + + self.doit_slow(f); + } + + fn doit_slow(&self, f: ||) { // Implementation-wise, this would seem like a fairly trivial primitive. // The stickler part is where our mutexes currently require an // allocation, and usage of a `Once` should't leak this allocation.