Skip to content

Perf improvements to collections::BitSet. #25230

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 18, 2015
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 77 additions & 75 deletions src/libcollections/bit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1451,8 +1451,8 @@ impl BitSet {
/// ```
#[inline]
#[stable(feature = "rust1", since = "1.0.0")]
pub fn iter(&self) -> bit_set::Iter {
SetIter {set: self, next_idx: 0}
pub fn iter<'a>(&'a self) -> bit_set::Iter<'a> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These annotations seem strictly redundant?

SetIter(BlockIter::from_blocks(self.bit_vec.blocks()))
}

/// Iterator over each u32 stored in `self` union `other`.
Expand All @@ -1477,13 +1477,11 @@ impl BitSet {
pub fn union<'a>(&'a self, other: &'a BitSet) -> Union<'a> {
fn or(w1: u32, w2: u32) -> u32 { w1 | w2 }

Union(TwoBitPositions {
set: self,
other: other,
Union(BlockIter::from_blocks(TwoBitPositions {
set: self.bit_vec.blocks(),
other: other.bit_vec.blocks(),
merge: or,
current_word: 0,
next_idx: 0
})
}))
}

/// Iterator over each usize stored in `self` intersect `other`.
Expand All @@ -1508,13 +1506,12 @@ impl BitSet {
pub fn intersection<'a>(&'a self, other: &'a BitSet) -> Intersection<'a> {
fn bitand(w1: u32, w2: u32) -> u32 { w1 & w2 }
let min = cmp::min(self.bit_vec.len(), other.bit_vec.len());
Intersection(TwoBitPositions {
set: self,
other: other,

Intersection(BlockIter::from_blocks(TwoBitPositions {
set: self.bit_vec.blocks(),
other: other.bit_vec.blocks(),
merge: bitand,
current_word: 0,
next_idx: 0
}.take(min))
}).take(min))
}

/// Iterator over each usize stored in the `self` setminus `other`.
Expand Down Expand Up @@ -1546,13 +1543,11 @@ impl BitSet {
pub fn difference<'a>(&'a self, other: &'a BitSet) -> Difference<'a> {
fn diff(w1: u32, w2: u32) -> u32 { w1 & !w2 }

Difference(TwoBitPositions {
set: self,
other: other,
Difference(BlockIter::from_blocks(TwoBitPositions {
set: self.bit_vec.blocks(),
other: other.bit_vec.blocks(),
merge: diff,
current_word: 0,
next_idx: 0
})
}))
}

/// Iterator over each u32 stored in the symmetric difference of `self` and `other`.
Expand All @@ -1578,13 +1573,11 @@ impl BitSet {
pub fn symmetric_difference<'a>(&'a self, other: &'a BitSet) -> SymmetricDifference<'a> {
fn bitxor(w1: u32, w2: u32) -> u32 { w1 ^ w2 }

SymmetricDifference(TwoBitPositions {
set: self,
other: other,
SymmetricDifference(BlockIter::from_blocks(TwoBitPositions {
set: self.bit_vec.blocks(),
other: other.bit_vec.blocks(),
merge: bitxor,
current_word: 0,
next_idx: 0
})
}))
}

/// Unions in-place with the specified other bit vector.
Expand Down Expand Up @@ -1808,98 +1801,107 @@ impl hash::Hash for BitSet {
}
}

/// An iterator for `BitSet`.
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct SetIter<'a> {
set: &'a BitSet,
next_idx: usize
struct BlockIter<T> where
T: Iterator<Item=u32> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh woah I thought this was a field for a second and was having my mind blown

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This vindicates my already held ideas about where clauses and brace style (newline after the where stuff).

head: u32,
head_offset: usize,
tail: T
}
impl<'a, T> BlockIter<T> where
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ultra-nit: missing newline

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ultra-nit 2: Use trailing comma everywhere possible. So on the tail line too.

T: Iterator<Item=u32> {
fn from_blocks(mut blocks: T) -> BlockIter<T> {
let h = blocks.next().unwrap_or(0);
BlockIter {tail: blocks, head: h, head_offset: 0}
}
}

/// An iterator combining two `BitSet` iterators.
#[derive(Clone)]
struct TwoBitPositions<'a> {
set: &'a BitSet,
other: &'a BitSet,
set: Blocks<'a>,
other: Blocks<'a>,
merge: fn(u32, u32) -> u32,
current_word: u32,
next_idx: usize
}

/// An iterator for `BitSet`.
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct SetIter<'a>(BlockIter<Blocks<'a>>);
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Union<'a>(TwoBitPositions<'a>);
pub struct Union<'a>(BlockIter<TwoBitPositions<'a>>);
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Intersection<'a>(Take<TwoBitPositions<'a>>);
pub struct Intersection<'a>(Take<BlockIter<TwoBitPositions<'a>>>);
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Difference<'a>(TwoBitPositions<'a>);
pub struct Difference<'a>(BlockIter<TwoBitPositions<'a>>);
#[derive(Clone)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct SymmetricDifference<'a>(TwoBitPositions<'a>);
pub struct SymmetricDifference<'a>(BlockIter<TwoBitPositions<'a>>);

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for SetIter<'a> {
impl<'a, T> Iterator for BlockIter<T> where T: Iterator<Item=u32> {
type Item = usize;

fn next(&mut self) -> Option<usize> {
while self.next_idx < self.set.bit_vec.len() {
let idx = self.next_idx;
self.next_idx += 1;

if self.set.contains(&idx) {
return Some(idx);
while self.head == 0 {
match self.tail.next() {
Some(w) => self.head = w,
_ => return None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Making the _ an explicit None seems marginally cleaner.

}
self.head_offset += u32::BITS;
}

return None;
let t = self.head & !self.head + 1;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you parenthesize this to avoid the reader knowing the order-of-ops for + and &?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I mean, yeah (self.head & !self.head) is a ridiculous op, but not having to think about that is nice)

// remove the least significant bit
self.head &= self.head - 1;
// return index of lsb
Some(self.head_offset + (u32::count_ones(t-1) as usize))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is super cool, but I consider bit ops to be pretty hard to understand. I'd appreciate some more expanded docs on the idea behind these. These were my notes for verifying/understanding the behaviour:

Make only the LSB set:
10011000: head
01100111: !head
01101000: !head + 1
00001000: head & (!head + 1)

Erase the LSB:
10011000: head
10010111: head-1
10010000: head & (head-1)

Index of LSB:
00001000: head & (!head + 1)
00000111: (head & (!head + 1)) - 1
3 ones => bit 3 was LSB

}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
(0, Some(self.set.bit_vec.len() - self.next_idx))
match self.tail.size_hint() {
(_, Some(h)) => (0, Some(1 + h * (u32::BITS as usize))),
_ => (0, None)
}
}
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for TwoBitPositions<'a> {
type Item = usize;

fn next(&mut self) -> Option<usize> {
while self.next_idx < self.set.bit_vec.len() ||
self.next_idx < self.other.bit_vec.len() {
let bit_idx = self.next_idx % u32::BITS;
if bit_idx == 0 {
let s_bit_vec = &self.set.bit_vec;
let o_bit_vec = &self.other.bit_vec;
// Merging the two words is a bit of an awkward dance since
// one BitVec might be longer than the other
let word_idx = self.next_idx / u32::BITS;
let w1 = if word_idx < s_bit_vec.storage.len() {
s_bit_vec.storage[word_idx]
} else { 0 };
let w2 = if word_idx < o_bit_vec.storage.len() {
o_bit_vec.storage[word_idx]
} else { 0 };
self.current_word = (self.merge)(w1, w2);
}

self.next_idx += 1;
if self.current_word & (1 << bit_idx) != 0 {
return Some(self.next_idx - 1);
}
type Item = u32;

fn next(&mut self) -> Option<u32> {
match (self.set.next(), self.other.next()) {
(Some(a), Some(b)) => Some((self.merge)(a, b)),
(Some(a), None) => Some((self.merge)(a, 0)),
(None, Some(b)) => Some((self.merge)(0, b)),
_ => return None
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💖

return None;
}

#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let cap = cmp::max(self.set.bit_vec.len(), self.other.bit_vec.len());
(0, Some(cap - self.next_idx))
let (a, al) = self.set.size_hint();
let (b, bl) = self.set.size_hint();

assert_eq!(a, b);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems a bit odd/paranoid...

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Change to debug_assert_eq! then.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm probably being a bit dense here. Will need to revisit.

(a, cmp::max(al, bl))
}
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for SetIter<'a> {
type Item = usize;

#[inline] fn next(&mut self) -> Option<usize> { self.0.next() }
#[inline] fn size_hint(&self) -> (usize, Option<usize>) { self.0.size_hint() }
}

#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Union<'a> {
type Item = usize;
Expand Down