11//! A bit-vector data structure, optimized for
22//! [rank](http://bitmagic.io/rank-select.html) operations.
33//!
4- //! There is also an opportunistic `select` operation, but the general case has not been
5- //! implemented.
6- //!
74//! See also: ["Succinct data structure"](https://en.wikipedia.org/wiki/Succinct_data_structure).
85
96type SubblockBits = u128 ;
@@ -55,15 +52,6 @@ impl Block {
5552 self . bits [ chunk_idx] ^= mask;
5653 }
5754
58- /// Tests whether the bit at the given index is set.
59- fn get ( & self , index : usize ) -> bool {
60- assert ! ( index < BITS_PER_BLOCK ) ;
61- let chunk_idx = index / BITS_PER_SUB_BLOCK ;
62- let bit_idx = index % BITS_PER_SUB_BLOCK ;
63- let mask = 1 << ( ( BITS_PER_SUB_BLOCK - 1 ) - bit_idx) ;
64- self . bits [ chunk_idx] & mask != 0
65- }
66-
6755 /// The **total rank** of the block-relative local index, and the index of the one
6856 /// bit that establishes that rank (aka "select") **if** it occurs within that same
6957 /// chunk, otherwise [`None`]. The assumption is that if you would have to look back
@@ -99,40 +87,6 @@ impl Block {
9987 . map ( |c| c. count_ones ( ) as usize )
10088 . sum :: < usize > ( )
10189 }
102-
103- fn predecessor ( & self , idx : usize ) -> Option < usize > {
104- let sub_block = idx / BITS_PER_SUB_BLOCK ;
105- let masked = self . bits [ sub_block] >> ( BITS_PER_SUB_BLOCK - 1 - idx % BITS_PER_SUB_BLOCK ) ;
106- if masked > 0 {
107- Some ( idx - masked. trailing_zeros ( ) as usize )
108- } else {
109- for i in ( 0 ..sub_block) . rev ( ) {
110- let masked = self . bits [ i] ;
111- if masked > 0 {
112- return Some (
113- ( i + 1 ) * BITS_PER_SUB_BLOCK - masked. trailing_zeros ( ) as usize - 1 ,
114- ) ;
115- }
116- }
117- None
118- }
119- }
120-
121- fn successor ( & self , idx : usize ) -> Option < usize > {
122- let sub_block = idx / BITS_PER_SUB_BLOCK ;
123- let masked = self . bits [ sub_block] << ( idx % BITS_PER_SUB_BLOCK ) ;
124- if masked > 0 {
125- Some ( idx + masked. leading_zeros ( ) as usize )
126- } else {
127- for i in ( sub_block + 1 ) ..SUB_BLOCKS_PER_BLOCK {
128- let masked = self . bits [ i] ;
129- if masked > 0 {
130- return Some ( i * BITS_PER_SUB_BLOCK + masked. leading_zeros ( ) as usize ) ;
131- }
132- }
133- None
134- }
135- }
13690}
13791
13892/// Builder for creating a [`BitRank`].
@@ -154,6 +108,7 @@ pub struct BitRankBuilder {
154108
155109impl BitRankBuilder {
156110 /// Returns a new builder.
111+ #[ cfg( test) ]
157112 pub fn new ( ) -> Self {
158113 Self :: default ( )
159114 }
@@ -221,20 +176,6 @@ pub struct BitRank {
221176}
222177
223178impl BitRank {
224- /// Creates a `BitRank` containing the integers in `iter`.
225- ///
226- /// # Panics
227- /// This may panic if the values produced by `iter` are not strictly increasing.
228- #[ allow( dead_code) ]
229- #[ allow( clippy:: should_implement_trait) ]
230- pub fn from_iter < I : IntoIterator < Item = usize > > ( iter : I ) -> BitRank {
231- let mut builder = BitRankBuilder :: new ( ) ;
232- for position in iter {
233- builder. push ( position) ;
234- }
235- builder. finish ( )
236- }
237-
238179 /// The rank at the specified index (exclusive).
239180 ///
240181 /// The (one) rank is defined as: `rank(i) = sum(b[j] for j in 0..i)`
@@ -243,51 +184,6 @@ impl BitRank {
243184 self . rank_select ( idx) . 0
244185 }
245186
246- /// Tests whether the bit at the given index is set.
247- #[ allow( dead_code) ]
248- pub fn get ( & self , idx : usize ) -> bool {
249- let block_num = idx / BITS_PER_BLOCK ;
250- // assert!(block_num < self.blocks.len(), "index out of bounds");
251- if block_num >= self . blocks . len ( ) {
252- false
253- } else {
254- self . blocks [ block_num] . get ( idx % BITS_PER_BLOCK )
255- }
256- }
257-
258- /// Returns the 1 bit at or before the specified index.
259- #[ allow( dead_code) ]
260- pub fn predecessor ( & self , idx : usize ) -> usize {
261- let block_num = idx / BITS_PER_BLOCK ;
262- if block_num < self . blocks . len ( ) {
263- if let Some ( p) = self . blocks [ block_num] . predecessor ( idx % BITS_PER_BLOCK ) {
264- return block_num * BITS_PER_BLOCK + p;
265- }
266- }
267- for block_num in ( 0 ..self . blocks . len ( ) . min ( block_num) ) . rev ( ) {
268- if let Some ( p) = self . blocks [ block_num] . predecessor ( BITS_PER_BLOCK - 1 ) {
269- return block_num * BITS_PER_BLOCK + p;
270- }
271- }
272- panic ! ( "no predecessor found!" ) ;
273- }
274-
275- /// Returns the next 1 bit at or after the specified index.
276- #[ allow( dead_code) ]
277- pub fn successor ( & self , idx : usize ) -> usize {
278- let block_num = idx / BITS_PER_BLOCK ;
279- if let Some ( s) = self . blocks [ block_num] . successor ( idx % BITS_PER_BLOCK ) {
280- s + block_num * BITS_PER_BLOCK
281- } else {
282- for block_num in block_num + 1 ..self . blocks . len ( ) {
283- if let Some ( p) = self . blocks [ block_num] . successor ( 0 ) {
284- return block_num * BITS_PER_BLOCK + p;
285- }
286- }
287- panic ! ( "no successor found!" ) ;
288- }
289- }
290-
291187 /// Returns the number of elements in the set.
292188 pub fn max_rank ( & self ) -> usize {
293189 self . blocks
@@ -314,58 +210,55 @@ impl BitRank {
314210 ( rank, b_idx. map ( |i| ( block_num * BITS_PER_BLOCK ) + i) )
315211 }
316212 }
317-
318- /// The total size of the bit vec that was allocated.
319- /// **Note:** This is more like capacity than normal `len` in that it does not
320- /// consider how much of the bit vec is actually used.
321- #[ allow( dead_code) ]
322- pub fn capacity ( & self ) -> usize {
323- self . blocks . len ( ) * BITS_PER_BLOCK
324- }
325213}
326214
327215#[ cfg( test) ]
328216mod tests {
329- use itertools:: Itertools ;
330217 use rand:: distributions:: Uniform ;
331218 use rand:: prelude:: * ;
332219 use rand_chacha:: ChaCha8Rng ;
333220
334221 use super :: * ;
335222
336- fn write ( positions : & [ usize ] ) -> BitRank {
337- BitRank :: from_iter ( positions. iter ( ) . copied ( ) )
223+ /// Creates a `BitRank` containing the integers in `iter` (which should be strictly
224+ /// increasing).
225+ pub fn bitrank < I : IntoIterator < Item = usize > > ( iter : I ) -> BitRank {
226+ let mut builder = BitRankBuilder :: new ( ) ;
227+ for position in iter {
228+ builder. push ( position) ;
229+ }
230+ builder. finish ( )
338231 }
339232
340233 #[ test]
341234 fn test_rank_zero ( ) {
342- let br = BitRank :: from_iter ( [ 0 ] ) ;
235+ let br = bitrank ( [ 0 ] ) ;
343236 assert_eq ! ( br. rank( 0 ) , 0 ) ;
344237 assert_eq ! ( br. rank( 1 ) , 1 ) ;
345238 }
346239
347240 #[ test]
348241 fn test_empty ( ) {
349- let br = BitRank :: from_iter ( [ ] ) ;
242+ let br = bitrank ( [ ] ) ;
350243 assert ! ( br. blocks. is_empty( ) ) ;
351244 }
352245
353246 #[ test]
354247 fn test_index_out_of_bounds ( ) {
355- let br = BitRank :: from_iter ( [ BITS_PER_BLOCK - 1 ] ) ;
248+ let br = bitrank ( [ BITS_PER_BLOCK - 1 ] ) ;
356249 assert_eq ! ( br. rank( BITS_PER_BLOCK ) , 1 ) ;
357250 }
358251
359252 #[ test]
360253 #[ should_panic]
361254 fn test_duplicate_position ( ) {
362- write ( & [ 64 , 66 , 68 , 68 , 90 ] ) ;
255+ bitrank ( [ 64 , 66 , 68 , 68 , 90 ] ) ;
363256 }
364257
365258 #[ test]
366259 fn test_rank_exclusive ( ) {
367- let br = BitRank :: from_iter ( 0 ..132 ) ;
368- assert_eq ! ( br. capacity ( ) , BITS_PER_BLOCK ) ;
260+ let br = bitrank ( 0 ..132 ) ;
261+ assert_eq ! ( br. blocks . len ( ) , 1 ) ;
369262 assert_eq ! ( br. rank( 64 ) , 64 ) ;
370263 assert_eq ! ( br. rank( 132 ) , 132 ) ;
371264 }
@@ -374,39 +267,37 @@ mod tests {
374267 fn test_rank ( ) {
375268 let mut positions: Vec < usize > = ( 0 ..132 ) . collect ( ) ;
376269 positions. append ( & mut vec ! [ 138usize , 140 , 146 ] ) ;
377- let br = write ( & positions) ;
270+ let br = bitrank ( positions) ;
378271 assert_eq ! ( br. rank( 135 ) , 132 ) ;
379272
380- let bits2: Vec < usize > = ( 0 ..BITS_PER_BLOCK - 5 ) . collect ( ) ;
381- let br2 = write ( & bits2) ;
273+ let br2 = bitrank ( 0 ..BITS_PER_BLOCK - 5 ) ;
382274 assert_eq ! ( br2. rank( 169 ) , 169 ) ;
383275
384- let bits3: Vec < usize > = ( 0 ..BITS_PER_BLOCK + 5 ) . collect ( ) ;
385- let br3 = write ( & bits3) ;
276+ let br3 = bitrank ( 0 ..BITS_PER_BLOCK + 5 ) ;
386277 assert_eq ! ( br3. rank( BITS_PER_BLOCK ) , BITS_PER_BLOCK ) ;
387278 }
388279
389280 #[ test]
390281 fn test_rank_idx ( ) {
391282 let mut positions: Vec < usize > = ( 0 ..132 ) . collect ( ) ;
392283 positions. append ( & mut vec ! [ 138usize , 140 , 146 ] ) ;
393- let br = write ( & positions) ;
284+ let br = bitrank ( positions) ;
394285 assert_eq ! ( br. rank_select( 135 ) , ( 132 , Some ( 131 ) ) ) ;
395286
396287 let bits2: Vec < usize > = ( 0 ..BITS_PER_BLOCK - 5 ) . collect ( ) ;
397- let br2 = write ( & bits2) ;
288+ let br2 = bitrank ( bits2) ;
398289 assert_eq ! ( br2. rank_select( 169 ) , ( 169 , Some ( 168 ) ) ) ;
399290
400291 let bits3: Vec < usize > = ( 0 ..BITS_PER_BLOCK + 5 ) . collect ( ) ;
401- let br3 = write ( & bits3) ;
292+ let br3 = bitrank ( bits3) ;
402293 assert_eq ! ( br3. rank_select( BITS_PER_BLOCK ) , ( BITS_PER_BLOCK , None ) ) ;
403294
404295 let bits4: Vec < usize > = vec ! [ 1 , 1000 , 9999 , BITS_PER_BLOCK + 1 ] ;
405- let br4 = write ( & bits4) ;
296+ let br4 = bitrank ( bits4) ;
406297 assert_eq ! ( br4. rank_select( 10000 ) , ( 3 , Some ( 9999 ) ) ) ;
407298
408299 let bits5: Vec < usize > = vec ! [ 1 , 1000 , 9999 , BITS_PER_BLOCK + 1 ] ;
409- let br5 = write ( & bits5) ;
300+ let br5 = bitrank ( bits5) ;
410301 assert_eq ! ( br5. rank_select( BITS_PER_BLOCK ) , ( 3 , None ) ) ;
411302 }
412303
@@ -422,7 +313,7 @@ mod tests {
422313 // This isn't strictly necessary, given that the bit would just be toggled again, but it
423314 // ensures that we are meeting the contract.
424315 random_bits. dedup ( ) ;
425- let br = write ( & random_bits) ;
316+ let br = bitrank ( random_bits. iter ( ) . copied ( ) ) ;
426317 let mut rank = 0 ;
427318 let mut select = None ;
428319 for i in 0 ..random_bits. capacity ( ) {
@@ -442,7 +333,7 @@ mod tests {
442333 #[ test]
443334 fn test_rank_out_of_bounds ( ) {
444335 for i in 1 ..30 {
445- let br = write ( & [ BITS_PER_BLOCK * i - 1 ] ) ;
336+ let br = bitrank ( [ BITS_PER_BLOCK * i - 1 ] ) ;
446337 assert_eq ! ( br. max_rank( ) , 1 ) ;
447338 assert_eq ! ( br. rank( BITS_PER_BLOCK * i - 1 ) , 0 ) ;
448339 for j in 0 ..10 {
@@ -451,29 +342,9 @@ mod tests {
451342 }
452343 }
453344
454- #[ test]
455- fn test_predecessor_and_successor ( ) {
456- let mut rng = ChaCha8Rng :: seed_from_u64 ( 2 ) ;
457- let uniform = Uniform :: < usize > :: from ( 0 ..1_000_000 ) ;
458- let mut random_bits = Vec :: with_capacity ( 100_000 ) ;
459- for _ in 0 ..100_000 {
460- random_bits. push ( uniform. sample ( & mut rng) ) ;
461- }
462- random_bits. sort_unstable ( ) ;
463- random_bits. dedup ( ) ;
464- let br = write ( & random_bits) ;
465-
466- for ( i, j) in random_bits. iter ( ) . copied ( ) . tuple_windows ( ) {
467- for k in i..j {
468- assert_eq ! ( br. successor( k + 1 ) , j, "{i} {k} {j}" ) ;
469- assert_eq ! ( br. predecessor( k) , i, "{i} {k} {j}" ) ;
470- }
471- }
472- }
473-
474345 #[ test]
475346 fn test_large_gap ( ) {
476- let br = BitRank :: from_iter ( ( 3 ..4 ) . chain ( BITS_PER_BLOCK * 15 ..BITS_PER_BLOCK * 15 + 17 ) ) ;
347+ let br = bitrank ( ( 3 ..4 ) . chain ( BITS_PER_BLOCK * 15 ..BITS_PER_BLOCK * 15 + 17 ) ) ;
477348 for i in 1 ..15 {
478349 assert_eq ! ( br. rank( BITS_PER_BLOCK * i) , 1 ) ;
479350 }
0 commit comments