@@ -231,6 +231,21 @@ BitmapIndex<T>::SerializeIndexData(uint8_t* data_ptr) {
231231 }
232232}
233233
234+ template <typename T>
235+ std::pair<std::shared_ptr<uint8_t []>, size_t >
236+ BitmapIndex<T>::SerializeValidBitsetData() const {
237+ size_t valid_bitset_size = (total_num_rows_ + 7 ) / 8 ;
238+ std::shared_ptr<uint8_t []> valid_bitset_data (
239+ new uint8_t [valid_bitset_size]);
240+ memset (valid_bitset_data.get (), 0 , valid_bitset_size);
241+ for (size_t i = 0 ; i < total_num_rows_; ++i) {
242+ if (valid_bitset_[i]) {
243+ valid_bitset_data[i / 8 ] |= (1 << (i % 8 ));
244+ }
245+ }
246+ return std::make_pair (valid_bitset_data, valid_bitset_size);
247+ }
248+
234249template <typename T>
235250std::pair<std::shared_ptr<uint8_t []>, size_t >
236251BitmapIndex<T>::SerializeIndexMeta() {
@@ -263,6 +278,24 @@ BitmapIndex<std::string>::SerializeIndexData(uint8_t* data_ptr) {
263278 }
264279}
265280
281+ template <typename T>
282+ void
283+ BitmapIndex<T>::DeserializeValidBitsetData(const uint8_t * data_ptr,
284+ size_t data_size) {
285+ auto expected_size = (total_num_rows_ + 7 ) / 8 ;
286+ AssertInfo (data_size == expected_size,
287+ " bitmap valid_bitset size mismatch, expect {}, got {}" ,
288+ expected_size,
289+ data_size);
290+ valid_bitset_ = TargetBitmap (total_num_rows_, false );
291+ for (size_t i = 0 ; i < total_num_rows_; ++i) {
292+ uint8_t byte = data_ptr[i / 8 ];
293+ if (byte & (1 << (i % 8 ))) {
294+ valid_bitset_.set (i);
295+ }
296+ }
297+ }
298+
266299template <typename T>
267300BinarySet
268301BitmapIndex<T>::Serialize(const Config& config) {
@@ -279,6 +312,11 @@ BitmapIndex<T>::Serialize(const Config& config) {
279312 BinarySet ret_set;
280313 ret_set.Append (BITMAP_INDEX_DATA, index_data, index_data_size);
281314 ret_set.Append (BITMAP_INDEX_META, index_meta.first , index_meta.second );
315+ if (schema_.nullable ()) {
316+ auto valid_bitset = SerializeValidBitsetData ();
317+ ret_set.Append (
318+ BITMAP_INDEX_VALID_BITSET, valid_bitset.first , valid_bitset.second );
319+ }
282320
283321 LOG_INFO (" build bitmap index with cardinality = {}, num_rows = {}" ,
284322 data_.size (),
@@ -354,7 +392,8 @@ BitmapIndex<T>::ChooseIndexLoadMode(int64_t index_length) {
354392template <typename T>
355393void
356394BitmapIndex<T>::DeserializeIndexData(const uint8_t * data_ptr,
357- size_t index_length) {
395+ size_t index_length,
396+ bool rebuild_validity_from_postings) {
358397 ChooseIndexLoadMode (index_length);
359398 for (size_t i = 0 ; i < index_length; ++i) {
360399 T key;
@@ -370,8 +409,10 @@ BitmapIndex<T>::DeserializeIndexData(const uint8_t* data_ptr,
370409 } else {
371410 data_[key] = value;
372411 }
373- for (const auto & v : value) {
374- valid_bitset_.set (v);
412+ if (rebuild_validity_from_postings) {
413+ for (const auto & v : value) {
414+ valid_bitset_.set (v);
415+ }
375416 }
376417 }
377418}
@@ -413,8 +454,10 @@ BitmapIndex<T>::BuildOffsetCache() {
413454
414455template <>
415456void
416- BitmapIndex<std::string>::DeserializeIndexData(const uint8_t * data_ptr,
417- size_t index_length) {
457+ BitmapIndex<std::string>::DeserializeIndexData(
458+ const uint8_t * data_ptr,
459+ size_t index_length,
460+ bool rebuild_validity_from_postings) {
418461 ChooseIndexLoadMode (index_length);
419462 for (size_t i = 0 ; i < index_length; ++i) {
420463 size_t key_size;
@@ -433,8 +476,10 @@ BitmapIndex<std::string>::DeserializeIndexData(const uint8_t* data_ptr,
433476 } else {
434477 data_[key] = value;
435478 }
436- for (const auto & v : value) {
437- valid_bitset_.set (v);
479+ if (rebuild_validity_from_postings) {
480+ for (const auto & v : value) {
481+ valid_bitset_.set (v);
482+ }
438483 }
439484 }
440485}
@@ -468,7 +513,8 @@ BitmapIndex<T>::MMapIndexData(const std::string& file_name,
468513 const uint8_t * data_ptr,
469514 size_t data_size,
470515 size_t index_length,
471- milvus::proto::common::LoadPriority priority) {
516+ milvus::proto::common::LoadPriority priority,
517+ bool rebuild_validity_from_postings) {
472518 std::filesystem::create_directories (
473519 std::filesystem::path (file_name).parent_path ());
474520
@@ -483,8 +529,10 @@ BitmapIndex<T>::MMapIndexData(const std::string& file_name,
483529 roaring::Roaring value;
484530 value =
485531 roaring::Roaring::read (reinterpret_cast <const char *>(data_ptr));
486- for (const auto & v : value) {
487- valid_bitset_.set (v);
532+ if (rebuild_validity_from_postings) {
533+ for (const auto & v : value) {
534+ valid_bitset_.set (v);
535+ }
488536 }
489537
490538 // convert roaring vaule to frozen mode
@@ -537,7 +585,15 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
537585 index_meta_buffer->size );
538586 auto index_length = index_meta.first ;
539587 total_num_rows_ = index_meta.second ;
540- valid_bitset_ = TargetBitmap (total_num_rows_, false );
588+ valid_bitset_ = TargetBitmap (total_num_rows_, !schema_.nullable ());
589+ bool rebuild_validity_from_postings = schema_.nullable ();
590+
591+ auto valid_bitset_buffer = binary_set.GetByName (BITMAP_INDEX_VALID_BITSET);
592+ if (valid_bitset_buffer != nullptr ) {
593+ DeserializeValidBitsetData (valid_bitset_buffer->data .get (),
594+ valid_bitset_buffer->size );
595+ rebuild_validity_from_postings = false ;
596+ }
541597
542598 auto index_data_buffer = binary_set.GetByName (BITMAP_INDEX_DATA);
543599
@@ -558,9 +614,12 @@ BitmapIndex<T>::LoadWithoutAssemble(const BinarySet& binary_set,
558614 index_data_buffer->data .get (),
559615 index_data_buffer->size ,
560616 index_length,
561- priority);
617+ priority,
618+ rebuild_validity_from_postings);
562619 } else {
563- DeserializeIndexData (index_data_buffer->data .get (), index_length);
620+ DeserializeIndexData (index_data_buffer->data .get (),
621+ index_length,
622+ rebuild_validity_from_postings);
564623 }
565624
566625 if (enable_offset_cache.has_value () && enable_offset_cache.value ()) {
@@ -1313,6 +1372,12 @@ BitmapIndex<T>::WriteEntries(storage::IndexEntryWriter* writer) {
13131372 uint8_t * data_ptr = index_data.get ();
13141373 SerializeIndexData (data_ptr);
13151374 writer->WriteEntry (BITMAP_INDEX_DATA, index_data.get (), index_data_size);
1375+ if (schema_.nullable ()) {
1376+ auto valid_bitset = SerializeValidBitsetData ();
1377+ writer->WriteEntry (BITMAP_INDEX_VALID_BITSET,
1378+ valid_bitset.first .get (),
1379+ valid_bitset.second );
1380+ }
13161381
13171382 LOG_INFO (" write bitmap index entries with cardinality = {}, num_rows = {}" ,
13181383 data_.size (),
@@ -1329,7 +1394,18 @@ BitmapIndex<T>::LoadEntries(storage::IndexEntryReader& reader,
13291394 // V3 format: meta is in __meta__ entry
13301395 auto index_length = reader.GetMeta <size_t >(BITMAP_INDEX_LENGTH);
13311396 total_num_rows_ = reader.GetMeta <size_t >(BITMAP_INDEX_NUM_ROWS);
1332- valid_bitset_ = TargetBitmap (total_num_rows_, false );
1397+ valid_bitset_ = TargetBitmap (total_num_rows_, !schema_.nullable ());
1398+ bool rebuild_validity_from_postings = schema_.nullable ();
1399+
1400+ auto entry_names = reader.GetEntryNames ();
1401+ if (std::find (entry_names.begin (),
1402+ entry_names.end (),
1403+ BITMAP_INDEX_VALID_BITSET) != entry_names.end ()) {
1404+ auto valid_bitset_entry = reader.ReadEntry (BITMAP_INDEX_VALID_BITSET);
1405+ DeserializeValidBitsetData (valid_bitset_entry.data .data (),
1406+ valid_bitset_entry.data .size ());
1407+ rebuild_validity_from_postings = false ;
1408+ }
13331409
13341410 auto data_entry = reader.ReadEntry (BITMAP_INDEX_DATA);
13351411
@@ -1349,9 +1425,12 @@ BitmapIndex<T>::LoadEntries(storage::IndexEntryReader& reader,
13491425 data_entry.data .data (),
13501426 data_entry.data .size (),
13511427 index_length,
1352- priority);
1428+ priority,
1429+ rebuild_validity_from_postings);
13531430 } else {
1354- DeserializeIndexData (data_entry.data .data (), index_length);
1431+ DeserializeIndexData (data_entry.data .data (),
1432+ index_length,
1433+ rebuild_validity_from_postings);
13551434 }
13561435
13571436 if (enable_offset_cache.has_value () && enable_offset_cache.value ()) {
0 commit comments