@@ -528,46 +528,83 @@ void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
 }


+static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) {
+  mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX);
+  *shift = 64 - mi_clz(divisor - 1);
+  *magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1);
+}
+
+static size_t mi_fast_divide(size_t n, uint64_t magic, size_t shift) {
+  mi_assert_internal(n <= UINT32_MAX);
+  return ((((uint64_t)n * magic) >> 32) + n) >> shift;
+}
+
 bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg) {
   mi_assert(area != NULL);
   if (area == NULL) return true;
   mi_assert(page != NULL);
   if (page == NULL) return true;

-  _mi_page_free_collect(page,true);
+  _mi_page_free_collect(page,true);  // collect both thread_delayed and local_free
   mi_assert_internal(page->local_free == NULL);
   if (page->used == 0) return true;

-  const size_t bsize = mi_page_block_size(page);
-  const size_t ubsize = mi_page_usable_block_size(page); // without padding
-  size_t psize;
-  uint8_t* pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
+  size_t psize;
+  uint8_t* const pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
+  mi_heap_t* const heap = mi_page_heap(page);
+  const size_t bsize  = mi_page_block_size(page);
+  const size_t ubsize = mi_page_usable_block_size(page); // without padding

+  // optimize page with one block
   if (page->capacity == 1) {
-    // optimize page with one block
     mi_assert_internal(page->used == 1 && page->free == NULL);
     return visitor(mi_page_heap(page), area, pstart, ubsize, arg);
   }
+  mi_assert(bsize <= UINT32_MAX);
+
+  // optimize full pages
+  if (page->used == page->capacity) {
+    uint8_t* block = pstart;
+    for (size_t i = 0; i < page->capacity; i++) {
+      if (!visitor(heap, area, block, ubsize, arg)) return false;
+      block += bsize;
+    }
+    return true;
+  }

   // create a bitmap of free blocks.
   #define MI_MAX_BLOCKS  (MI_SMALL_PAGE_SIZE / sizeof(void*))
-  uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)];
-  memset(free_map, 0, sizeof(free_map));
+  uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS];
+  const uintptr_t bmapsize = _mi_divide_up(page->capacity, MI_INTPTR_BITS);
+  memset(free_map, 0, bmapsize * sizeof(intptr_t));
+  if (page->capacity % MI_INTPTR_BITS != 0) {
+    // mark left-over bits at the end as free
+    size_t shift   = (page->capacity % MI_INTPTR_BITS);
+    uintptr_t mask = (UINTPTR_MAX << shift);
+    free_map[bmapsize - 1] = mask;
+  }
+
+  // fast repeated division by the block size
+  uint64_t magic;
+  size_t   shift;
+  mi_get_fast_divisor(bsize, &magic, &shift);

   #if MI_DEBUG>1
   size_t free_count = 0;
   #endif
-  for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) {
+  for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) {
     #if MI_DEBUG>1
     free_count++;
     #endif
     mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
     size_t offset = (uint8_t*)block - pstart;
     mi_assert_internal(offset % bsize == 0);
-    size_t blockidx = offset / bsize;  // Todo: avoid division?
-    mi_assert_internal(blockidx < MI_MAX_BLOCKS);
-    size_t bitidx  = (blockidx / sizeof(uintptr_t));
-    size_t bit     = blockidx - (bitidx * sizeof(uintptr_t));
+    mi_assert_internal(offset <= UINT32_MAX);
+    size_t blockidx = mi_fast_divide(offset, magic, shift);
+    mi_assert_internal(blockidx == offset / bsize);
+    mi_assert_internal(blockidx < MI_MAX_BLOCKS);
+    size_t bitidx  = (blockidx / MI_INTPTR_BITS);
+    size_t bit     = blockidx - (bitidx * MI_INTPTR_BITS);
     free_map[bitidx] |= ((uintptr_t)1 << bit);
   }
   mi_assert_internal(page->capacity == (free_count + page->used));
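The two helpers added at the top of this hunk implement the standard "division by an invariant integer via multiplication" trick: for a divisor d that fits in 32 bits, mi_get_fast_divisor picks shift = ceil(log2(d)) and magic = floor(2^32 * (2^shift - d) / d) + 1, after which floor(n/d) for any 32-bit n is (((n * magic) >> 32) + n) >> shift, i.e. one multiply and two shifts per free-list entry instead of a 64-bit division. A minimal standalone check of that identity (not part of the commit; it assumes a 64-bit size_t and substitutes __builtin_clzll for mi_clz):

// Standalone sketch (not mimalloc code): verify the magic-number division
// scheme used by mi_get_fast_divisor/mi_fast_divide against plain division.
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static void get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) {
  assert(divisor > 0 && divisor <= UINT32_MAX);
  // shift = ceil(log2(divisor)); guard divisor==1 since clz(0) is undefined
  *shift = (divisor == 1 ? 0 : 64 - (size_t)__builtin_clzll((uint64_t)divisor - 1));
  *magic = ((((uint64_t)1 << 32) * (((uint64_t)1 << *shift) - divisor)) / divisor + 1);
}

static size_t fast_divide(size_t n, uint64_t magic, size_t shift) {
  assert(n <= UINT32_MAX);
  return (size_t)(((((uint64_t)n * magic) >> 32) + n) >> shift);
}

int main(void) {
  const size_t divisors[] = { 8, 48, 72, 1000, 4096 };  // sample block sizes
  for (size_t d = 0; d < sizeof(divisors)/sizeof(divisors[0]); d++) {
    uint64_t magic; size_t shift;
    get_fast_divisor(divisors[d], &magic, &shift);
    for (uint32_t n = 0; n < (1u << 20); n++) {
      assert(fast_divide(n, magic, shift) == n / divisors[d]);  // matches plain division
    }
    printf("divisor %zu: magic=%llu shift=%zu\n",
           divisors[d], (unsigned long long)magic, shift);
  }
  return 0;
}

For a 48-byte block size, for example, this yields shift = 6 and magic = 1431655766, and the added assertion blockidx == offset / bsize above cross-checks the fast path against plain division in debug builds.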
@@ -576,19 +613,30 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_
   #if MI_DEBUG>1
   size_t used_count = 0;
   #endif
-  for (size_t i = 0; i < page->capacity; i++) {
-    size_t bitidx = (i / sizeof(uintptr_t));
-    size_t bit = i - (bitidx * sizeof(uintptr_t));
-    uintptr_t m = free_map[bitidx];
-    if (bit == 0 && m == UINTPTR_MAX) {
-      i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
+  uint8_t* block = pstart;
+  for (size_t i = 0; i < bmapsize; i++) {
+    if (free_map[i] == 0) {
+      // every block is in use
+      for (size_t j = 0; j < MI_INTPTR_BITS; j++) {
+        #if MI_DEBUG>1
+        used_count++;
+        #endif
+        if (!visitor(heap, area, block, ubsize, arg)) return false;
+        block += bsize;
+      }
     }
-    else if ((m & ((uintptr_t)1 << bit)) == 0) {
-      #if MI_DEBUG>1
-      used_count++;
-      #endif
-      uint8_t* block = pstart + (i * bsize);
-      if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
+    else {
+      // visit the used blocks in the mask
+      uintptr_t m = ~free_map[i];
+      while (m != 0) {
+        #if MI_DEBUG>1
+        used_count++;
+        #endif
+        size_t bitidx = mi_ctz(m);
+        if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false;
+        m &= m - 1;  // clear least significant bit
+      }
+      block += bsize * MI_INTPTR_BITS;
     }
   }
   mi_assert_internal(page->used == used_count);
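In this second hunk the per-block loop is replaced by a scan over the free-block bitmap one word at a time: an all-zero word means every block in that run of MI_INTPTR_BITS blocks is in use, and otherwise the used blocks are exactly the set bits of ~free_map[i], visited by repeatedly taking the lowest set bit with mi_ctz and clearing it with m &= m - 1. Marking the left-over bits past page->capacity as free in the first hunk is what keeps this inversion from visiting blocks that do not exist. A small standalone illustration of the bit-scanning pattern (not from the commit; __builtin_ctzll stands in for mi_ctz):

// Standalone sketch: walk the set bits of an inverted free-map word,
// lowest bit first, the same way the new visiting loop does.
#include <stdint.h>
#include <stdio.h>

int main(void) {
  // Suppose a page has 10 blocks covered by this word; blocks 0, 3 and 7 are free.
  uint64_t free_bits = ~(uint64_t)0 << 10;              // left-over bits pre-marked free
  free_bits |= (1ull << 0) | (1ull << 3) | (1ull << 7); // actually free blocks

  uint64_t m = ~free_bits;                              // set bits = used blocks
  while (m != 0) {
    size_t blockidx = (size_t)__builtin_ctzll(m);       // lowest used block index
    printf("visit block %zu\n", blockidx);              // visitor(heap, area, block + blockidx*bsize, ...)
    m &= m - 1;                                         // clear the lowest set bit
  }
  return 0;
}

Compared to the old loop, which tested every bit position individually, this does work proportional to the number of used blocks per word plus one test per word, and the new page->used == page->capacity fast path in the first hunk skips building the bitmap entirely for full pages.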