Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit f7fe5bf

Browse files
committed
optimize heap walks, by Sam Gross, upstream of python/cpython#114133
1 parent 855e3b2 commit f7fe5bf
Copy full SHA for f7fe5bf

File tree

2 files changed

+87
-25
lines changed
Filter options

2 files changed

+87
-25
lines changed

‎src/heap.c

Copy file name to clipboardExpand all lines: src/heap.c
Lines changed: 73 additions & 25 deletions (+73 −25)
Original file line numberDiff line numberDiff line change
@@ -528,46 +528,83 @@ void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page) {
528528
}
529529

530530

531+
// Precompute a (magic, shift) pair so that later divisions by `divisor`
// can be done with a multiply and shifts (see mi_fast_divide).
// Only valid for 32-bit divisors; presumably mi_clz counts leading zeros
// over the full word width — confirm against its definition.
static void mi_get_fast_divisor(size_t divisor, uint64_t* magic, size_t* shift) {
  mi_assert_internal(divisor > 0 && divisor <= UINT32_MAX);
  // smallest s with (1 << s) >= divisor (bit width of divisor-1)
  const size_t s = 64 - mi_clz(divisor - 1);
  *shift = s;
  // multiplier such that ((n*magic)>>32 + n) >> s == n / divisor for 32-bit n
  *magic = (((uint64_t)1 << 32) * (((uint64_t)1 << s) - divisor)) / divisor + 1;
}
536+
537+
// Divide `n` by the divisor encoded in (magic, shift) as produced by
// mi_get_fast_divisor; avoids a hardware divide in hot loops.
// Only valid for n that fits in 32 bits.
static size_t mi_fast_divide(size_t n, uint64_t magic, size_t shift) {
  mi_assert_internal(n <= UINT32_MAX);
  const uint64_t hi = ((uint64_t)n * magic) >> 32;  // high 32 bits of n*magic
  return (hi + n) >> shift;
}
541+
531542
bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_block_visit_fun* visitor, void* arg) {
532543
mi_assert(area != NULL);
533544
if (area==NULL) return true;
534545
mi_assert(page != NULL);
535546
if (page == NULL) return true;
536547

537-
_mi_page_free_collect(page,true);
548+
_mi_page_free_collect(page,true); // collect both thread_delayed and local_free
538549
mi_assert_internal(page->local_free == NULL);
539550
if (page->used == 0) return true;
540551

541-
const size_t bsize = mi_page_block_size(page);
542-
const size_t ubsize = mi_page_usable_block_size(page); // without padding
543-
size_t psize;
544-
uint8_t* pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
552+
size_t psize;
553+
uint8_t* const pstart = _mi_segment_page_start(_mi_page_segment(page), page, &psize);
554+
mi_heap_t* const heap = mi_page_heap(page);
555+
const size_t bsize = mi_page_block_size(page);
556+
const size_t ubsize = mi_page_usable_block_size(page); // without padding
545557

558+
// optimize page with one block
546559
if (page->capacity == 1) {
547-
// optimize page with one block
548560
mi_assert_internal(page->used == 1 && page->free == NULL);
549561
return visitor(mi_page_heap(page), area, pstart, ubsize, arg);
550562
}
563+
mi_assert(bsize <= UINT32_MAX);
564+
565+
// optimize full pages
566+
if (page->used == page->capacity) {
567+
uint8_t* block = pstart;
568+
for (size_t i = 0; i < page->capacity; i++) {
569+
if (!visitor(heap, area, block, ubsize, arg)) return false;
570+
block += bsize;
571+
}
572+
return true;
573+
}
551574

552575
// create a bitmap of free blocks.
553576
#define MI_MAX_BLOCKS (MI_SMALL_PAGE_SIZE / sizeof(void*))
554-
uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)];
555-
memset(free_map, 0, sizeof(free_map));
577+
uintptr_t free_map[MI_MAX_BLOCKS / MI_INTPTR_BITS];
578+
const uintptr_t bmapsize = _mi_divide_up(page->capacity, MI_INTPTR_BITS);
579+
memset(free_map, 0, bmapsize * sizeof(intptr_t));
580+
if (page->capacity % MI_INTPTR_BITS != 0) {
581+
// mark left-over bits at the end as free
582+
size_t shift = (page->capacity % MI_INTPTR_BITS);
583+
uintptr_t mask = (UINTPTR_MAX << shift);
584+
free_map[bmapsize - 1] = mask;
585+
}
586+
587+
// fast repeated division by the block size
588+
uint64_t magic;
589+
size_t shift;
590+
mi_get_fast_divisor(bsize, &magic, &shift);
556591

557592
#if MI_DEBUG>1
558593
size_t free_count = 0;
559594
#endif
560-
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) {
595+
for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) {
561596
#if MI_DEBUG>1
562597
free_count++;
563598
#endif
564599
mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize));
565600
size_t offset = (uint8_t*)block - pstart;
566601
mi_assert_internal(offset % bsize == 0);
567-
size_t blockidx = offset / bsize; // Todo: avoid division?
568-
mi_assert_internal( blockidx < MI_MAX_BLOCKS);
569-
size_t bitidx = (blockidx / sizeof(uintptr_t));
570-
size_t bit = blockidx - (bitidx * sizeof(uintptr_t));
602+
mi_assert_internal(offset <= UINT32_MAX);
603+
size_t blockidx = mi_fast_divide(offset, magic, shift);
604+
mi_assert_internal(blockidx == offset / bsize);
605+
mi_assert_internal(blockidx < MI_MAX_BLOCKS);
606+
size_t bitidx = (blockidx / MI_INTPTR_BITS);
607+
size_t bit = blockidx - (bitidx * MI_INTPTR_BITS);
571608
free_map[bitidx] |= ((uintptr_t)1 << bit);
572609
}
573610
mi_assert_internal(page->capacity == (free_count + page->used));
@@ -576,19 +613,30 @@ bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t* page, mi_
576613
#if MI_DEBUG>1
577614
size_t used_count = 0;
578615
#endif
579-
for (size_t i = 0; i < page->capacity; i++) {
580-
size_t bitidx = (i / sizeof(uintptr_t));
581-
size_t bit = i - (bitidx * sizeof(uintptr_t));
582-
uintptr_t m = free_map[bitidx];
583-
if (bit == 0 && m == UINTPTR_MAX) {
584-
i += (sizeof(uintptr_t) - 1); // skip a run of free blocks
616+
uint8_t* block = pstart;
617+
for (size_t i = 0; i < bmapsize; i++) {
618+
if (free_map[i] == 0) {
619+
// every block is in use
620+
for (size_t j = 0; j < MI_INTPTR_BITS; j++) {
621+
#if MI_DEBUG>1
622+
used_count++;
623+
#endif
624+
if (!visitor(heap, area, block, ubsize, arg)) return false;
625+
block += bsize;
626+
}
585627
}
586-
else if ((m & ((uintptr_t)1 << bit)) == 0) {
587-
#if MI_DEBUG>1
588-
used_count++;
589-
#endif
590-
uint8_t* block = pstart + (i * bsize);
591-
if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false;
628+
else {
629+
// visit the used blocks in the mask
630+
uintptr_t m = ~free_map[i];
631+
while (m != 0) {
632+
#if MI_DEBUG>1
633+
used_count++;
634+
#endif
635+
size_t bitidx = mi_ctz(m);
636+
if (!visitor(heap, area, block + (bitidx * bsize), ubsize, arg)) return false;
637+
m &= m - 1; // clear least significant bit
638+
}
639+
block += bsize * MI_INTPTR_BITS;
592640
}
593641
}
594642
mi_assert_internal(page->used == used_count);

‎test/test-stress.c

Copy file name to clipboardExpand all lines: test/test-stress.c
Lines changed: 14 additions & 0 deletions (+14)
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,16 @@ static void free_items(void* p) {
129129
custom_free(p);
130130
}
131131

132+
/*
133+
// Heap-walk visitor for mi_heap_visit_blocks: accumulates the total size of
// all visited blocks into the size_t pointed to by `arg`.
// Returns true to continue the walk over every block.
static bool visit_blocks(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) {
  (void)(heap); (void)(area);
  size_t* total = (size_t*)arg;
  if (block != NULL) {
    *total += block_size;  // fix: `total += block_size` advanced the pointer instead of adding to the total
  }
  return true;
}
141+
*/
132142

133143
static void stress(intptr_t tid) {
134144
//bench_start_thread();
@@ -173,6 +183,10 @@ static void stress(intptr_t tid) {
173183
data[data_idx] = q;
174184
}
175185
}
186+
// walk the heap
187+
// size_t total = 0;
188+
// mi_heap_visit_blocks(mi_heap_get_default(), true, visit_blocks, &total);
189+
176190
// free everything that is left
177191
for (size_t i = 0; i < retain_top; i++) {
178192
free_items(retained[i]);

0 commit comments

Comments
0 (0)
Morty Proxy This is a proxified and sanitized view of the page, visit original site.