From 4d0a0065299e299dbd628540b8b14abc12efc28d Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sat, 29 Jun 2019 14:33:22 -0700 Subject: [PATCH 01/17] Add radix tree implementation for obmalloc address_in_range(). The radix tree approach is a relatively simple and memory sanitary alternative to the current (slightly) unsanitary address_in_range(). The radix tree is currently only implemented for 64-bit platforms. Adding a 32-bit version would be relatively easy. --- Objects/obmalloc.c | 266 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 260 insertions(+), 6 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 03d0e8e51264c65..4818fcabe36b151 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -834,6 +834,14 @@ static int running_on_valgrind = -1; * -- Main tunable settings section -- */ +/* If defined, use radix tree to find if address is controlled by + * obmalloc. Otherwise, we use a slightly memory unsanitary scheme that + * has the advantage of performing very well. + */ +#if SIZEOF_VOID_P == 8 +#define WITH_RADIX_TREE +#endif + /* * Alignment of addresses returned to the user. 8-bytes alignment works * on most current architectures (with 32-bit or 64-bit address busses). @@ -907,18 +915,32 @@ static int running_on_valgrind = -1; * Arenas are allocated with mmap() on systems supporting anonymous memory * mappings to reduce heap fragmentation. */ -#define ARENA_SIZE (256 << 10) /* 256KB */ +#define ARENA_BITS 18 +#define ARENA_SIZE (1 << ARENA_BITS) /* 256 KiB */ #ifdef WITH_MEMORY_LIMITS #define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE) #endif /* - * Size of the pools used for small blocks. Should be a power of 2, - * between 1K and SYSTEM_PAGE_SIZE, that is: 1k, 2k, 4k. + * Size of the pools used for small blocks. Must be a power of 2. */ -#define POOL_SIZE SYSTEM_PAGE_SIZE /* must be 2^N */ -#define POOL_SIZE_MASK SYSTEM_PAGE_SIZE_MASK +#ifdef WITH_RADIX_TREE + +#define POOL_BITS 12 /* 4 KiB */ +#define POOL_SIZE (1 << POOL_BITS) /* 4 KiB */ + +#else + +/* + * For non-radix tree, must be between 1K and SYSTEM_PAGE_SIZE. E.g. 1k, 2k, + * 4k. + */ +#define POOL_SIZE SYSTEM_PAGE_SIZE + +#endif /* !WITH_RADIX_TREE */ + +#define POOL_SIZE_MASK (POOL_SIZE - 1) #define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE) #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE @@ -1233,6 +1255,14 @@ _Py_GetAllocatedBlocks(void) return n; } +#ifdef WITH_RADIX_TREE +static int arena_map_is_marked(block *op); +static int arena_map_mark_used(uintptr_t arena_base, int is_used); + +/* number of used radix tree nodes */ +static int arena_map1_count; +static int arena_map2_count; +#endif /* Allocate a new arena. If we run out of memory, return NULL. Else * allocate a new arena, and return the address of an arena_object @@ -1302,6 +1332,15 @@ new_arena(void) unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0); address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); +#ifdef WITH_RADIX_TREE + if (address != NULL) { + if (!arena_map_mark_used((uintptr_t)address, 1)) { + /* marking arena in radix tree failed, abort */ + _PyObject_Arena.free(_PyObject_Arena.ctx, address, ARENA_SIZE); + address = NULL; + } + } +#endif if (address == NULL) { /* The allocation failed: return NULL after putting the * arenaobj back. @@ -1332,6 +1371,19 @@ new_arena(void) } +#ifdef WITH_RADIX_TREE + +/* Return true if and only if P is an address that was allocated by + pymalloc. When the radix tree is used, 'poolp' is unused. + */ +static bool +address_in_range(void *p, poolp pool) +{ + return arena_map_is_marked(p); +} + +#else /* !WITH_RADIX_TREE */ + /* address_in_range(P, POOL) @@ -1422,7 +1474,7 @@ address_in_range(void *p, poolp pool) (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE && arenas[arenaindex].address != 0; } - +#endif /* !WITH_RADIX_TREE */ /*==========================================================================*/ @@ -1768,6 +1820,11 @@ insert_to_freepool(poolp pool) ao->nextarena = unused_arena_objects; unused_arena_objects = ao; +#ifdef WITH_RADIX_TREE + /* mark arena as not under control of obmalloc */ + arena_map_mark_used(ao->address, 0); +#endif + /* Free the entire arena. */ _PyObject_Arena.free(_PyObject_Arena.ctx, (void *)ao->address, ARENA_SIZE); @@ -2711,6 +2768,12 @@ _PyObject_DebugMallocStats(FILE *out) (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas); (void)printone(out, "# arenas highwater mark", narenas_highwater); (void)printone(out, "# arenas allocated current", narenas); +#ifdef WITH_RADIX_TREE + (void)printone(out, "# arena map level 1 nodes", arena_map1_count); + (void)printone(out, "# arena map level 2 nodes", arena_map2_count); + fputc('\n', out); +#endif + PyOS_snprintf(buf, sizeof(buf), "%zu arenas * %d bytes/arena", @@ -2733,4 +2796,195 @@ _PyObject_DebugMallocStats(FILE *out) return 1; } + +#ifdef WITH_RADIX_TREE +/* radix tree for tracking arena coverage + + key format (2^20 arena size) + 15 -> MAP1 + 15 -> MAP2 + 14 -> MAP3 + 20 -> ideal aligned arena + ---- + 64 +*/ + +/* number of bits in a pointer */ +#define BITS 64 + +#if SIZEOF_VOID_P != 8 + /* Currently this code works for 64-bit pointers only. For 32-bits, we + * could use a two-layer tree but it hasn't been implemented yet. */ +#error "Radix tree requires 64-bit pointers." +#endif + +#define ARENA_MASK (ARENA_SIZE - 1) + +/* arena_coverage_t members require this to be true */ +#if ARENA_BITS >= 32 +# error "arena size must be < 2^32" +#endif + +/* bits used for MAP1 and MAP2 nodes */ +#define INTERIOR_BITS ((BITS - ARENA_BITS + 2) / 3) + +#define MAP1_BITS INTERIOR_BITS +#define MAP1_LENGTH (1 << MAP1_BITS) + +#define MAP2_BITS INTERIOR_BITS +#define MAP2_LENGTH (1 << MAP2_BITS) +#define MAP2_MASK (MAP2_LENGTH - 1) + +#define MAP3_BITS (BITS - ARENA_BITS - 2*INTERIOR_BITS) +#define MAP3_LENGTH (1 << MAP3_BITS) +#define MAP3_MASK (MAP3_LENGTH - 1) + +#define MAP3_SHIFT ARENA_BITS +#define MAP2_SHIFT (MAP3_BITS + MAP3_SHIFT) +#define MAP1_SHIFT (MAP2_BITS + MAP2_SHIFT) + +#define AS_UINT(p) ((uintptr_t)(p)) +#define MAP3_INDEX(p) ((AS_UINT(p) >> MAP3_SHIFT) & MAP3_MASK) +#define MAP2_INDEX(p) ((AS_UINT(p) >> MAP2_SHIFT) & MAP2_MASK) +#define MAP1_INDEX(p) (AS_UINT(p) >> MAP1_SHIFT) + +/* See arena_map_mark_used() for the meaning of these members. */ +typedef struct { + int32_t tail_hi; + int32_t tail_lo; +} arena_coverage_t; + +typedef struct arena_map3 { + /* The members tail_hi and tail_lo are accessed together. So, it + * better to have them as an array of structs, rather than two + * arrays. + */ + arena_coverage_t arenas[MAP3_LENGTH]; +} arena_map3_t; + +typedef struct arena_map2 { + struct arena_map3 *ptrs[MAP2_LENGTH]; +} arena_map2_t; + +typedef struct arena_map1 { + struct arena_map2 *ptrs[MAP1_LENGTH]; +} arena_map1_t; + +/* The root of tree (MAP1) and contains all MAP2 nodes. Note that by + * initializing like this, the memory should be in the BSS. The OS will + * only map pages as the MAP2 nodes get used (OS pages are demand loaded + * as needed). + */ +static arena_map1_t arena_map_root; + +/* Return a pointer to a MAP3 node, return NULL if it doesn't exist + * or it cannot be created */ +static arena_map3_t * +arena_map_get(block *p, int create) +{ + int i1 = MAP1_INDEX(p); + if (arena_map_root.ptrs[i1] == NULL) { + if (!create) { + return NULL; + } + arena_map2_t *n = PyMem_RawCalloc(1, sizeof(arena_map2_t)); + if (n == NULL) { + return NULL; + } + arena_map_root.ptrs[i1] = n; + arena_map1_count++; + } + int i2 = MAP2_INDEX(p); + if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) { + if (!create) { + return NULL; + } + arena_map3_t *n = PyMem_RawCalloc(1, sizeof(arena_map3_t)); + if (n == NULL) { + return NULL; + } + arena_map_root.ptrs[i1]->ptrs[i2] = n; + arena_map2_count++; + } + return arena_map_root.ptrs[i1]->ptrs[i2]; +} + +/* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw + * away 24 bits of the address. That reduces the space requirement of + * the tree compared to similar radix tree page-map schemes. In + * exchange for slashing the space requirement, it needs more + * computation to check an address. + * + * Tracking coverage is done by "ideal" arena address. It is easier to + * explain in decimal so let's say that the arena size is 100 bytes. + * Then, ideal addresses are 100, 200, 300, etc. For checking if a + * pointer address is inside an actual arena, we have to check two ideal + * arena addresses. E.g. if pointer is 357, we need to check 200 and + * 300. In the rare case that an arena is aligned in the ideal way + * (e.g. base address of arena is 200) then we only have to check one + * ideal address. + * + * The tree nodes for 200 and 300 both store the address of arena. + * There are two cases: the arena starts at a lower ideal arena and + * extends to this one, or the arena starts in this arena and extends to + * the next ideal arena. The tail_lo and tail_hi members correspond to + * these two cases. + */ + + +/* mark or unmark addresses covered by arena */ +static int +arena_map_mark_used(uintptr_t arena_base, int is_used) +{ + arena_map3_t *n_hi = arena_map_get((block *)arena_base, is_used); + if (n_hi == NULL) { + assert(is_used); /* otherwise node should already exist */ + return 0; /* failed to allocate space for node */ + } + int i3 = MAP3_INDEX((block *)arena_base); + int32_t tail = (int32_t)(arena_base & ARENA_MASK); + if (tail == 0) { + /* is ideal arena address */ + n_hi->arenas[i3].tail_hi = is_used ? -1 : 0; + } + else { + /* arena_base address is not ideal (aligned to arena size) and + * so it potentially covers two MAP3 nodes. Get the MAP3 node + * for the next arena. Note that it might be in different MAP1 + * and MAP2 nodes as well so we need to call arena_map_get() + * again (do the full tree traversal). + */ + n_hi->arenas[i3].tail_hi = is_used ? tail : 0; + uintptr_t arena_base_next = arena_base + ARENA_SIZE; + arena_map3_t *n_lo = arena_map_get((block *)arena_base_next, is_used); + if (n_lo == NULL) { + assert(is_used); /* otherwise should already exist */ + n_hi->arenas[i3].tail_hi = 0; + return 0; /* failed to allocate space for node */ + } + int i3_next = MAP3_INDEX(arena_base_next); + n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0; + } + return 1; +} + +/* Return true if 'p' is a pointer inside an obmalloc arena. + * _PyObject_Free() calls this so it needs to be very fast. */ +static int +arena_map_is_marked(block *p) +{ + arena_map3_t *n = arena_map_get(p, 0); + if (n == NULL) { + return 0; + } + int i3 = MAP3_INDEX(p); + /* in order to fit tail into 32-bits, ARENA_BITS must be <= 32 */ + int32_t hi = n->arenas[i3].tail_hi; + int32_t lo = n->arenas[i3].tail_lo; + int32_t tail = (int32_t)(AS_UINT(p) & ARENA_MASK); + return (tail < lo) || (tail >= hi && hi != 0); +} + +#endif /* WITH_RADIX_TREE */ + #endif /* #ifdef WITH_PYMALLOC */ From 295ad0b50048bf780118888db6331b323ba42597 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sat, 29 Jun 2019 16:29:33 -0700 Subject: [PATCH 02/17] Add News entry. --- .../2019-06-29-16-27-23.bpo-37448.QI5V97.rst | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-06-29-16-27-23.bpo-37448.QI5V97.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-06-29-16-27-23.bpo-37448.QI5V97.rst b/Misc/NEWS.d/next/Core and Builtins/2019-06-29-16-27-23.bpo-37448.QI5V97.rst new file mode 100644 index 000000000000000..af17e608b09be87 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-06-29-16-27-23.bpo-37448.QI5V97.rst @@ -0,0 +1,8 @@ +Add radix tree implementation of obmalloc's address_in_range(). Use it +by default on 64-bit platforms. The radix tree eliminates the +(slightly) memory unsanitary behavior of the current address_in_range() +approach. It also has the small advantage of easily allowing larger +pool sizes. The current address_in_range() requires that pools be no +larger than the OS page size. + +Co-authored-by: Tim Peters From e6e100b2286adeee2d434d424d94201ffde49625 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sun, 30 Jun 2019 13:44:41 -0700 Subject: [PATCH 03/17] Improve comment about size of ARENA_BITS. --- Objects/obmalloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 4818fcabe36b151..78b0c85b798f0ce 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -2978,7 +2978,7 @@ arena_map_is_marked(block *p) return 0; } int i3 = MAP3_INDEX(p); - /* in order to fit tail into 32-bits, ARENA_BITS must be <= 32 */ + /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */ int32_t hi = n->arenas[i3].tail_hi; int32_t lo = n->arenas[i3].tail_lo; int32_t tail = (int32_t)(AS_UINT(p) & ARENA_MASK); From c96c215a08908027eb0732bf328c54cacb1ec352 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Mon, 1 Jul 2019 11:04:34 -0700 Subject: [PATCH 04/17] handle overflow of arena_base_next --- Objects/obmalloc.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 78b0c85b798f0ce..4eff0d6700ce357 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -2955,15 +2955,18 @@ arena_map_mark_used(uintptr_t arena_base, int is_used) * again (do the full tree traversal). */ n_hi->arenas[i3].tail_hi = is_used ? tail : 0; - uintptr_t arena_base_next = arena_base + ARENA_SIZE; - arena_map3_t *n_lo = arena_map_get((block *)arena_base_next, is_used); - if (n_lo == NULL) { - assert(is_used); /* otherwise should already exist */ - n_hi->arenas[i3].tail_hi = 0; - return 0; /* failed to allocate space for node */ + uintptr_t arena_next = arena_base + ARENA_SIZE; + /* check for overflow of arena_next */ + if (arena_next > arena_base) { + arena_map3_t *n_lo = arena_map_get((block *)arena_next, is_used); + if (n_lo == NULL) { + assert(is_used); /* otherwise should already exist */ + n_hi->arenas[i3].tail_hi = 0; + return 0; /* failed to allocate space for node */ + } + int i3_next = MAP3_INDEX(arena_next); + n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0; } - int i3_next = MAP3_INDEX(arena_base_next); - n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0; } return 1; } From e2d9541496cc924734d87e8e7adb40fd4cb06e35 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 2 Jul 2019 22:30:26 -0700 Subject: [PATCH 05/17] change arena_next overflow check to an assert --- Objects/obmalloc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 4eff0d6700ce357..cb2a4c910e7a5da 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -2956,7 +2956,11 @@ arena_map_mark_used(uintptr_t arena_base, int is_used) */ n_hi->arenas[i3].tail_hi = is_used ? tail : 0; uintptr_t arena_next = arena_base + ARENA_SIZE; - /* check for overflow of arena_next */ + /* If arena_base is a legit arena address, so is arena_next - 1 + * (last address in arena). If arena_next overflows then it + * must overflow to 0. However, that would mean arena_base was + * "ideal" and we should not be in this case. */ + assert(arena_base < arena_next); if (arena_next > arena_base) { arena_map3_t *n_lo = arena_map_get((block *)arena_next, is_used); if (n_lo == NULL) { From d7361d676bfe0765e3bd9cda2681187b2bbee460 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 2 Jul 2019 22:31:07 -0700 Subject: [PATCH 06/17] Exploit current hardware physical address bits Current 64-bit chips only have 48 bits of physical addresses. Exploit that to further shrink the size of the radix tree. --- Objects/obmalloc.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index cb2a4c910e7a5da..48eaa86bad7b849 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -2825,17 +2825,24 @@ _PyObject_DebugMallocStats(FILE *out) # error "arena size must be < 2^32" #endif +/* Current 64-bit processors are limited to 48-bit physical addresses. For + * now, the top 17 bits of addresses will all be equal to bit 2**47. If that + * changes in the future, this must be adjusted upwards. + */ +#define PHYSICAL_BITS 48 + /* bits used for MAP1 and MAP2 nodes */ -#define INTERIOR_BITS ((BITS - ARENA_BITS + 2) / 3) +#define INTERIOR_BITS ((PHYSICAL_BITS - ARENA_BITS + 2) / 3) #define MAP1_BITS INTERIOR_BITS #define MAP1_LENGTH (1 << MAP1_BITS) +#define MAP1_MASK (MAP3_LENGTH - 1) #define MAP2_BITS INTERIOR_BITS #define MAP2_LENGTH (1 << MAP2_BITS) #define MAP2_MASK (MAP2_LENGTH - 1) -#define MAP3_BITS (BITS - ARENA_BITS - 2*INTERIOR_BITS) +#define MAP3_BITS (PHYSICAL_BITS - ARENA_BITS - 2*INTERIOR_BITS) #define MAP3_LENGTH (1 << MAP3_BITS) #define MAP3_MASK (MAP3_LENGTH - 1) @@ -2846,7 +2853,8 @@ _PyObject_DebugMallocStats(FILE *out) #define AS_UINT(p) ((uintptr_t)(p)) #define MAP3_INDEX(p) ((AS_UINT(p) >> MAP3_SHIFT) & MAP3_MASK) #define MAP2_INDEX(p) ((AS_UINT(p) >> MAP2_SHIFT) & MAP2_MASK) -#define MAP1_INDEX(p) (AS_UINT(p) >> MAP1_SHIFT) +#define MAP1_INDEX(p) ((AS_UINT(p) >> MAP1_SHIFT) & MAP1_MASK) +#define HIGH_BITS(p) (AS_UINT(p) >> PHYSICAL_BITS) /* See arena_map_mark_used() for the meaning of these members. */ typedef struct { @@ -2882,6 +2890,8 @@ static arena_map1_t arena_map_root; static arena_map3_t * arena_map_get(block *p, int create) { + /* sanity check that PHYSICAL_BITS is correct */ + assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root)); int i1 = MAP1_INDEX(p); if (arena_map_root.ptrs[i1] == NULL) { if (!create) { @@ -2936,6 +2946,8 @@ arena_map_get(block *p, int create) static int arena_map_mark_used(uintptr_t arena_base, int is_used) { + /* sanity check that PHYSICAL_BITS is correct */ + assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root)); arena_map3_t *n_hi = arena_map_get((block *)arena_base, is_used); if (n_hi == NULL) { assert(is_used); /* otherwise node should already exist */ From 3492ef6576a8b1cbabb05454e790dfca60eef79d Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 3 Jul 2019 20:20:05 -0700 Subject: [PATCH 07/17] Remove if, assert() is sufficient. --- Objects/obmalloc.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 48eaa86bad7b849..dbeea897c405758 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -2967,22 +2967,20 @@ arena_map_mark_used(uintptr_t arena_base, int is_used) * again (do the full tree traversal). */ n_hi->arenas[i3].tail_hi = is_used ? tail : 0; - uintptr_t arena_next = arena_base + ARENA_SIZE; - /* If arena_base is a legit arena address, so is arena_next - 1 - * (last address in arena). If arena_next overflows then it + uintptr_t arena_base_next = arena_base + ARENA_SIZE; + /* If arena_base is a legit arena address, so is arena_base_next - 1 + * (last address in arena). If arena_base_next overflows then it * must overflow to 0. However, that would mean arena_base was * "ideal" and we should not be in this case. */ - assert(arena_base < arena_next); - if (arena_next > arena_base) { - arena_map3_t *n_lo = arena_map_get((block *)arena_next, is_used); - if (n_lo == NULL) { - assert(is_used); /* otherwise should already exist */ - n_hi->arenas[i3].tail_hi = 0; - return 0; /* failed to allocate space for node */ - } - int i3_next = MAP3_INDEX(arena_next); - n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0; + assert(arena_base < arena_base_next); + arena_map3_t *n_lo = arena_map_get((block *)arena_base_next, is_used); + if (n_lo == NULL) { + assert(is_used); /* otherwise should already exist */ + n_hi->arenas[i3].tail_hi = 0; + return 0; /* failed to allocate space for node */ } + int i3_next = MAP3_INDEX(arena_base_next); + n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0; } return 1; } From 9b2886f056afdfb90d21113383f1346d339b1e4b Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sat, 20 Feb 2021 12:47:54 -0800 Subject: [PATCH 08/17] wip: 32-bit version --- Objects/obmalloc.c | 68 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 11 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index dbeea897c405758..0561bebd0aed517 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -841,6 +841,7 @@ static int running_on_valgrind = -1; #if SIZEOF_VOID_P == 8 #define WITH_RADIX_TREE #endif +#define WITH_RADIX_TREE /* * Alignment of addresses returned to the user. 8-bytes alignment works @@ -2807,16 +2808,41 @@ _PyObject_DebugMallocStats(FILE *out) 20 -> ideal aligned arena ---- 64 + + key format (2^20 arena size) + 12 -> MAP3 + 20 -> ideal aligned arena + ---- + 32 + */ +#if SIZEOF_VOID_P == 8 /* number of bits in a pointer */ #define BITS 64 -#if SIZEOF_VOID_P != 8 - /* Currently this code works for 64-bit pointers only. For 32-bits, we - * could use a two-layer tree but it hasn't been implemented yet. */ -#error "Radix tree requires 64-bit pointers." -#endif +/* Current 64-bit processors are limited to 48-bit physical addresses. For + * now, the top 17 bits of addresses will all be equal to bit 2**47. If that + * changes in the future, this must be adjusted upwards. + */ +#define PHYSICAL_BITS 48 + +/* need more layers of tree */ +#define USE_INTERIOR_NODES + +#define arena_root_t arena_map1_t + +#elif SIZEOF_VOID_P == 4 +#define BITS 32 +#define PHYSICAL_BITS 32 + +#else + + /* Currently this code works for 64-bit or 32-bit pointers only. */ +#error "obmalloc radix tree requires 64-bit or 32-bit pointers." + +#endif /* SIZEOF_VOID_P */ + #define ARENA_MASK (ARENA_SIZE - 1) @@ -2825,14 +2851,12 @@ _PyObject_DebugMallocStats(FILE *out) # error "arena size must be < 2^32" #endif -/* Current 64-bit processors are limited to 48-bit physical addresses. For - * now, the top 17 bits of addresses will all be equal to bit 2**47. If that - * changes in the future, this must be adjusted upwards. - */ -#define PHYSICAL_BITS 48 - +#ifdef USE_INTERIOR_NODES /* bits used for MAP1 and MAP2 nodes */ #define INTERIOR_BITS ((PHYSICAL_BITS - ARENA_BITS + 2) / 3) +#else +#define INTERIOR_BITS 0 +#endif #define MAP1_BITS INTERIOR_BITS #define MAP1_LENGTH (1 << MAP1_BITS) @@ -2854,7 +2878,12 @@ _PyObject_DebugMallocStats(FILE *out) #define MAP3_INDEX(p) ((AS_UINT(p) >> MAP3_SHIFT) & MAP3_MASK) #define MAP2_INDEX(p) ((AS_UINT(p) >> MAP2_SHIFT) & MAP2_MASK) #define MAP1_INDEX(p) ((AS_UINT(p) >> MAP1_SHIFT) & MAP1_MASK) +#if PHYSICAL_BITS > BITS #define HIGH_BITS(p) (AS_UINT(p) >> PHYSICAL_BITS) +#else +#define HIGH_BITS(p) 0 +#endif + /* See arena_map_mark_used() for the meaning of these members. */ typedef struct { @@ -2870,6 +2899,7 @@ typedef struct arena_map3 { arena_coverage_t arenas[MAP3_LENGTH]; } arena_map3_t; +#ifdef USE_INTERIOR_NODES typedef struct arena_map2 { struct arena_map3 *ptrs[MAP2_LENGTH]; } arena_map2_t; @@ -2877,12 +2907,14 @@ typedef struct arena_map2 { typedef struct arena_map1 { struct arena_map2 *ptrs[MAP1_LENGTH]; } arena_map1_t; +#endif /* The root of tree (MAP1) and contains all MAP2 nodes. Note that by * initializing like this, the memory should be in the BSS. The OS will * only map pages as the MAP2 nodes get used (OS pages are demand loaded * as needed). */ +#ifdef USE_INTERIOR_NODES static arena_map1_t arena_map_root; /* Return a pointer to a MAP3 node, return NULL if it doesn't exist @@ -2919,6 +2951,20 @@ arena_map_get(block *p, int create) return arena_map_root.ptrs[i1]->ptrs[i2]; } +#else /* !USE_INTERIOR_NODES */ +static arena_map3_t arena_map_root; + +/* Return a pointer to a MAP3 node, return NULL if it doesn't exist + * or it cannot be created */ +static arena_map3_t * +arena_map_get(block *p, int create) +{ + return &arena_map_root; +} + +#endif + + /* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw * away 24 bits of the address. That reduces the space requirement of * the tree compared to similar radix tree page-map schemes. In From 2e025b0b934bc5f508ac9e056697b48398705740 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sat, 20 Feb 2021 13:07:21 -0800 Subject: [PATCH 09/17] wip: re-org code --- Objects/obmalloc.c | 639 ++++++++++++++++++--------------------------- 1 file changed, 260 insertions(+), 379 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 0561bebd0aed517..6aa88dd4fda0268 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -834,15 +834,6 @@ static int running_on_valgrind = -1; * -- Main tunable settings section -- */ -/* If defined, use radix tree to find if address is controlled by - * obmalloc. Otherwise, we use a slightly memory unsanitary scheme that - * has the advantage of performing very well. - */ -#if SIZEOF_VOID_P == 8 -#define WITH_RADIX_TREE -#endif -#define WITH_RADIX_TREE - /* * Alignment of addresses returned to the user. 8-bytes alignment works * on most current architectures (with 32-bit or 64-bit address busses). @@ -926,21 +917,8 @@ static int running_on_valgrind = -1; /* * Size of the pools used for small blocks. Must be a power of 2. */ -#ifdef WITH_RADIX_TREE - #define POOL_BITS 12 /* 4 KiB */ #define POOL_SIZE (1 << POOL_BITS) /* 4 KiB */ - -#else - -/* - * For non-radix tree, must be between 1K and SYSTEM_PAGE_SIZE. E.g. 1k, 2k, - * 4k. - */ -#define POOL_SIZE SYSTEM_PAGE_SIZE - -#endif /* !WITH_RADIX_TREE */ - #define POOL_SIZE_MASK (POOL_SIZE - 1) #define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE) @@ -1256,15 +1234,266 @@ _Py_GetAllocatedBlocks(void) return n; } -#ifdef WITH_RADIX_TREE -static int arena_map_is_marked(block *op); -static int arena_map_mark_used(uintptr_t arena_base, int is_used); +/*==========================================================================*/ +/* radix tree for tracking arena coverage + + key format (2^20 arena size) + 15 -> MAP1 + 15 -> MAP2 + 14 -> MAP3 + 20 -> ideal aligned arena + ---- + 64 + + key format (2^20 arena size) + 12 -> MAP3 + 20 -> ideal aligned arena + ---- + 32 + +*/ + +#if SIZEOF_VOID_P == 8 + +/* number of bits in a pointer */ +#define BITS 64 + +/* Current 64-bit processors are limited to 48-bit physical addresses. For + * now, the top 17 bits of addresses will all be equal to bit 2**47. If that + * changes in the future, this must be adjusted upwards. + */ +#define PHYSICAL_BITS 48 + +/* need more layers of radix tree */ +#define USE_INTERIOR_NODES + +#define arena_root_t arena_map1_t + +#elif SIZEOF_VOID_P == 4 + +#define BITS 32 +#define PHYSICAL_BITS 32 + +#else + + /* Currently this code works for 64-bit or 32-bit pointers only. */ +#error "obmalloc radix tree requires 64-bit or 32-bit pointers." + +#endif /* SIZEOF_VOID_P */ + +#define ARENA_MASK (ARENA_SIZE - 1) + +/* arena_coverage_t members require this to be true */ +#if ARENA_BITS >= 32 +# error "arena size must be < 2^32" +#endif + +#ifdef USE_INTERIOR_NODES +/* bits used for MAP1 and MAP2 nodes */ +#define INTERIOR_BITS ((PHYSICAL_BITS - ARENA_BITS + 2) / 3) +#else +#define INTERIOR_BITS 0 +#endif + +#define MAP1_BITS INTERIOR_BITS +#define MAP1_LENGTH (1 << MAP1_BITS) +#define MAP1_MASK (MAP3_LENGTH - 1) + +#define MAP2_BITS INTERIOR_BITS +#define MAP2_LENGTH (1 << MAP2_BITS) +#define MAP2_MASK (MAP2_LENGTH - 1) + +#define MAP3_BITS (PHYSICAL_BITS - ARENA_BITS - 2*INTERIOR_BITS) +#define MAP3_LENGTH (1 << MAP3_BITS) +#define MAP3_MASK (MAP3_LENGTH - 1) + +#define MAP3_SHIFT ARENA_BITS +#define MAP2_SHIFT (MAP3_BITS + MAP3_SHIFT) +#define MAP1_SHIFT (MAP2_BITS + MAP2_SHIFT) + +#define AS_UINT(p) ((uintptr_t)(p)) +#define MAP3_INDEX(p) ((AS_UINT(p) >> MAP3_SHIFT) & MAP3_MASK) +#define MAP2_INDEX(p) ((AS_UINT(p) >> MAP2_SHIFT) & MAP2_MASK) +#define MAP1_INDEX(p) ((AS_UINT(p) >> MAP1_SHIFT) & MAP1_MASK) + +#if PHYSICAL_BITS > BITS +#define HIGH_BITS(p) (AS_UINT(p) >> PHYSICAL_BITS) +#else +#define HIGH_BITS(p) 0 +#endif + + +/* See arena_map_mark_used() for the meaning of these members. */ +typedef struct { + int32_t tail_hi; + int32_t tail_lo; +} arena_coverage_t; + +typedef struct arena_map3 { + /* The members tail_hi and tail_lo are accessed together. So, it + * better to have them as an array of structs, rather than two + * arrays. + */ + arena_coverage_t arenas[MAP3_LENGTH]; +} arena_map3_t; + +#ifdef USE_INTERIOR_NODES +typedef struct arena_map2 { + struct arena_map3 *ptrs[MAP2_LENGTH]; +} arena_map2_t; + +typedef struct arena_map1 { + struct arena_map2 *ptrs[MAP1_LENGTH]; +} arena_map1_t; +#endif + +/* The root of tree (MAP1) and contains all MAP2 nodes. Note that by + * initializing like this, the memory should be in the BSS. The OS will + * only map pages as the MAP2 nodes get used (OS pages are demand loaded + * as needed). + */ +#ifdef USE_INTERIOR_NODES +static arena_map1_t arena_map_root; /* number of used radix tree nodes */ static int arena_map1_count; static int arena_map2_count; + +/* Return a pointer to a MAP3 node, return NULL if it doesn't exist + * or it cannot be created */ +static arena_map3_t * +arena_map_get(block *p, int create) +{ + /* sanity check that PHYSICAL_BITS is correct */ + assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root)); + int i1 = MAP1_INDEX(p); + if (arena_map_root.ptrs[i1] == NULL) { + if (!create) { + return NULL; + } + arena_map2_t *n = PyMem_RawCalloc(1, sizeof(arena_map2_t)); + if (n == NULL) { + return NULL; + } + arena_map_root.ptrs[i1] = n; + arena_map1_count++; + } + int i2 = MAP2_INDEX(p); + if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) { + if (!create) { + return NULL; + } + arena_map3_t *n = PyMem_RawCalloc(1, sizeof(arena_map3_t)); + if (n == NULL) { + return NULL; + } + arena_map_root.ptrs[i1]->ptrs[i2] = n; + arena_map2_count++; + } + return arena_map_root.ptrs[i1]->ptrs[i2]; +} + +#else /* !USE_INTERIOR_NODES */ +static arena_map3_t arena_map_root; + +/* Return a pointer to a MAP3 node, return NULL if it doesn't exist + * or it cannot be created */ +static arena_map3_t * +arena_map_get(block *p, int create) +{ + return &arena_map_root; +} + #endif + +/* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw + * away 24 bits of the address. That reduces the space requirement of + * the tree compared to similar radix tree page-map schemes. In + * exchange for slashing the space requirement, it needs more + * computation to check an address. + * + * Tracking coverage is done by "ideal" arena address. It is easier to + * explain in decimal so let's say that the arena size is 100 bytes. + * Then, ideal addresses are 100, 200, 300, etc. For checking if a + * pointer address is inside an actual arena, we have to check two ideal + * arena addresses. E.g. if pointer is 357, we need to check 200 and + * 300. In the rare case that an arena is aligned in the ideal way + * (e.g. base address of arena is 200) then we only have to check one + * ideal address. + * + * The tree nodes for 200 and 300 both store the address of arena. + * There are two cases: the arena starts at a lower ideal arena and + * extends to this one, or the arena starts in this arena and extends to + * the next ideal arena. The tail_lo and tail_hi members correspond to + * these two cases. + */ + + +/* mark or unmark addresses covered by arena */ +static int +arena_map_mark_used(uintptr_t arena_base, int is_used) +{ + /* sanity check that PHYSICAL_BITS is correct */ + assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root)); + arena_map3_t *n_hi = arena_map_get((block *)arena_base, is_used); + if (n_hi == NULL) { + assert(is_used); /* otherwise node should already exist */ + return 0; /* failed to allocate space for node */ + } + int i3 = MAP3_INDEX((block *)arena_base); + int32_t tail = (int32_t)(arena_base & ARENA_MASK); + if (tail == 0) { + /* is ideal arena address */ + n_hi->arenas[i3].tail_hi = is_used ? -1 : 0; + } + else { + /* arena_base address is not ideal (aligned to arena size) and + * so it potentially covers two MAP3 nodes. Get the MAP3 node + * for the next arena. Note that it might be in different MAP1 + * and MAP2 nodes as well so we need to call arena_map_get() + * again (do the full tree traversal). + */ + n_hi->arenas[i3].tail_hi = is_used ? tail : 0; + uintptr_t arena_base_next = arena_base + ARENA_SIZE; + /* If arena_base is a legit arena address, so is arena_base_next - 1 + * (last address in arena). If arena_base_next overflows then it + * must overflow to 0. However, that would mean arena_base was + * "ideal" and we should not be in this case. */ + assert(arena_base < arena_base_next); + arena_map3_t *n_lo = arena_map_get((block *)arena_base_next, is_used); + if (n_lo == NULL) { + assert(is_used); /* otherwise should already exist */ + n_hi->arenas[i3].tail_hi = 0; + return 0; /* failed to allocate space for node */ + } + int i3_next = MAP3_INDEX(arena_base_next); + n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0; + } + return 1; +} + +/* Return true if 'p' is a pointer inside an obmalloc arena. + * _PyObject_Free() calls this so it needs to be very fast. */ +static int +arena_map_is_marked(block *p) +{ + arena_map3_t *n = arena_map_get(p, 0); + if (n == NULL) { + return 0; + } + int i3 = MAP3_INDEX(p); + /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */ + int32_t hi = n->arenas[i3].tail_hi; + int32_t lo = n->arenas[i3].tail_lo; + int32_t tail = (int32_t)(AS_UINT(p) & ARENA_MASK); + return (tail < lo) || (tail >= hi && hi != 0); +} + +/* end of radix tree logic */ +/*==========================================================================*/ + + /* Allocate a new arena. If we run out of memory, return NULL. Else * allocate a new arena, and return the address of an arena_object * describing the new arena. It's expected that the caller will set @@ -1333,7 +1562,6 @@ new_arena(void) unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0); address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); -#ifdef WITH_RADIX_TREE if (address != NULL) { if (!arena_map_mark_used((uintptr_t)address, 1)) { /* marking arena in radix tree failed, abort */ @@ -1341,7 +1569,6 @@ new_arena(void) address = NULL; } } -#endif if (address == NULL) { /* The allocation failed: return NULL after putting the * arenaobj back. @@ -1372,110 +1599,16 @@ new_arena(void) } -#ifdef WITH_RADIX_TREE /* Return true if and only if P is an address that was allocated by pymalloc. When the radix tree is used, 'poolp' is unused. */ -static bool -address_in_range(void *p, poolp pool) -{ - return arena_map_is_marked(p); -} - -#else /* !WITH_RADIX_TREE */ - -/* -address_in_range(P, POOL) - -Return true if and only if P is an address that was allocated by pymalloc. -POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P) -(the caller is asked to compute this because the macro expands POOL more than -once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a -variable and pass the latter to the macro; because address_in_range is -called on every alloc/realloc/free, micro-efficiency is important here). - -Tricky: Let B be the arena base address associated with the pool, B = -arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if - - B <= P < B + ARENA_SIZE - -Subtracting B throughout, this is true iff - - 0 <= P-B < ARENA_SIZE - -By using unsigned arithmetic, the "0 <=" half of the test can be skipped. - -Obscure: A PyMem "free memory" function can call the pymalloc free or realloc -before the first arena has been allocated. `arenas` is still NULL in that -case. We're relying on that maxarenas is also 0 in that case, so that -(POOL)->arenaindex < maxarenas must be false, saving us from trying to index -into a NULL arenas. - -Details: given P and POOL, the arena_object corresponding to P is AO = -arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild -stores, etc), POOL is the correct address of P's pool, AO.address is the -correct base address of the pool's arena, and P must be within ARENA_SIZE of -AO.address. In addition, AO.address is not 0 (no arena can start at address 0 -(NULL)). Therefore address_in_range correctly reports that obmalloc -controls P. - -Now suppose obmalloc does not control P (e.g., P was obtained via a direct -call to the system malloc() or realloc()). (POOL)->arenaindex may be anything -in this case -- it may even be uninitialized trash. If the trash arenaindex -is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't -control P. - -Else arenaindex is < maxarena, and AO is read up. If AO corresponds to an -allocated arena, obmalloc controls all the memory in slice AO.address : -AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc, -so P doesn't lie in that slice, so the macro correctly reports that P is not -controlled by obmalloc. - -Finally, if P is not controlled by obmalloc and AO corresponds to an unused -arena_object (one not currently associated with an allocated arena), -AO.address is 0, and the second test in the macro reduces to: - - P < ARENA_SIZE - -If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes -that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part -of the test still passes, and the third clause (AO.address != 0) is necessary -to get the correct result: AO.address is 0 in this case, so the macro -correctly reports that P is not controlled by obmalloc (despite that P lies in -slice AO.address : AO.address + ARENA_SIZE). - -Note: The third (AO.address != 0) clause was added in Python 2.5. Before -2.5, arenas were never free()'ed, and an arenaindex < maxarena always -corresponded to a currently-allocated arena, so the "P is not controlled by -obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case -was impossible. - -Note that the logic is excruciating, and reading up possibly uninitialized -memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex) -creates problems for some memory debuggers. The overwhelming advantage is -that this test determines whether an arbitrary address is controlled by -obmalloc in a small constant time, independent of the number of arenas -obmalloc controls. Since this test is needed at every entry point, it's -extremely desirable that it be this fast. -*/ - -static bool _Py_NO_SANITIZE_ADDRESS - _Py_NO_SANITIZE_THREAD - _Py_NO_SANITIZE_MEMORY +static bool address_in_range(void *p, poolp pool) { - // Since address_in_range may be reading from memory which was not allocated - // by Python, it is important that pool->arenaindex is read only once, as - // another thread may be concurrently modifying the value without holding - // the GIL. The following dance forces the compiler to read pool->arenaindex - // only once. - uint arenaindex = *((volatile uint *)&pool->arenaindex); - return arenaindex < maxarenas && - (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE && - arenas[arenaindex].address != 0; + return arena_map_is_marked(p); } -#endif /* !WITH_RADIX_TREE */ + /*==========================================================================*/ @@ -1821,10 +1954,8 @@ insert_to_freepool(poolp pool) ao->nextarena = unused_arena_objects; unused_arena_objects = ao; -#ifdef WITH_RADIX_TREE /* mark arena as not under control of obmalloc */ arena_map_mark_used(ao->address, 0); -#endif /* Free the entire arena. */ _PyObject_Arena.free(_PyObject_Arena.ctx, @@ -2069,6 +2200,8 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes) return PyMem_RawRealloc(ptr, nbytes); } + + #else /* ! WITH_PYMALLOC */ /*==========================================================================*/ @@ -2769,7 +2902,7 @@ _PyObject_DebugMallocStats(FILE *out) (void)printone(out, "# arenas reclaimed", ntimes_arena_allocated - narenas); (void)printone(out, "# arenas highwater mark", narenas_highwater); (void)printone(out, "# arenas allocated current", narenas); -#ifdef WITH_RADIX_TREE +#ifdef USE_INTERIOR_NODES (void)printone(out, "# arena map level 1 nodes", arena_map1_count); (void)printone(out, "# arena map level 2 nodes", arena_map2_count); fputc('\n', out); @@ -2798,256 +2931,4 @@ _PyObject_DebugMallocStats(FILE *out) } -#ifdef WITH_RADIX_TREE -/* radix tree for tracking arena coverage - - key format (2^20 arena size) - 15 -> MAP1 - 15 -> MAP2 - 14 -> MAP3 - 20 -> ideal aligned arena - ---- - 64 - - key format (2^20 arena size) - 12 -> MAP3 - 20 -> ideal aligned arena - ---- - 32 - -*/ - -#if SIZEOF_VOID_P == 8 -/* number of bits in a pointer */ -#define BITS 64 - -/* Current 64-bit processors are limited to 48-bit physical addresses. For - * now, the top 17 bits of addresses will all be equal to bit 2**47. If that - * changes in the future, this must be adjusted upwards. - */ -#define PHYSICAL_BITS 48 - -/* need more layers of tree */ -#define USE_INTERIOR_NODES - -#define arena_root_t arena_map1_t - -#elif SIZEOF_VOID_P == 4 -#define BITS 32 -#define PHYSICAL_BITS 32 - -#else - - /* Currently this code works for 64-bit or 32-bit pointers only. */ -#error "obmalloc radix tree requires 64-bit or 32-bit pointers." - -#endif /* SIZEOF_VOID_P */ - - -#define ARENA_MASK (ARENA_SIZE - 1) - -/* arena_coverage_t members require this to be true */ -#if ARENA_BITS >= 32 -# error "arena size must be < 2^32" -#endif - -#ifdef USE_INTERIOR_NODES -/* bits used for MAP1 and MAP2 nodes */ -#define INTERIOR_BITS ((PHYSICAL_BITS - ARENA_BITS + 2) / 3) -#else -#define INTERIOR_BITS 0 -#endif - -#define MAP1_BITS INTERIOR_BITS -#define MAP1_LENGTH (1 << MAP1_BITS) -#define MAP1_MASK (MAP3_LENGTH - 1) - -#define MAP2_BITS INTERIOR_BITS -#define MAP2_LENGTH (1 << MAP2_BITS) -#define MAP2_MASK (MAP2_LENGTH - 1) - -#define MAP3_BITS (PHYSICAL_BITS - ARENA_BITS - 2*INTERIOR_BITS) -#define MAP3_LENGTH (1 << MAP3_BITS) -#define MAP3_MASK (MAP3_LENGTH - 1) - -#define MAP3_SHIFT ARENA_BITS -#define MAP2_SHIFT (MAP3_BITS + MAP3_SHIFT) -#define MAP1_SHIFT (MAP2_BITS + MAP2_SHIFT) - -#define AS_UINT(p) ((uintptr_t)(p)) -#define MAP3_INDEX(p) ((AS_UINT(p) >> MAP3_SHIFT) & MAP3_MASK) -#define MAP2_INDEX(p) ((AS_UINT(p) >> MAP2_SHIFT) & MAP2_MASK) -#define MAP1_INDEX(p) ((AS_UINT(p) >> MAP1_SHIFT) & MAP1_MASK) -#if PHYSICAL_BITS > BITS -#define HIGH_BITS(p) (AS_UINT(p) >> PHYSICAL_BITS) -#else -#define HIGH_BITS(p) 0 -#endif - - -/* See arena_map_mark_used() for the meaning of these members. */ -typedef struct { - int32_t tail_hi; - int32_t tail_lo; -} arena_coverage_t; - -typedef struct arena_map3 { - /* The members tail_hi and tail_lo are accessed together. So, it - * better to have them as an array of structs, rather than two - * arrays. - */ - arena_coverage_t arenas[MAP3_LENGTH]; -} arena_map3_t; - -#ifdef USE_INTERIOR_NODES -typedef struct arena_map2 { - struct arena_map3 *ptrs[MAP2_LENGTH]; -} arena_map2_t; - -typedef struct arena_map1 { - struct arena_map2 *ptrs[MAP1_LENGTH]; -} arena_map1_t; -#endif - -/* The root of tree (MAP1) and contains all MAP2 nodes. Note that by - * initializing like this, the memory should be in the BSS. The OS will - * only map pages as the MAP2 nodes get used (OS pages are demand loaded - * as needed). - */ -#ifdef USE_INTERIOR_NODES -static arena_map1_t arena_map_root; - -/* Return a pointer to a MAP3 node, return NULL if it doesn't exist - * or it cannot be created */ -static arena_map3_t * -arena_map_get(block *p, int create) -{ - /* sanity check that PHYSICAL_BITS is correct */ - assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root)); - int i1 = MAP1_INDEX(p); - if (arena_map_root.ptrs[i1] == NULL) { - if (!create) { - return NULL; - } - arena_map2_t *n = PyMem_RawCalloc(1, sizeof(arena_map2_t)); - if (n == NULL) { - return NULL; - } - arena_map_root.ptrs[i1] = n; - arena_map1_count++; - } - int i2 = MAP2_INDEX(p); - if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) { - if (!create) { - return NULL; - } - arena_map3_t *n = PyMem_RawCalloc(1, sizeof(arena_map3_t)); - if (n == NULL) { - return NULL; - } - arena_map_root.ptrs[i1]->ptrs[i2] = n; - arena_map2_count++; - } - return arena_map_root.ptrs[i1]->ptrs[i2]; -} - -#else /* !USE_INTERIOR_NODES */ -static arena_map3_t arena_map_root; - -/* Return a pointer to a MAP3 node, return NULL if it doesn't exist - * or it cannot be created */ -static arena_map3_t * -arena_map_get(block *p, int create) -{ - return &arena_map_root; -} - -#endif - - -/* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw - * away 24 bits of the address. That reduces the space requirement of - * the tree compared to similar radix tree page-map schemes. In - * exchange for slashing the space requirement, it needs more - * computation to check an address. - * - * Tracking coverage is done by "ideal" arena address. It is easier to - * explain in decimal so let's say that the arena size is 100 bytes. - * Then, ideal addresses are 100, 200, 300, etc. For checking if a - * pointer address is inside an actual arena, we have to check two ideal - * arena addresses. E.g. if pointer is 357, we need to check 200 and - * 300. In the rare case that an arena is aligned in the ideal way - * (e.g. base address of arena is 200) then we only have to check one - * ideal address. - * - * The tree nodes for 200 and 300 both store the address of arena. - * There are two cases: the arena starts at a lower ideal arena and - * extends to this one, or the arena starts in this arena and extends to - * the next ideal arena. The tail_lo and tail_hi members correspond to - * these two cases. - */ - - -/* mark or unmark addresses covered by arena */ -static int -arena_map_mark_used(uintptr_t arena_base, int is_used) -{ - /* sanity check that PHYSICAL_BITS is correct */ - assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root)); - arena_map3_t *n_hi = arena_map_get((block *)arena_base, is_used); - if (n_hi == NULL) { - assert(is_used); /* otherwise node should already exist */ - return 0; /* failed to allocate space for node */ - } - int i3 = MAP3_INDEX((block *)arena_base); - int32_t tail = (int32_t)(arena_base & ARENA_MASK); - if (tail == 0) { - /* is ideal arena address */ - n_hi->arenas[i3].tail_hi = is_used ? -1 : 0; - } - else { - /* arena_base address is not ideal (aligned to arena size) and - * so it potentially covers two MAP3 nodes. Get the MAP3 node - * for the next arena. Note that it might be in different MAP1 - * and MAP2 nodes as well so we need to call arena_map_get() - * again (do the full tree traversal). - */ - n_hi->arenas[i3].tail_hi = is_used ? tail : 0; - uintptr_t arena_base_next = arena_base + ARENA_SIZE; - /* If arena_base is a legit arena address, so is arena_base_next - 1 - * (last address in arena). If arena_base_next overflows then it - * must overflow to 0. However, that would mean arena_base was - * "ideal" and we should not be in this case. */ - assert(arena_base < arena_base_next); - arena_map3_t *n_lo = arena_map_get((block *)arena_base_next, is_used); - if (n_lo == NULL) { - assert(is_used); /* otherwise should already exist */ - n_hi->arenas[i3].tail_hi = 0; - return 0; /* failed to allocate space for node */ - } - int i3_next = MAP3_INDEX(arena_base_next); - n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0; - } - return 1; -} - -/* Return true if 'p' is a pointer inside an obmalloc arena. - * _PyObject_Free() calls this so it needs to be very fast. */ -static int -arena_map_is_marked(block *p) -{ - arena_map3_t *n = arena_map_get(p, 0); - if (n == NULL) { - return 0; - } - int i3 = MAP3_INDEX(p); - /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */ - int32_t hi = n->arenas[i3].tail_hi; - int32_t lo = n->arenas[i3].tail_lo; - int32_t tail = (int32_t)(AS_UINT(p) & ARENA_MASK); - return (tail < lo) || (tail >= hi && hi != 0); -} - -#endif /* WITH_RADIX_TREE */ - #endif /* #ifdef WITH_PYMALLOC */ From 21c1679724ed8197f73c72b18ce1b6926f235e73 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sun, 21 Feb 2021 13:56:55 -0800 Subject: [PATCH 10/17] wip: code cleanup, renames, comments --- Objects/obmalloc.c | 166 +++++++++++++++++++++------------------------ 1 file changed, 79 insertions(+), 87 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 6aa88dd4fda0268..5c997aa1d20eae7 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -909,6 +909,7 @@ static int running_on_valgrind = -1; */ #define ARENA_BITS 18 #define ARENA_SIZE (1 << ARENA_BITS) /* 256 KiB */ +#define ARENA_SIZE_MASK (ARENA_SIZE - 1) #ifdef WITH_MEMORY_LIMITS #define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE) @@ -1235,18 +1236,21 @@ _Py_GetAllocatedBlocks(void) } /*==========================================================================*/ -/* radix tree for tracking arena coverage +/* radix tree for tracking arena usage - key format (2^20 arena size) - 15 -> MAP1 - 15 -> MAP2 - 14 -> MAP3 + bit allocation for keys (2^20 arena size) + + 64-bit pointers: + 16 -> ignored (BITS - PHYSICAL_BITS) + 10 -> MAP_TOP + 10 -> MAP_MID + 8 -> MAP_BOT 20 -> ideal aligned arena ---- 64 - key format (2^20 arena size) - 12 -> MAP3 + 32-bit pointers: + 12 -> MAP_BOT 20 -> ideal aligned arena ---- 32 @@ -1264,11 +1268,9 @@ _Py_GetAllocatedBlocks(void) */ #define PHYSICAL_BITS 48 -/* need more layers of radix tree */ +/* use the top and mid layers of the radix tree */ #define USE_INTERIOR_NODES -#define arena_root_t arena_map1_t - #elif SIZEOF_VOID_P == 4 #define BITS 32 @@ -1281,130 +1283,123 @@ _Py_GetAllocatedBlocks(void) #endif /* SIZEOF_VOID_P */ -#define ARENA_MASK (ARENA_SIZE - 1) - /* arena_coverage_t members require this to be true */ #if ARENA_BITS >= 32 # error "arena size must be < 2^32" #endif #ifdef USE_INTERIOR_NODES -/* bits used for MAP1 and MAP2 nodes */ +/* number of bits used for MAP_TOP and MAP_MID nodes */ #define INTERIOR_BITS ((PHYSICAL_BITS - ARENA_BITS + 2) / 3) #else #define INTERIOR_BITS 0 #endif -#define MAP1_BITS INTERIOR_BITS -#define MAP1_LENGTH (1 << MAP1_BITS) -#define MAP1_MASK (MAP3_LENGTH - 1) +#define MAP_TOP_BITS INTERIOR_BITS +#define MAP_TOP_LENGTH (1 << MAP_TOP_BITS) +#define MAP_TOP_MASK (MAP_BOT_LENGTH - 1) -#define MAP2_BITS INTERIOR_BITS -#define MAP2_LENGTH (1 << MAP2_BITS) -#define MAP2_MASK (MAP2_LENGTH - 1) +#define MAP_MID_BITS INTERIOR_BITS +#define MAP_MID_LENGTH (1 << MAP_MID_BITS) +#define MAP_MID_MASK (MAP_MID_LENGTH - 1) -#define MAP3_BITS (PHYSICAL_BITS - ARENA_BITS - 2*INTERIOR_BITS) -#define MAP3_LENGTH (1 << MAP3_BITS) -#define MAP3_MASK (MAP3_LENGTH - 1) +#define MAP_BOT_BITS (PHYSICAL_BITS - ARENA_BITS - 2*INTERIOR_BITS) +#define MAP_BOT_LENGTH (1 << MAP_BOT_BITS) +#define MAP_BOT_MASK (MAP_BOT_LENGTH - 1) -#define MAP3_SHIFT ARENA_BITS -#define MAP2_SHIFT (MAP3_BITS + MAP3_SHIFT) -#define MAP1_SHIFT (MAP2_BITS + MAP2_SHIFT) +#define MAP_BOT_SHIFT ARENA_BITS +#define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT) +#define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT) #define AS_UINT(p) ((uintptr_t)(p)) -#define MAP3_INDEX(p) ((AS_UINT(p) >> MAP3_SHIFT) & MAP3_MASK) -#define MAP2_INDEX(p) ((AS_UINT(p) >> MAP2_SHIFT) & MAP2_MASK) -#define MAP1_INDEX(p) ((AS_UINT(p) >> MAP1_SHIFT) & MAP1_MASK) +#define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK) +#define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK) +#define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK) #if PHYSICAL_BITS > BITS +/* Return non-physical bits of pointer. Should be same for all valid + * pointers if PHYSICAL_BITS set correctly. */ #define HIGH_BITS(p) (AS_UINT(p) >> PHYSICAL_BITS) #else #define HIGH_BITS(p) 0 #endif -/* See arena_map_mark_used() for the meaning of these members. */ +/* This is the leaf of the radix tree. See arena_map_mark_used() for the + * meaning of these members. */ typedef struct { int32_t tail_hi; int32_t tail_lo; } arena_coverage_t; -typedef struct arena_map3 { +typedef struct arena_map_bot { /* The members tail_hi and tail_lo are accessed together. So, it * better to have them as an array of structs, rather than two * arrays. */ - arena_coverage_t arenas[MAP3_LENGTH]; -} arena_map3_t; + arena_coverage_t arenas[MAP_BOT_LENGTH]; +} arena_map_bot_t; #ifdef USE_INTERIOR_NODES -typedef struct arena_map2 { - struct arena_map3 *ptrs[MAP2_LENGTH]; -} arena_map2_t; +typedef struct arena_map_mid { + struct arena_map_bot *ptrs[MAP_MID_LENGTH]; +} arena_map_mid_t; -typedef struct arena_map1 { - struct arena_map2 *ptrs[MAP1_LENGTH]; -} arena_map1_t; +typedef struct arena_map_top { + struct arena_map_mid *ptrs[MAP_TOP_LENGTH]; +} arena_map_top_t; #endif -/* The root of tree (MAP1) and contains all MAP2 nodes. Note that by - * initializing like this, the memory should be in the BSS. The OS will - * only map pages as the MAP2 nodes get used (OS pages are demand loaded - * as needed). +/* The root of radix tree. Note that by initializing like this, the memory + * should be in the BSS. The OS will only memory map pages as the MAP_MID + * nodes get used (OS pages are demand loaded as needed). */ #ifdef USE_INTERIOR_NODES -static arena_map1_t arena_map_root; - -/* number of used radix tree nodes */ -static int arena_map1_count; -static int arena_map2_count; +static arena_map_top_t arena_map_root; +/* accounting for number of used interior nodes */ +static int arena_map_top_count; +static int arena_map_mid_count; +#else +static arena_map_bot_t arena_map_root; +#endif -/* Return a pointer to a MAP3 node, return NULL if it doesn't exist - * or it cannot be created */ -static arena_map3_t * +/* Return a pointer to a bottom tree node, return NULL if it doesn't exist or + * it cannot be created */ +static arena_map_bot_t * arena_map_get(block *p, int create) { +#ifdef USE_INTERIOR_NODES /* sanity check that PHYSICAL_BITS is correct */ assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root)); - int i1 = MAP1_INDEX(p); + int i1 = MAP_TOP_INDEX(p); if (arena_map_root.ptrs[i1] == NULL) { if (!create) { return NULL; } - arena_map2_t *n = PyMem_RawCalloc(1, sizeof(arena_map2_t)); + arena_map_mid_t *n = PyMem_RawCalloc(1, sizeof(arena_map_mid_t)); if (n == NULL) { return NULL; } arena_map_root.ptrs[i1] = n; - arena_map1_count++; + arena_map_top_count++; } - int i2 = MAP2_INDEX(p); + int i2 = MAP_MID_INDEX(p); if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) { if (!create) { return NULL; } - arena_map3_t *n = PyMem_RawCalloc(1, sizeof(arena_map3_t)); + arena_map_bot_t *n = PyMem_RawCalloc(1, sizeof(arena_map_bot_t)); if (n == NULL) { return NULL; } arena_map_root.ptrs[i1]->ptrs[i2] = n; - arena_map2_count++; + arena_map_mid_count++; } return arena_map_root.ptrs[i1]->ptrs[i2]; -} - -#else /* !USE_INTERIOR_NODES */ -static arena_map3_t arena_map_root; - -/* Return a pointer to a MAP3 node, return NULL if it doesn't exist - * or it cannot be created */ -static arena_map3_t * -arena_map_get(block *p, int create) -{ +#else return &arena_map_root; -} - #endif +} /* The radix tree only tracks arenas. So, for 16 MiB arenas, we throw @@ -1436,22 +1431,22 @@ arena_map_mark_used(uintptr_t arena_base, int is_used) { /* sanity check that PHYSICAL_BITS is correct */ assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root)); - arena_map3_t *n_hi = arena_map_get((block *)arena_base, is_used); + arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used); if (n_hi == NULL) { assert(is_used); /* otherwise node should already exist */ return 0; /* failed to allocate space for node */ } - int i3 = MAP3_INDEX((block *)arena_base); - int32_t tail = (int32_t)(arena_base & ARENA_MASK); + int i3 = MAP_BOT_INDEX((block *)arena_base); + int32_t tail = (int32_t)(arena_base & ARENA_SIZE_MASK); if (tail == 0) { /* is ideal arena address */ n_hi->arenas[i3].tail_hi = is_used ? -1 : 0; } else { /* arena_base address is not ideal (aligned to arena size) and - * so it potentially covers two MAP3 nodes. Get the MAP3 node - * for the next arena. Note that it might be in different MAP1 - * and MAP2 nodes as well so we need to call arena_map_get() + * so it potentially covers two MAP_BOT nodes. Get the MAP_BOT node + * for the next arena. Note that it might be in different MAP_TOP + * and MAP_MID nodes as well so we need to call arena_map_get() * again (do the full tree traversal). */ n_hi->arenas[i3].tail_hi = is_used ? tail : 0; @@ -1461,13 +1456,13 @@ arena_map_mark_used(uintptr_t arena_base, int is_used) * must overflow to 0. However, that would mean arena_base was * "ideal" and we should not be in this case. */ assert(arena_base < arena_base_next); - arena_map3_t *n_lo = arena_map_get((block *)arena_base_next, is_used); + arena_map_bot_t *n_lo = arena_map_get((block *)arena_base_next, is_used); if (n_lo == NULL) { assert(is_used); /* otherwise should already exist */ n_hi->arenas[i3].tail_hi = 0; return 0; /* failed to allocate space for node */ } - int i3_next = MAP3_INDEX(arena_base_next); + int i3_next = MAP_BOT_INDEX(arena_base_next); n_lo->arenas[i3_next].tail_lo = is_used ? tail : 0; } return 1; @@ -1476,17 +1471,17 @@ arena_map_mark_used(uintptr_t arena_base, int is_used) /* Return true if 'p' is a pointer inside an obmalloc arena. * _PyObject_Free() calls this so it needs to be very fast. */ static int -arena_map_is_marked(block *p) +arena_map_is_used(block *p) { - arena_map3_t *n = arena_map_get(p, 0); + arena_map_bot_t *n = arena_map_get(p, 0); if (n == NULL) { return 0; } - int i3 = MAP3_INDEX(p); + int i3 = MAP_BOT_INDEX(p); /* ARENA_BITS must be < 32 so that the tail is a non-negative int32_t. */ int32_t hi = n->arenas[i3].tail_hi; int32_t lo = n->arenas[i3].tail_lo; - int32_t tail = (int32_t)(AS_UINT(p) & ARENA_MASK); + int32_t tail = (int32_t)(AS_UINT(p) & ARENA_SIZE_MASK); return (tail < lo) || (tail >= hi && hi != 0); } @@ -1606,7 +1601,7 @@ new_arena(void) static bool address_in_range(void *p, poolp pool) { - return arena_map_is_marked(p); + return arena_map_is_used(p); } @@ -1954,7 +1949,7 @@ insert_to_freepool(poolp pool) ao->nextarena = unused_arena_objects; unused_arena_objects = ao; - /* mark arena as not under control of obmalloc */ + /* mark arena region as not under control of obmalloc */ arena_map_mark_used(ao->address, 0); /* Free the entire arena. */ @@ -2200,8 +2195,6 @@ _PyObject_Realloc(void *ctx, void *ptr, size_t nbytes) return PyMem_RawRealloc(ptr, nbytes); } - - #else /* ! WITH_PYMALLOC */ /*==========================================================================*/ @@ -2903,8 +2896,8 @@ _PyObject_DebugMallocStats(FILE *out) (void)printone(out, "# arenas highwater mark", narenas_highwater); (void)printone(out, "# arenas allocated current", narenas); #ifdef USE_INTERIOR_NODES - (void)printone(out, "# arena map level 1 nodes", arena_map1_count); - (void)printone(out, "# arena map level 2 nodes", arena_map2_count); + (void)printone(out, "# arena map top nodes", arena_map_top_count); + (void)printone(out, "# arena map mid nodes", arena_map_mid_count); fputc('\n', out); #endif @@ -2930,5 +2923,4 @@ _PyObject_DebugMallocStats(FILE *out) return 1; } - #endif /* #ifdef WITH_PYMALLOC */ From c696d2054482bba12658f11f6235449abc554883 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sun, 21 Feb 2021 12:26:22 -0800 Subject: [PATCH 11/17] wip: increase pool and arena sizes by 4x --- Objects/obmalloc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 5c997aa1d20eae7..d047b0be948b2d1 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -907,8 +907,8 @@ static int running_on_valgrind = -1; * Arenas are allocated with mmap() on systems supporting anonymous memory * mappings to reduce heap fragmentation. */ -#define ARENA_BITS 18 -#define ARENA_SIZE (1 << ARENA_BITS) /* 256 KiB */ +#define ARENA_BITS 20 +#define ARENA_SIZE (1 << ARENA_BITS) /* 1 MiB */ #define ARENA_SIZE_MASK (ARENA_SIZE - 1) #ifdef WITH_MEMORY_LIMITS @@ -918,8 +918,8 @@ static int running_on_valgrind = -1; /* * Size of the pools used for small blocks. Must be a power of 2. */ -#define POOL_BITS 12 /* 4 KiB */ -#define POOL_SIZE (1 << POOL_BITS) /* 4 KiB */ +#define POOL_BITS 14 /* 16 KiB */ +#define POOL_SIZE (1 << POOL_BITS) #define POOL_SIZE_MASK (POOL_SIZE - 1) #define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE) From 4df42255c11e52d88ade2cad25e2961f881476a5 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sun, 21 Feb 2021 14:19:40 -0800 Subject: [PATCH 12/17] Update blurb entry. --- .../2019-06-29-16-27-23.bpo-37448.QI5V97.rst | 8 -------- .../2021-02-21-14-19-35.bpo-37448.btl7vO.rst | 12 ++++++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) delete mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-06-29-16-27-23.bpo-37448.QI5V97.rst create mode 100644 Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-06-29-16-27-23.bpo-37448.QI5V97.rst b/Misc/NEWS.d/next/Core and Builtins/2019-06-29-16-27-23.bpo-37448.QI5V97.rst deleted file mode 100644 index af17e608b09be87..000000000000000 --- a/Misc/NEWS.d/next/Core and Builtins/2019-06-29-16-27-23.bpo-37448.QI5V97.rst +++ /dev/null @@ -1,8 +0,0 @@ -Add radix tree implementation of obmalloc's address_in_range(). Use it -by default on 64-bit platforms. The radix tree eliminates the -(slightly) memory unsanitary behavior of the current address_in_range() -approach. It also has the small advantage of easily allowing larger -pool sizes. The current address_in_range() requires that pools be no -larger than the OS page size. - -Co-authored-by: Tim Peters diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst b/Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst new file mode 100644 index 000000000000000..635f0f3e3682415 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst @@ -0,0 +1,12 @@ +Add a radix tree to track used obmalloc arenas. Use to replace the old +implementation of address_in_range(). The radix tree approach makes it easy +to increase pool sizes beyond the OS page size. Boosting the pool and arena +size allows obmalloc to handle a significantly higher percentage of requests +from its ultra-fast paths. + +It also has the advantage of eliminating the memory unsanitary behavior of +the previous address_in_range(). The old address_in_range() was marked with +the annotations _Py_NO_SANITIZE_ADDRESS, _Py_NO_SANITIZE_THREAD, and +_Py_NO_SANITIZE_MEMORY. Those annotations are no longer needed. + +Co-authored-by: Tim Peters From 94ad7603de3397040b11b6a948c2985e6ed6192e Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sun, 21 Feb 2021 17:54:58 -0800 Subject: [PATCH 13/17] Use smaller pool and arena sizes for 32-bit. --- Objects/obmalloc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index d047b0be948b2d1..7e559a4f44064f2 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -907,8 +907,12 @@ static int running_on_valgrind = -1; * Arenas are allocated with mmap() on systems supporting anonymous memory * mappings to reduce heap fragmentation. */ -#define ARENA_BITS 20 -#define ARENA_SIZE (1 << ARENA_BITS) /* 1 MiB */ +#if SIZEOF_VOID_P > 4 +#define ARENA_BITS 20 /* 1 MiB */ +#else +#define ARENA_BITS 18 /* 256 KiB */ +#endif +#define ARENA_SIZE (1 << ARENA_BITS) #define ARENA_SIZE_MASK (ARENA_SIZE - 1) #ifdef WITH_MEMORY_LIMITS @@ -918,7 +922,11 @@ static int running_on_valgrind = -1; /* * Size of the pools used for small blocks. Must be a power of 2. */ +#if SIZEOF_VOID_P > 4 #define POOL_BITS 14 /* 16 KiB */ +#else +#define POOL_BITS 12 /* 4 KiB */ +#endif #define POOL_SIZE (1 << POOL_BITS) #define POOL_SIZE_MASK (POOL_SIZE - 1) From 509a5c7d013ceba034cbac89fd3c0213593b0590 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Wed, 24 Feb 2021 14:47:38 -0800 Subject: [PATCH 14/17] Add USE_RADIX_TREE flag. If disabled, the old version of address_in_range() is used. --- Objects/obmalloc.c | 131 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 124 insertions(+), 7 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 7e559a4f44064f2..f5fc603607c6a98 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -894,6 +894,18 @@ static int running_on_valgrind = -1; #endif #endif +/* use radix-tree to track arena memory regions, for address_in_range() */ +#define USE_RADIX_TREE + +#if SIZEOF_VOID_P > 4 +/* on 64-bit platforms use larger pools and arenas if we can */ +#define USE_LARGE_ARENAS +#ifdef USE_RADIX_TREE +/* large pools only supported if radix-tree is enabled */ +#define USE_LARGE_POOLS +#endif +#endif + /* * The allocator sub-allocates blocks of memory (called arenas) aligned * on a page boundary. This is a reserved virtual address space for the @@ -907,7 +919,7 @@ static int running_on_valgrind = -1; * Arenas are allocated with mmap() on systems supporting anonymous memory * mappings to reduce heap fragmentation. */ -#if SIZEOF_VOID_P > 4 +#ifdef USE_LARGE_ARENAS #define ARENA_BITS 20 /* 1 MiB */ #else #define ARENA_BITS 18 /* 256 KiB */ @@ -922,7 +934,7 @@ static int running_on_valgrind = -1; /* * Size of the pools used for small blocks. Must be a power of 2. */ -#if SIZEOF_VOID_P > 4 +#ifdef USE_LARGE_POOLS #define POOL_BITS 14 /* 16 KiB */ #else #define POOL_BITS 12 /* 4 KiB */ @@ -930,6 +942,12 @@ static int running_on_valgrind = -1; #define POOL_SIZE (1 << POOL_BITS) #define POOL_SIZE_MASK (POOL_SIZE - 1) +#ifndef USE_RADIX_TREE +#if POOL_SIZE != SYSTEM_PAGE_SIZE +# error "pool size must be system page size" +#endif +#endif + #define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE) #if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE # error "arena size not an exact multiple of pool size" @@ -1243,12 +1261,13 @@ _Py_GetAllocatedBlocks(void) return n; } +#ifdef USE_RADIX_TREE /*==========================================================================*/ /* radix tree for tracking arena usage - bit allocation for keys (2^20 arena size) + bit allocation for keys - 64-bit pointers: + 64-bit pointers and 2^20 arena size: 16 -> ignored (BITS - PHYSICAL_BITS) 10 -> MAP_TOP 10 -> MAP_MID @@ -1257,9 +1276,9 @@ _Py_GetAllocatedBlocks(void) ---- 64 - 32-bit pointers: - 12 -> MAP_BOT - 20 -> ideal aligned arena + 32-bit pointers and 2^18 arena size: + 14 -> MAP_BOT + 18 -> ideal aligned arena ---- 32 @@ -1495,6 +1514,7 @@ arena_map_is_used(block *p) /* end of radix tree logic */ /*==========================================================================*/ +#endif /* USE_RADIX_TREE */ /* Allocate a new arena. If we run out of memory, return NULL. Else @@ -1565,6 +1585,7 @@ new_arena(void) unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0); address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); +#ifdef USE_RADIX_TREE if (address != NULL) { if (!arena_map_mark_used((uintptr_t)address, 1)) { /* marking arena in radix tree failed, abort */ @@ -1572,6 +1593,7 @@ new_arena(void) address = NULL; } } +#endif if (address == NULL) { /* The allocation failed: return NULL after putting the * arenaobj back. @@ -1603,6 +1625,7 @@ new_arena(void) +#ifdef USE_RADIX_TREE /* Return true if and only if P is an address that was allocated by pymalloc. When the radix tree is used, 'poolp' is unused. */ @@ -1611,7 +1634,99 @@ address_in_range(void *p, poolp pool) { return arena_map_is_used(p); } +#else +/* +address_in_range(P, POOL) + +Return true if and only if P is an address that was allocated by pymalloc. +POOL must be the pool address associated with P, i.e., POOL = POOL_ADDR(P) +(the caller is asked to compute this because the macro expands POOL more than +once, and for efficiency it's best for the caller to assign POOL_ADDR(P) to a +variable and pass the latter to the macro; because address_in_range is +called on every alloc/realloc/free, micro-efficiency is important here). + +Tricky: Let B be the arena base address associated with the pool, B = +arenas[(POOL)->arenaindex].address. Then P belongs to the arena if and only if + + B <= P < B + ARENA_SIZE + +Subtracting B throughout, this is true iff + + 0 <= P-B < ARENA_SIZE + +By using unsigned arithmetic, the "0 <=" half of the test can be skipped. + +Obscure: A PyMem "free memory" function can call the pymalloc free or realloc +before the first arena has been allocated. `arenas` is still NULL in that +case. We're relying on that maxarenas is also 0 in that case, so that +(POOL)->arenaindex < maxarenas must be false, saving us from trying to index +into a NULL arenas. + +Details: given P and POOL, the arena_object corresponding to P is AO = +arenas[(POOL)->arenaindex]. Suppose obmalloc controls P. Then (barring wild +stores, etc), POOL is the correct address of P's pool, AO.address is the +correct base address of the pool's arena, and P must be within ARENA_SIZE of +AO.address. In addition, AO.address is not 0 (no arena can start at address 0 +(NULL)). Therefore address_in_range correctly reports that obmalloc +controls P. + +Now suppose obmalloc does not control P (e.g., P was obtained via a direct +call to the system malloc() or realloc()). (POOL)->arenaindex may be anything +in this case -- it may even be uninitialized trash. If the trash arenaindex +is >= maxarenas, the macro correctly concludes at once that obmalloc doesn't +control P. + +Else arenaindex is < maxarena, and AO is read up. If AO corresponds to an +allocated arena, obmalloc controls all the memory in slice AO.address : +AO.address+ARENA_SIZE. By case assumption, P is not controlled by obmalloc, +so P doesn't lie in that slice, so the macro correctly reports that P is not +controlled by obmalloc. + +Finally, if P is not controlled by obmalloc and AO corresponds to an unused +arena_object (one not currently associated with an allocated arena), +AO.address is 0, and the second test in the macro reduces to: + + P < ARENA_SIZE + +If P >= ARENA_SIZE (extremely likely), the macro again correctly concludes +that P is not controlled by obmalloc. However, if P < ARENA_SIZE, this part +of the test still passes, and the third clause (AO.address != 0) is necessary +to get the correct result: AO.address is 0 in this case, so the macro +correctly reports that P is not controlled by obmalloc (despite that P lies in +slice AO.address : AO.address + ARENA_SIZE). + +Note: The third (AO.address != 0) clause was added in Python 2.5. Before +2.5, arenas were never free()'ed, and an arenaindex < maxarena always +corresponded to a currently-allocated arena, so the "P is not controlled by +obmalloc, AO corresponds to an unused arena_object, and P < ARENA_SIZE" case +was impossible. + +Note that the logic is excruciating, and reading up possibly uninitialized +memory when P is not controlled by obmalloc (to get at (POOL)->arenaindex) +creates problems for some memory debuggers. The overwhelming advantage is +that this test determines whether an arbitrary address is controlled by +obmalloc in a small constant time, independent of the number of arenas +obmalloc controls. Since this test is needed at every entry point, it's +extremely desirable that it be this fast. +*/ + +static bool _Py_NO_SANITIZE_ADDRESS + _Py_NO_SANITIZE_THREAD + _Py_NO_SANITIZE_MEMORY +address_in_range(void *p, poolp pool) +{ + // Since address_in_range may be reading from memory which was not allocated + // by Python, it is important that pool->arenaindex is read only once, as + // another thread may be concurrently modifying the value without holding + // the GIL. The following dance forces the compiler to read pool->arenaindex + // only once. + uint arenaindex = *((volatile uint *)&pool->arenaindex); + return arenaindex < maxarenas && + (uintptr_t)p - arenas[arenaindex].address < ARENA_SIZE && + arenas[arenaindex].address != 0; +} +#endif /* !USE_RADIX_TREE */ /*==========================================================================*/ @@ -1957,8 +2072,10 @@ insert_to_freepool(poolp pool) ao->nextarena = unused_arena_objects; unused_arena_objects = ao; +#ifdef USE_RADIX_TREE /* mark arena region as not under control of obmalloc */ arena_map_mark_used(ao->address, 0); +#endif /* Free the entire arena. */ _PyObject_Arena.free(_PyObject_Arena.ctx, From 130a7ed636e6052e5df44d164dc4886f7447d174 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Thu, 25 Feb 2021 13:18:23 -0800 Subject: [PATCH 15/17] Rename preprocessor flag, make easy to override. --- .../2021-02-21-14-19-35.bpo-37448.btl7vO.rst | 13 ++++++---- Objects/obmalloc.c | 26 +++++++++++-------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst b/Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst index 635f0f3e3682415..fe771a50122f697 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2021-02-21-14-19-35.bpo-37448.btl7vO.rst @@ -1,12 +1,15 @@ -Add a radix tree to track used obmalloc arenas. Use to replace the old -implementation of address_in_range(). The radix tree approach makes it easy -to increase pool sizes beyond the OS page size. Boosting the pool and arena -size allows obmalloc to handle a significantly higher percentage of requests -from its ultra-fast paths. +Add a radix tree based memory map to track in-use obmalloc arenas. Use to +replace the old implementation of address_in_range(). The radix tree +approach makes it easy to increase pool sizes beyond the OS page size. +Boosting the pool and arena size allows obmalloc to handle a significantly +higher percentage of requests from its ultra-fast paths. It also has the advantage of eliminating the memory unsanitary behavior of the previous address_in_range(). The old address_in_range() was marked with the annotations _Py_NO_SANITIZE_ADDRESS, _Py_NO_SANITIZE_THREAD, and _Py_NO_SANITIZE_MEMORY. Those annotations are no longer needed. +To disable the radix tree map, set a preprocessor flag as follows: +`-DWITH_PYMALLOC_RADIX_TREE=0`. + Co-authored-by: Tim Peters diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index f5fc603607c6a98..0e9c8ffee645c3c 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -894,13 +894,17 @@ static int running_on_valgrind = -1; #endif #endif -/* use radix-tree to track arena memory regions, for address_in_range() */ -#define USE_RADIX_TREE +#if !defined(WITH_PYMALLOC_RADIX_TREE) +/* Use radix-tree to track arena memory regions, for address_in_range(). + * Enable by default since it allows larger pool sizes. Can be disabled + * using -DWITH_PYMALLOC_RADIX_TREE=0 */ +#define WITH_PYMALLOC_RADIX_TREE 1 +#endif #if SIZEOF_VOID_P > 4 /* on 64-bit platforms use larger pools and arenas if we can */ #define USE_LARGE_ARENAS -#ifdef USE_RADIX_TREE +#if WITH_PYMALLOC_RADIX_TREE /* large pools only supported if radix-tree is enabled */ #define USE_LARGE_POOLS #endif @@ -942,9 +946,9 @@ static int running_on_valgrind = -1; #define POOL_SIZE (1 << POOL_BITS) #define POOL_SIZE_MASK (POOL_SIZE - 1) -#ifndef USE_RADIX_TREE +#if !WITH_PYMALLOC_RADIX_TREE #if POOL_SIZE != SYSTEM_PAGE_SIZE -# error "pool size must be system page size" +# error "pool size must be equal to system page size" #endif #endif @@ -1261,7 +1265,7 @@ _Py_GetAllocatedBlocks(void) return n; } -#ifdef USE_RADIX_TREE +#if WITH_PYMALLOC_RADIX_TREE /*==========================================================================*/ /* radix tree for tracking arena usage @@ -1514,7 +1518,7 @@ arena_map_is_used(block *p) /* end of radix tree logic */ /*==========================================================================*/ -#endif /* USE_RADIX_TREE */ +#endif /* WITH_PYMALLOC_RADIX_TREE */ /* Allocate a new arena. If we run out of memory, return NULL. Else @@ -1585,7 +1589,7 @@ new_arena(void) unused_arena_objects = arenaobj->nextarena; assert(arenaobj->address == 0); address = _PyObject_Arena.alloc(_PyObject_Arena.ctx, ARENA_SIZE); -#ifdef USE_RADIX_TREE +#if WITH_PYMALLOC_RADIX_TREE if (address != NULL) { if (!arena_map_mark_used((uintptr_t)address, 1)) { /* marking arena in radix tree failed, abort */ @@ -1625,7 +1629,7 @@ new_arena(void) -#ifdef USE_RADIX_TREE +#if WITH_PYMALLOC_RADIX_TREE /* Return true if and only if P is an address that was allocated by pymalloc. When the radix tree is used, 'poolp' is unused. */ @@ -1726,7 +1730,7 @@ address_in_range(void *p, poolp pool) arenas[arenaindex].address != 0; } -#endif /* !USE_RADIX_TREE */ +#endif /* !WITH_PYMALLOC_RADIX_TREE */ /*==========================================================================*/ @@ -2072,7 +2076,7 @@ insert_to_freepool(poolp pool) ao->nextarena = unused_arena_objects; unused_arena_objects = ao; -#ifdef USE_RADIX_TREE +#if WITH_PYMALLOC_RADIX_TREE /* mark arena region as not under control of obmalloc */ arena_map_mark_used(ao->address, 0); #endif From e0924e67d4ccce2a54426c9af3678ebc4ed2074a Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Tue, 2 Mar 2021 16:34:33 -0800 Subject: [PATCH 16/17] Minor improvements to names and comments. --- Objects/obmalloc.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 0e9c8ffee645c3c..0f06b97fa7377d0 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1272,7 +1272,7 @@ _Py_GetAllocatedBlocks(void) bit allocation for keys 64-bit pointers and 2^20 arena size: - 16 -> ignored (BITS - PHYSICAL_BITS) + 16 -> ignored (POINTER_BITS - ADDRESS_BITS) 10 -> MAP_TOP 10 -> MAP_MID 8 -> MAP_BOT @@ -1291,21 +1291,21 @@ _Py_GetAllocatedBlocks(void) #if SIZEOF_VOID_P == 8 /* number of bits in a pointer */ -#define BITS 64 +#define POINTER_BITS 64 /* Current 64-bit processors are limited to 48-bit physical addresses. For * now, the top 17 bits of addresses will all be equal to bit 2**47. If that * changes in the future, this must be adjusted upwards. */ -#define PHYSICAL_BITS 48 +#define ADDRESS_BITS 48 /* use the top and mid layers of the radix tree */ #define USE_INTERIOR_NODES #elif SIZEOF_VOID_P == 4 -#define BITS 32 -#define PHYSICAL_BITS 32 +#define POINTER_BITS 32 +#define ADDRESS_BITS 32 #else @@ -1321,7 +1321,7 @@ _Py_GetAllocatedBlocks(void) #ifdef USE_INTERIOR_NODES /* number of bits used for MAP_TOP and MAP_MID nodes */ -#define INTERIOR_BITS ((PHYSICAL_BITS - ARENA_BITS + 2) / 3) +#define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3) #else #define INTERIOR_BITS 0 #endif @@ -1334,7 +1334,7 @@ _Py_GetAllocatedBlocks(void) #define MAP_MID_LENGTH (1 << MAP_MID_BITS) #define MAP_MID_MASK (MAP_MID_LENGTH - 1) -#define MAP_BOT_BITS (PHYSICAL_BITS - ARENA_BITS - 2*INTERIOR_BITS) +#define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS) #define MAP_BOT_LENGTH (1 << MAP_BOT_BITS) #define MAP_BOT_MASK (MAP_BOT_LENGTH - 1) @@ -1347,10 +1347,13 @@ _Py_GetAllocatedBlocks(void) #define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK) #define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK) -#if PHYSICAL_BITS > BITS -/* Return non-physical bits of pointer. Should be same for all valid - * pointers if PHYSICAL_BITS set correctly. */ -#define HIGH_BITS(p) (AS_UINT(p) >> PHYSICAL_BITS) +#if ADDRESS_BITS > POINTER_BITS +/* Return non-physical address bits of a pointer. Those bits should be same + * for all valid pointers if ADDRESS_BITS set correctly. Linux has support for + * 57-bit address space (Intel 5-level paging) but will not currently give + * those addresses to user space. + */ +#define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS) #else #define HIGH_BITS(p) 0 #endif @@ -1400,7 +1403,7 @@ static arena_map_bot_t * arena_map_get(block *p, int create) { #ifdef USE_INTERIOR_NODES - /* sanity check that PHYSICAL_BITS is correct */ + /* sanity check that ADDRESS_BITS is correct */ assert(HIGH_BITS(p) == HIGH_BITS(&arena_map_root)); int i1 = MAP_TOP_INDEX(p); if (arena_map_root.ptrs[i1] == NULL) { @@ -1460,7 +1463,7 @@ arena_map_get(block *p, int create) static int arena_map_mark_used(uintptr_t arena_base, int is_used) { - /* sanity check that PHYSICAL_BITS is correct */ + /* sanity check that ADDRESS_BITS is correct */ assert(HIGH_BITS(arena_base) == HIGH_BITS(&arena_map_root)); arena_map_bot_t *n_hi = arena_map_get((block *)arena_base, is_used); if (n_hi == NULL) { From b41a77a350574260c2edeb5f4c35ae8ba0e276d1 Mon Sep 17 00:00:00 2001 From: Neil Schemenauer Date: Sat, 6 Mar 2021 14:05:50 -0800 Subject: [PATCH 17/17] Improve sys._debugmallocstats() output. --- Objects/obmalloc.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Objects/obmalloc.c b/Objects/obmalloc.c index 0f06b97fa7377d0..76ff6f9c99bc9c7 100644 --- a/Objects/obmalloc.c +++ b/Objects/obmalloc.c @@ -1391,8 +1391,8 @@ typedef struct arena_map_top { #ifdef USE_INTERIOR_NODES static arena_map_top_t arena_map_root; /* accounting for number of used interior nodes */ -static int arena_map_top_count; static int arena_map_mid_count; +static int arena_map_bot_count; #else static arena_map_bot_t arena_map_root; #endif @@ -1415,7 +1415,7 @@ arena_map_get(block *p, int create) return NULL; } arena_map_root.ptrs[i1] = n; - arena_map_top_count++; + arena_map_mid_count++; } int i2 = MAP_MID_INDEX(p); if (arena_map_root.ptrs[i1]->ptrs[i2] == NULL) { @@ -1427,7 +1427,7 @@ arena_map_get(block *p, int create) return NULL; } arena_map_root.ptrs[i1]->ptrs[i2] = n; - arena_map_mid_count++; + arena_map_bot_count++; } return arena_map_root.ptrs[i1]->ptrs[i2]; #else @@ -3028,8 +3028,8 @@ _PyObject_DebugMallocStats(FILE *out) (void)printone(out, "# arenas highwater mark", narenas_highwater); (void)printone(out, "# arenas allocated current", narenas); #ifdef USE_INTERIOR_NODES - (void)printone(out, "# arena map top nodes", arena_map_top_count); (void)printone(out, "# arena map mid nodes", arena_map_mid_count); + (void)printone(out, "# arena map bot nodes", arena_map_bot_count); fputc('\n', out); #endif @@ -3051,6 +3051,15 @@ _PyObject_DebugMallocStats(FILE *out) total += printone(out, "# bytes lost to pool headers", pool_header_bytes); total += printone(out, "# bytes lost to quantization", quantization); total += printone(out, "# bytes lost to arena alignment", arena_alignment); +#ifdef WITH_PYMALLOC_RADIX_TREE + total += printone(out, "# bytes lost to arena map root", sizeof(arena_map_root)); +#endif +#ifdef USE_INTERIOR_NODES + total += printone(out, "# bytes lost to arena map mid", + sizeof(arena_map_mid_t) * arena_map_mid_count); + total += printone(out, "# bytes lost to arena map bot", + sizeof(arena_map_bot_t) * arena_map_bot_count); +#endif (void)printone(out, "Total", total); return 1; }