From 2ec8d8ab9b092527cceb24e67882ffa7a43beb7d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 4 Nov 2024 11:42:51 +0000 Subject: [PATCH 01/33] GC experiment: mark almost all reachable objects before doing collection phase --- Include/internal/pycore_gc.h | 7 ++++ Python/gc.c | 79 ++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index b85957df5a6b9f..0758d637176065 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -302,6 +302,12 @@ struct gc_generation_stats { Py_ssize_t uncollectable; }; +enum _GCPhase { + GC_PHASE_DONE = 0, + GC_PHASE_MARK = 1, + GC_PHASE_COLLECT = 2 +}; + struct _gc_runtime_state { /* List of objects that still need to be cleaned up, singly linked * via their gc headers' gc_prev pointers. */ @@ -329,6 +335,7 @@ struct _gc_runtime_state { Py_ssize_t work_to_do; /* Which of the old spaces is the visited space */ int visited_space; + int phase; #ifdef Py_GIL_DISABLED /* This is the number of objects that survived the last full diff --git a/Python/gc.c b/Python/gc.c index 028657eb8999c1..aef3a3e6252cf5 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1431,6 +1431,81 @@ completed_cycle(GCState *gcstate) gc = next; } gcstate->work_to_do = 0; + gcstate->phase = GC_PHASE_MARK; +} + +static void +gc_mark(PyThreadState *tstate, struct gc_collection_stats *stats) +{ + // TO DO -- Make this incremental + GCState *gcstate = &tstate->interp->gc; + validate_old(gcstate); + PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + PyGC_Head reachable; + gc_list_init(&reachable); + // Move all reachable objects into visited space. + PyGC_Head *gc = AS_GC(tstate->interp->sysdict); + if (gc_old_space(gc) != gcstate->visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, &reachable); + } + gc = AS_GC(tstate->interp->builtins); + if (gc_old_space(gc) != gcstate->visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, &reachable); + } + // Move all objects on stacks to reachable + _PyRuntimeState *runtime = &_PyRuntime; + HEAD_LOCK(runtime); + PyThreadState* ts = PyInterpreterState_ThreadHead(tstate->interp); + HEAD_UNLOCK(runtime); + Py_ssize_t objects_marked = 0; + while (ts) { + _PyInterpreterFrame *frame = ts->current_frame; + while (frame) { + _PyStackRef *locals = frame->localsplus; + _PyStackRef *sp = frame->stackpointer; + while (sp > locals) { + sp--; + if (PyStackRef_IsNull(*sp)) { + continue; + } + PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); + if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != gcstate->visited_space) { + gc_flip_old_space(gc); + objects_marked++; + gc_list_move(gc, &reachable); + } + } + } + frame = frame->previous; + } + HEAD_LOCK(runtime); + ts = PyThreadState_Next(ts); + HEAD_UNLOCK(runtime); + } + // Transitively traverse all objects from reachable, until empty + struct container_and_flag arg = { + .container = &reachable, + .visited_space = gcstate->visited_space, + .size = 0 + }; + while (!gc_list_is_empty(&reachable)) { + PyGC_Head *gc = _PyGCHead_NEXT(&reachable); + assert(gc_old_space(gc) == gcstate->visited_space); + gc_list_move(gc, visited); + PyObject *op = FROM_GC(gc); + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); + } + validate_old(gcstate); + gcstate->work_to_do -= objects_marked; + gcstate->phase = GC_PHASE_COLLECT; } static void @@ -1438,6 +1513,10 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(1, collections, 1); GCState *gcstate = &tstate->interp->gc; + if (gcstate->phase == GC_PHASE_MARK) { + gc_mark(tstate, stats); + return; + } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; PyGC_Head increment; From 1fdf00ec1ab4889f40a5b8af89cfef15942feb74 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 4 Nov 2024 12:07:52 +0000 Subject: [PATCH 02/33] Add stats for objects marked --- Include/cpython/pystats.h | 1 + Python/gc.c | 10 +++++++++- Python/specialize.c | 1 + Tools/scripts/summarize_stats.py | 3 ++- 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index f1ca54839fbc38..e4c62aa18b8a46 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -99,6 +99,7 @@ typedef struct _gc_stats { uint64_t collections; uint64_t object_visits; uint64_t objects_collected; + uint64_t objects_marked; } GCStats; typedef struct _uop_stats { diff --git a/Python/gc.c b/Python/gc.c index aef3a3e6252cf5..7af7d8a8b3f74a 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1445,21 +1445,23 @@ gc_mark(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_init(&reachable); // Move all reachable objects into visited space. PyGC_Head *gc = AS_GC(tstate->interp->sysdict); + Py_ssize_t objects_marked = 0; if (gc_old_space(gc) != gcstate->visited_space) { gc_flip_old_space(gc); gc_list_move(gc, &reachable); + objects_marked++; } gc = AS_GC(tstate->interp->builtins); if (gc_old_space(gc) != gcstate->visited_space) { gc_flip_old_space(gc); gc_list_move(gc, &reachable); + objects_marked++; } // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); PyThreadState* ts = PyInterpreterState_ThreadHead(tstate->interp); HEAD_UNLOCK(runtime); - Py_ssize_t objects_marked = 0; while (ts) { _PyInterpreterFrame *frame = ts->current_frame; while (frame) { @@ -1503,8 +1505,14 @@ gc_mark(PyThreadState *tstate, struct gc_collection_stats *stats) visit_add_to_container, &arg); } + objects_marked += arg.size; validate_old(gcstate); gcstate->work_to_do -= objects_marked; +#ifdef Py_STATS + if (_Py_stats) { + GC_STAT_ADD(1, objects_marked, objects_marked); + } +#endif gcstate->phase = GC_PHASE_COLLECT; } diff --git a/Python/specialize.c b/Python/specialize.c index ae47809305a300..7c70d94e724cc2 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -230,6 +230,7 @@ print_gc_stats(FILE *out, GCStats *stats) for (int i = 0; i < NUM_GENERATIONS; i++) { fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections); fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits); + fprintf(out, "GC[%d] objects marked: %" PRIu64 "\n", i, stats[i].objects_marked); fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected); } } diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 5793e5c649d6b3..2d0f17c30200ad 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -1118,6 +1118,7 @@ def calc_gc_stats(stats: Stats) -> Rows: Count(gen["collections"]), Count(gen["objects collected"]), Count(gen["object visits"]), + Count(gen["objects marked"]), ) for (i, gen) in enumerate(gc_stats) ] @@ -1127,7 +1128,7 @@ def calc_gc_stats(stats: Stats) -> Rows: "GC collections and effectiveness", [ Table( - ("Generation:", "Collections:", "Objects collected:", "Object visits:"), + ("Generation:", "Collections:", "Objects collected:", "Object visits:", "Objects marked:"), calc_gc_stats, ) ], From 5e813c52caa58e5691b9eab6ed5f108a05fa99f7 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 4 Nov 2024 12:27:51 +0000 Subject: [PATCH 03/33] Start with mark phase --- Include/internal/pycore_gc.h | 5 ++--- Include/internal/pycore_runtime_init.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 0758d637176065..22c4b4afeab563 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -303,9 +303,8 @@ struct gc_generation_stats { }; enum _GCPhase { - GC_PHASE_DONE = 0, - GC_PHASE_MARK = 1, - GC_PHASE_COLLECT = 2 + GC_PHASE_MARK = 0, + GC_PHASE_COLLECT = 1 }; struct _gc_runtime_state { diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index e99febab2f3d57..60faa703858490 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -132,6 +132,7 @@ extern PyTypeObject _PyExc_MemoryError; { .threshold = 0, }, \ }, \ .work_to_do = -5000, \ + .phase = 0, \ }, \ .qsbr = { \ .wr_seq = QSBR_INITIAL, \ From 8bd7606d90c8b2a1e610a510b3a0836942bfaac0 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 5 Nov 2024 10:40:30 +0000 Subject: [PATCH 04/33] Add stats for visits during marking --- Include/cpython/pystats.h | 1 + Python/gc.c | 12 +++++++++++- Python/specialize.c | 1 + Tools/scripts/summarize_stats.py | 3 ++- 4 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index e4c62aa18b8a46..2cc919c107c036 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -98,6 +98,7 @@ typedef struct _object_stats { typedef struct _gc_stats { uint64_t collections; uint64_t object_visits; + uint64_t mark_visits; uint64_t objects_collected; uint64_t objects_marked; } GCStats; diff --git a/Python/gc.c b/Python/gc.c index 7af7d8a8b3f74a..a4e38790b7720c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1360,6 +1360,7 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space) struct container_and_flag { PyGC_Head *container; int visited_space; + int mark; uintptr_t size; }; @@ -1367,8 +1368,15 @@ struct container_and_flag { static int visit_add_to_container(PyObject *op, void *arg) { - OBJECT_STAT_INC(object_visits); struct container_and_flag *cf = (struct container_and_flag *)arg; +#ifdef Py_STATS + if (cf->mark) { + GC_STAT_ADD(1, mark_visits, 1); + } + else { + OBJECT_STAT_INC(object_visits); + } +#endif int visited = cf->visited_space; assert(visited == get_gc_state()->visited_space); if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { @@ -1390,6 +1398,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat struct container_and_flag arg = { .container = container, .visited_space = gcstate->visited_space, + .mark = 0, .size = 0 }; assert(GC_NEXT(gc) == container); @@ -1493,6 +1502,7 @@ gc_mark(PyThreadState *tstate, struct gc_collection_stats *stats) struct container_and_flag arg = { .container = &reachable, .visited_space = gcstate->visited_space, + .mark = 1, .size = 0 }; while (!gc_list_is_empty(&reachable)) { diff --git a/Python/specialize.c b/Python/specialize.c index 7c70d94e724cc2..ebb3565bc53c4b 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -229,6 +229,7 @@ print_gc_stats(FILE *out, GCStats *stats) { for (int i = 0; i < NUM_GENERATIONS; i++) { fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections); + fprintf(out, "GC[%d] marking visits: %" PRIu64 "\n", i, stats[i].mark_visits); fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits); fprintf(out, "GC[%d] objects marked: %" PRIu64 "\n", i, stats[i].objects_marked); fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected); diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 2d0f17c30200ad..6938ee492d68ae 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -1118,6 +1118,7 @@ def calc_gc_stats(stats: Stats) -> Rows: Count(gen["collections"]), Count(gen["objects collected"]), Count(gen["object visits"]), + Count(gen["marking visits"]), Count(gen["objects marked"]), ) for (i, gen) in enumerate(gc_stats) @@ -1128,7 +1129,7 @@ def calc_gc_stats(stats: Stats) -> Rows: "GC collections and effectiveness", [ Table( - ("Generation:", "Collections:", "Objects collected:", "Object visits:", "Objects marked:"), + ("Generation:", "Collections:", "Objects collected:", "Object visits:", "Marking visits:", "Objects marked:"), calc_gc_stats, ) ], From 3513da2b0cd0e87f81693ff15945bab295648d2f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Tue, 5 Nov 2024 15:55:45 +0000 Subject: [PATCH 05/33] Visit new frames before each increment --- Include/internal/pycore_frame.h | 3 + Include/internal/pycore_object.h | 4 +- Python/gc.c | 161 ++++++++++++++++++++----------- 3 files changed, 111 insertions(+), 57 deletions(-) diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h index c9ac3819d0390b..901faed18a6f2e 100644 --- a/Include/internal/pycore_frame.h +++ b/Include/internal/pycore_frame.h @@ -71,6 +71,7 @@ typedef struct _PyInterpreterFrame { _PyStackRef *stackpointer; uint16_t return_offset; /* Only relevant during a function call */ char owner; + char visited; /* Locals and stack */ _PyStackRef localsplus[1]; } _PyInterpreterFrame; @@ -165,6 +166,7 @@ _PyFrame_Initialize( frame->instr_ptr = _PyCode_CODE(code); frame->return_offset = 0; frame->owner = FRAME_OWNED_BY_THREAD; + frame->visited = 0; for (int i = null_locals_from; i < code->co_nlocalsplus; i++) { frame->localsplus[i] = PyStackRef_NULL; @@ -341,6 +343,7 @@ _PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int frame->frame_obj = NULL; frame->instr_ptr = _PyCode_CODE(code); frame->owner = FRAME_OWNED_BY_THREAD; + frame->visited = 0; frame->return_offset = 0; #ifdef Py_GIL_DISABLED diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h index c7af720b1ce43d..0cba7bc083087e 100644 --- a/Include/internal/pycore_object.h +++ b/Include/internal/pycore_object.h @@ -466,8 +466,8 @@ static inline void _PyObject_GC_TRACK( PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev); _PyGCHead_SET_NEXT(last, gc); _PyGCHead_SET_PREV(gc, last); - /* Young objects will be moved into the visited space during GC, so set the bit here */ - gc->_gc_next = ((uintptr_t)generation0) | (uintptr_t)interp->gc.visited_space; + uintptr_t not_visited = 1 ^ interp->gc.visited_space; + gc->_gc_next = ((uintptr_t)generation0) | not_visited; generation0->_gc_prev = (uintptr_t)gc; #endif } diff --git a/Python/gc.c b/Python/gc.c index a4e38790b7720c..1a3f22879e1b69 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1320,6 +1320,7 @@ gc_collect_young(PyThreadState *tstate, PyGC_Head survivors; gc_list_init(&survivors); + gc_list_set_space(young, gcstate->visited_space); gc_collect_region(tstate, young, &survivors, UNTRACK_TUPLES, stats); Py_ssize_t survivor_count = 0; if (gcstate->visited_space) { @@ -1407,6 +1408,7 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat * have been marked as visited */ assert(IS_IN_VISITED(gc, gcstate->visited_space)); PyObject *op = FROM_GC(gc); + assert(_PyObject_GC_IS_TRACKED(op)); if (_Py_IsImmortal(op)) { PyGC_Head *next = GC_NEXT(gc); gc_list_move(gc, &get_gc_state()->permanent_generation.head); @@ -1426,56 +1428,95 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat static void completed_cycle(GCState *gcstate) { -#ifdef Py_DEBUG - PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; - assert(gc_list_is_empty(not_visited)); -#endif - gcstate->visited_space = flip_old_space(gcstate->visited_space); + assert(gc_list_is_empty(&gcstate->old[gcstate->visited_space^1].head)); + int not_visited = gcstate->visited_space; + gcstate->visited_space = flip_old_space(not_visited); /* Make sure all young objects have old space bit set correctly */ PyGC_Head *young = &gcstate->young.head; PyGC_Head *gc = GC_NEXT(young); while (gc != young) { PyGC_Head *next = GC_NEXT(gc); - gc_set_old_space(gc, gcstate->visited_space); + gc_set_old_space(gc, not_visited); gc = next; } gcstate->work_to_do = 0; gcstate->phase = GC_PHASE_MARK; } -static void -gc_mark(PyThreadState *tstate, struct gc_collection_stats *stats) +static int +move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) +{ + if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { + PyGC_Head *gc = AS_GC(op); + if (_PyObject_GC_IS_TRACKED(op) && + gc_old_space(gc) != visited_space) { + gc_flip_old_space(gc); + gc_list_move(gc, reachable); + return 1; + } + } + return 0; +} + +static int +mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) +{ + // Transitively traverse all objects from reachable, until empty + struct container_and_flag arg = { + .container = reachable, + .visited_space = visited_space, + .mark = 1, + .size = 0 + }; + while (!gc_list_is_empty(reachable)) { + PyGC_Head *gc = _PyGCHead_NEXT(reachable); + assert(gc_old_space(gc) == visited_space); + gc_list_move(gc, visited); + PyObject *op = FROM_GC(gc); + traverseproc traverse = Py_TYPE(op)->tp_traverse; + (void) traverse(op, + visit_add_to_container, + &arg); + } + gc_list_validate_space(visited, visited_space); + return arg.size; +} + +static int +mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) +{ + PyGC_Head reachable; + gc_list_init(&reachable); + Py_ssize_t objects_marked = 0; + objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); + objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); + objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static int +mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { - // TO DO -- Make this incremental - GCState *gcstate = &tstate->interp->gc; - validate_old(gcstate); - PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; PyGC_Head reachable; gc_list_init(&reachable); - // Move all reachable objects into visited space. - PyGC_Head *gc = AS_GC(tstate->interp->sysdict); Py_ssize_t objects_marked = 0; - if (gc_old_space(gc) != gcstate->visited_space) { - gc_flip_old_space(gc); - gc_list_move(gc, &reachable); - objects_marked++; - } - gc = AS_GC(tstate->interp->builtins); - if (gc_old_space(gc) != gcstate->visited_space) { - gc_flip_old_space(gc); - gc_list_move(gc, &reachable); - objects_marked++; - } // Move all objects on stacks to reachable _PyRuntimeState *runtime = &_PyRuntime; HEAD_LOCK(runtime); - PyThreadState* ts = PyInterpreterState_ThreadHead(tstate->interp); + PyThreadState* ts = PyInterpreterState_ThreadHead(interp); HEAD_UNLOCK(runtime); while (ts) { _PyInterpreterFrame *frame = ts->current_frame; while (frame) { _PyStackRef *locals = frame->localsplus; _PyStackRef *sp = frame->stackpointer; + if (frame->owner != FRAME_OWNED_BY_CSTACK) { + objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + objects_marked += move_to_reachable(func, &reachable, visited_space); + } while (sp > locals) { sp--; if (PyStackRef_IsNull(*sp)) { @@ -1483,40 +1524,41 @@ gc_mark(PyThreadState *tstate, struct gc_collection_stats *stats) } PyObject *op = PyStackRef_AsPyObjectBorrow(*sp); if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { - PyGC_Head *gc = AS_GC(op); + PyGC_Head *gc = AS_GC(op); if (_PyObject_GC_IS_TRACKED(op) && - gc_old_space(gc) != gcstate->visited_space) { + gc_old_space(gc) != visited_space) { gc_flip_old_space(gc); objects_marked++; gc_list_move(gc, &reachable); } } } + if (!start && frame->visited) { + // If this frame has already been visited, then the lower frames + // will have already been visited and will not have changed + break; + } + frame->visited = 1; frame = frame->previous; } HEAD_LOCK(runtime); ts = PyThreadState_Next(ts); HEAD_UNLOCK(runtime); } - // Transitively traverse all objects from reachable, until empty - struct container_and_flag arg = { - .container = &reachable, - .visited_space = gcstate->visited_space, - .mark = 1, - .size = 0 - }; - while (!gc_list_is_empty(&reachable)) { - PyGC_Head *gc = _PyGCHead_NEXT(&reachable); - assert(gc_old_space(gc) == gcstate->visited_space); - gc_list_move(gc, visited); - PyObject *op = FROM_GC(gc); - traverseproc traverse = Py_TYPE(op)->tp_traverse; - (void) traverse(op, - visit_add_to_container, - &arg); - } - objects_marked += arg.size; + objects_marked += mark_all_reachable(&reachable, visited, visited_space); + assert(gc_list_is_empty(&reachable)); + return objects_marked; +} + +static void +mark_at_start(PyThreadState *tstate) +{ + // TO DO -- Make this incremental + GCState *gcstate = &tstate->interp->gc; validate_old(gcstate); + PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); + objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); gcstate->work_to_do -= objects_marked; #ifdef Py_STATS if (_Py_stats) { @@ -1531,8 +1573,9 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(1, collections, 1); GCState *gcstate = &tstate->interp->gc; + gcstate->work_to_do += gcstate->young.count; if (gcstate->phase == GC_PHASE_MARK) { - gc_mark(tstate, stats); + mark_at_start(tstate); return; } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; @@ -1543,6 +1586,8 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (scale_factor < 1) { scale_factor = 1; } + gcstate->work_to_do -= mark_stacks(tstate->interp, visited, gcstate->visited_space, false); + gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &increment); gcstate->young.count = 0; gc_list_validate_space(&increment, gcstate->visited_space); @@ -1554,6 +1599,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head *gc = _PyGCHead_NEXT(not_visited); gc_list_move(gc, &increment); increment_size++; + assert(!_Py_IsImmortal(FROM_GC(gc))); gc_set_old_space(gc, gcstate->visited_space); increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } @@ -1564,7 +1610,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR/ 2 / scale_factor; gcstate->work_to_do -= increment_size; validate_old(gcstate); @@ -1585,20 +1631,25 @@ gc_collect_full(PyThreadState *tstate, PyGC_Head *young = &gcstate->young.head; PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - /* merge all generations into visited */ - gc_list_validate_space(young, gcstate->visited_space); - gc_list_set_space(pending, gcstate->visited_space); + mark_global_roots(tstate->interp, visited, gcstate->visited_space); + mark_stacks(tstate->interp, visited, gcstate->visited_space, true); + /* merge all generations into pending */ + gc_list_validate_space(young, 1-gcstate->visited_space); gc_list_merge(young, pending); + gc_list_set_space(visited, 1-gcstate->visited_space); + gc_list_merge(visited, pending); + /* Mark reachable */ + mark_global_roots(tstate->interp, visited, gcstate->visited_space); + mark_stacks(tstate->interp, visited, gcstate->visited_space, true); gcstate->young.count = 0; - gc_list_merge(pending, visited); - - gc_collect_region(tstate, visited, visited, + gc_list_set_space(pending, gcstate->visited_space); + gc_collect_region(tstate, pending, visited, UNTRACK_TUPLES | UNTRACK_DICTS, stats); gcstate->young.count = 0; gcstate->old[0].count = 0; gcstate->old[1].count = 0; - + completed_cycle(gcstate); gcstate->work_to_do = - gcstate->young.threshold * 2; _PyGC_ClearAllFreeLists(tstate->interp); validate_old(gcstate); From ab1faeca1f198487ca5fb5c8376340a33141611c Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 6 Nov 2024 10:27:29 +0000 Subject: [PATCH 06/33] Redo stats --- Include/cpython/pystats.h | 4 +-- Python/gc.c | 48 +++++++++++++++++--------------- Python/specialize.c | 4 +-- Tools/scripts/summarize_stats.py | 7 +++-- 4 files changed, 34 insertions(+), 29 deletions(-) diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h index 2cc919c107c036..2ae48002d720e9 100644 --- a/Include/cpython/pystats.h +++ b/Include/cpython/pystats.h @@ -98,9 +98,9 @@ typedef struct _object_stats { typedef struct _gc_stats { uint64_t collections; uint64_t object_visits; - uint64_t mark_visits; uint64_t objects_collected; - uint64_t objects_marked; + uint64_t objects_transitively_reachable; + uint64_t objects_not_transitively_reachable; } GCStats; typedef struct _uop_stats { diff --git a/Python/gc.c b/Python/gc.c index 1a3f22879e1b69..daf623131eb396 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1370,14 +1370,7 @@ static int visit_add_to_container(PyObject *op, void *arg) { struct container_and_flag *cf = (struct container_and_flag *)arg; -#ifdef Py_STATS - if (cf->mark) { - GC_STAT_ADD(1, mark_visits, 1); - } - else { - OBJECT_STAT_INC(object_visits); - } -#endif + OBJECT_STAT_INC(object_visits); int visited = cf->visited_space; assert(visited == get_gc_state()->visited_space); if (!_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { @@ -1482,7 +1475,7 @@ mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) return arg.size; } -static int +static Py_ssize_t mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_space) { PyGC_Head reachable; @@ -1491,12 +1484,21 @@ mark_global_roots(PyInterpreterState *interp, PyGC_Head *visited, int visited_sp objects_marked += move_to_reachable(interp->sysdict, &reachable, visited_space); objects_marked += move_to_reachable(interp->builtins, &reachable, visited_space); objects_marked += move_to_reachable(interp->dict, &reachable, visited_space); + struct types_state *types = &interp->types; + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES; i++) { + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->builtins.initialized[i].tp_subclasses, &reachable, visited_space); + } + for (int i = 0; i < _Py_MAX_MANAGED_STATIC_EXT_TYPES; i++) { + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_dict, &reachable, visited_space); + objects_marked += move_to_reachable(types->for_extensions.initialized[i].tp_subclasses, &reachable, visited_space); + } objects_marked += mark_all_reachable(&reachable, visited, visited_space); assert(gc_list_is_empty(&reachable)); return objects_marked; } -static int +static Py_ssize_t mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, bool start) { PyGC_Head reachable; @@ -1550,7 +1552,7 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b return objects_marked; } -static void +static Py_ssize_t mark_at_start(PyThreadState *tstate) { // TO DO -- Make this incremental @@ -1560,12 +1562,8 @@ mark_at_start(PyThreadState *tstate) Py_ssize_t objects_marked = mark_global_roots(tstate->interp, visited, gcstate->visited_space); objects_marked += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); gcstate->work_to_do -= objects_marked; -#ifdef Py_STATS - if (_Py_stats) { - GC_STAT_ADD(1, objects_marked, objects_marked); - } -#endif gcstate->phase = GC_PHASE_COLLECT; + return objects_marked; } static void @@ -1575,7 +1573,9 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) GCState *gcstate = &tstate->interp->gc; gcstate->work_to_do += gcstate->young.count; if (gcstate->phase == GC_PHASE_MARK) { - mark_at_start(tstate); + Py_ssize_t objects_marked = mark_at_start(tstate); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; return; } PyGC_Head *not_visited = &gcstate->old[gcstate->visited_space^1].head; @@ -1586,7 +1586,9 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) if (scale_factor < 1) { scale_factor = 1; } - gcstate->work_to_do -= mark_stacks(tstate->interp, visited, gcstate->visited_space, false); + Py_ssize_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); + GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); + gcstate->work_to_do -= objects_marked; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); gc_list_merge(&gcstate->young.head, &increment); gcstate->young.count = 0; @@ -1603,6 +1605,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_set_old_space(gc, gcstate->visited_space); increment_size += expand_region_transitively_reachable(&increment, gc, gcstate); } + GC_STAT_ADD(1, objects_not_transitively_reachable, increment_size); gc_list_validate_space(&increment, gcstate->visited_space); PyGC_Head survivors; gc_list_init(&survivors); @@ -1631,16 +1634,17 @@ gc_collect_full(PyThreadState *tstate, PyGC_Head *young = &gcstate->young.head; PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - mark_global_roots(tstate->interp, visited, gcstate->visited_space); - mark_stacks(tstate->interp, visited, gcstate->visited_space, true); /* merge all generations into pending */ gc_list_validate_space(young, 1-gcstate->visited_space); gc_list_merge(young, pending); gc_list_set_space(visited, 1-gcstate->visited_space); gc_list_merge(visited, pending); /* Mark reachable */ - mark_global_roots(tstate->interp, visited, gcstate->visited_space); - mark_stacks(tstate->interp, visited, gcstate->visited_space, true); + Py_ssize_t reachable = mark_global_roots(tstate->interp, visited, gcstate->visited_space); + reachable += mark_stacks(tstate->interp, visited, gcstate->visited_space, true); + (void)reachable; + GC_STAT_ADD(2, objects_transitively_reachable, reachable); + GC_STAT_ADD(2, objects_not_transitively_reachable, gc_list_size(pending)); gcstate->young.count = 0; gc_list_set_space(pending, gcstate->visited_space); gc_collect_region(tstate, pending, visited, diff --git a/Python/specialize.c b/Python/specialize.c index ebb3565bc53c4b..a6c39e0ce5581a 100644 --- a/Python/specialize.c +++ b/Python/specialize.c @@ -229,9 +229,9 @@ print_gc_stats(FILE *out, GCStats *stats) { for (int i = 0; i < NUM_GENERATIONS; i++) { fprintf(out, "GC[%d] collections: %" PRIu64 "\n", i, stats[i].collections); - fprintf(out, "GC[%d] marking visits: %" PRIu64 "\n", i, stats[i].mark_visits); fprintf(out, "GC[%d] object visits: %" PRIu64 "\n", i, stats[i].object_visits); - fprintf(out, "GC[%d] objects marked: %" PRIu64 "\n", i, stats[i].objects_marked); + fprintf(out, "GC[%d] objects reachable from roots: %" PRIu64 "\n", i, stats[i].objects_transitively_reachable); + fprintf(out, "GC[%d] objects not reachable from roots: %" PRIu64 "\n", i, stats[i].objects_not_transitively_reachable); fprintf(out, "GC[%d] objects collected: %" PRIu64 "\n", i, stats[i].objects_collected); } } diff --git a/Tools/scripts/summarize_stats.py b/Tools/scripts/summarize_stats.py index 6938ee492d68ae..296a31d5916bd8 100644 --- a/Tools/scripts/summarize_stats.py +++ b/Tools/scripts/summarize_stats.py @@ -1118,8 +1118,8 @@ def calc_gc_stats(stats: Stats) -> Rows: Count(gen["collections"]), Count(gen["objects collected"]), Count(gen["object visits"]), - Count(gen["marking visits"]), - Count(gen["objects marked"]), + Count(gen["objects reachable from roots"]), + Count(gen["objects not reachable from roots"]), ) for (i, gen) in enumerate(gc_stats) ] @@ -1129,7 +1129,8 @@ def calc_gc_stats(stats: Stats) -> Rows: "GC collections and effectiveness", [ Table( - ("Generation:", "Collections:", "Objects collected:", "Object visits:", "Marking visits:", "Objects marked:"), + ("Generation:", "Collections:", "Objects collected:", "Object visits:", + "Reachable from roots:", "Not reachable from roots:"), calc_gc_stats, ) ], From 9e2d93cbd45f4883ed2d63b399418939d2a41e87 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 6 Nov 2024 11:07:01 +0000 Subject: [PATCH 07/33] Fix freezing and GC untracking --- Lib/test/test_gc.py | 7 +++++-- Python/gc.c | 33 ++++++++------------------------- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 2b3c0d3baddeaf..2efd80d99a80ba 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1284,7 +1284,8 @@ def test_refcount_errors(self): from test.support import gc_collect, SuppressCrashReport a = [1, 2, 3] - b = [a] + b = [a, a] + a.append(b) # Avoid coredump when Py_FatalError() calls abort() SuppressCrashReport().__enter__() @@ -1294,6 +1295,8 @@ def test_refcount_errors(self): # (to avoid deallocating it): import ctypes ctypes.pythonapi.Py_DecRef(ctypes.py_object(a)) + del a + del b # The garbage collector should now have a fatal error # when it reaches the broken object @@ -1322,7 +1325,7 @@ def test_refcount_errors(self): self.assertRegex(stderr, br'object type name: list') self.assertRegex(stderr, - br'object repr : \[1, 2, 3\]') + br'object repr : \[1, 2, 3, \[\[...\], \[...\]\]\]') class GCTogglingTests(unittest.TestCase): diff --git a/Python/gc.c b/Python/gc.c index daf623131eb396..7a7e6069ceb683 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -726,7 +726,7 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) } static void -untrack_tuples(PyGC_Head *head) +untrack_tuples_and_dicts(PyGC_Head *head) { PyGC_Head *next, *gc = GC_NEXT(head); while (gc != head) { @@ -735,19 +735,7 @@ untrack_tuples(PyGC_Head *head) if (PyTuple_CheckExact(op)) { _PyTuple_MaybeUntrack(op); } - gc = next; - } -} - -/* Try to untrack all currently tracked dictionaries */ -static void -untrack_dicts(PyGC_Head *head) -{ - PyGC_Head *next, *gc = GC_NEXT(head); - while (gc != head) { - PyObject *op = FROM_GC(gc); - next = GC_NEXT(gc); - if (PyDict_CheckExact(op)) { + else if (PyDict_CheckExact(op)) { _PyDict_MaybeUntrack(op); } gc = next; @@ -1307,6 +1295,7 @@ gc_collect_young(PyThreadState *tstate, GCState *gcstate = &tstate->interp->gc; PyGC_Head *young = &gcstate->young.head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples_and_dicts(&gcstate->young.head); GC_STAT_ADD(0, collections, 1); #ifdef Py_STATS { @@ -1361,7 +1350,6 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space) struct container_and_flag { PyGC_Head *container; int visited_space; - int mark; uintptr_t size; }; @@ -1392,7 +1380,6 @@ expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCStat struct container_and_flag arg = { .container = container, .visited_space = gcstate->visited_space, - .mark = 0, .size = 0 }; assert(GC_NEXT(gc) == container); @@ -1458,7 +1445,6 @@ mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) struct container_and_flag arg = { .container = reachable, .visited_space = visited_space, - .mark = 1, .size = 0 }; while (!gc_list_is_empty(reachable)) { @@ -1590,6 +1576,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); gcstate->work_to_do -= objects_marked; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); + untrack_tuples_and_dicts(&gcstate->young.head); gc_list_merge(&gcstate->young.head, &increment); gcstate->young.count = 0; gc_list_validate_space(&increment, gcstate->visited_space); @@ -1613,7 +1600,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR/ 2 / scale_factor; + gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; validate_old(gcstate); @@ -1634,6 +1621,7 @@ gc_collect_full(PyThreadState *tstate, PyGC_Head *young = &gcstate->young.head; PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; + untrack_tuples_and_dicts(&gcstate->young.head); /* merge all generations into pending */ gc_list_validate_space(young, 1-gcstate->visited_space); gc_list_merge(young, pending); @@ -1680,12 +1668,6 @@ gc_collect_region(PyThreadState *tstate, gc_list_init(&unreachable); deduce_unreachable(from, &unreachable); validate_consistent_old_space(from); - if (untrack & UNTRACK_TUPLES) { - untrack_tuples(from); - } - if (untrack & UNTRACK_DICTS) { - untrack_dicts(from); - } validate_consistent_old_space(to); if (from != to) { gc_list_merge(from, to); @@ -1905,9 +1887,10 @@ _PyGC_Freeze(PyInterpreterState *interp) { GCState *gcstate = &interp->gc; /* The permanent_generation has its old space bit set to zero */ - if (gcstate->visited_space) { + if (!gcstate->visited_space) { gc_list_set_space(&gcstate->young.head, 0); } + gc_list_validate_space(&gcstate->young.head, 0); gc_list_merge(&gcstate->young.head, &gcstate->permanent_generation.head); gcstate->young.count = 0; PyGC_Head*old0 = &gcstate->old[0].head; From 3c18fc8760ae8d5ede0f760d4ed30641f4b76c7e Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 6 Nov 2024 11:42:21 +0000 Subject: [PATCH 08/33] Don't untrack dicts --- Lib/test/test_dict.py | 109 ------------------------------------- Objects/dictobject.c | 92 ++----------------------------- Objects/moduleobject.c | 2 - Python/bytecodes.c | 4 -- Python/executor_cases.c.h | 4 -- Python/gc.c | 11 ++-- Python/generated_cases.c.h | 4 -- 7 files changed, 8 insertions(+), 218 deletions(-) diff --git a/Lib/test/test_dict.py b/Lib/test/test_dict.py index 4030716efb51f9..c94dc2df4f0a7f 100644 --- a/Lib/test/test_dict.py +++ b/Lib/test/test_dict.py @@ -880,115 +880,6 @@ class C(object): gc.collect() self.assertIs(ref(), None, "Cycle was not collected") - def _not_tracked(self, t): - # Nested containers can take several collections to untrack - gc.collect() - gc.collect() - self.assertFalse(gc.is_tracked(t), t) - - def _tracked(self, t): - self.assertTrue(gc.is_tracked(t), t) - gc.collect() - gc.collect() - self.assertTrue(gc.is_tracked(t), t) - - def test_string_keys_can_track_values(self): - # Test that this doesn't leak. - for i in range(10): - d = {} - for j in range(10): - d[str(j)] = j - d["foo"] = d - - @support.cpython_only - def test_track_literals(self): - # Test GC-optimization of dict literals - x, y, z, w = 1.5, "a", (1, None), [] - - self._not_tracked({}) - self._not_tracked({x:(), y:x, z:1}) - self._not_tracked({1: "a", "b": 2}) - self._not_tracked({1: 2, (None, True, False, ()): int}) - self._not_tracked({1: object()}) - - # Dicts with mutable elements are always tracked, even if those - # elements are not tracked right now. - self._tracked({1: []}) - self._tracked({1: ([],)}) - self._tracked({1: {}}) - self._tracked({1: set()}) - - @support.cpython_only - def test_track_dynamic(self): - # Test GC-optimization of dynamically-created dicts - class MyObject(object): - pass - x, y, z, w, o = 1.5, "a", (1, object()), [], MyObject() - - d = dict() - self._not_tracked(d) - d[1] = "a" - self._not_tracked(d) - d[y] = 2 - self._not_tracked(d) - d[z] = 3 - self._not_tracked(d) - self._not_tracked(d.copy()) - d[4] = w - self._tracked(d) - self._tracked(d.copy()) - d[4] = None - self._not_tracked(d) - self._not_tracked(d.copy()) - - # dd isn't tracked right now, but it may mutate and therefore d - # which contains it must be tracked. - d = dict() - dd = dict() - d[1] = dd - self._not_tracked(dd) - self._tracked(d) - dd[1] = d - self._tracked(dd) - - d = dict.fromkeys([x, y, z]) - self._not_tracked(d) - dd = dict() - dd.update(d) - self._not_tracked(dd) - d = dict.fromkeys([x, y, z, o]) - self._tracked(d) - dd = dict() - dd.update(d) - self._tracked(dd) - - d = dict(x=x, y=y, z=z) - self._not_tracked(d) - d = dict(x=x, y=y, z=z, w=w) - self._tracked(d) - d = dict() - d.update(x=x, y=y, z=z) - self._not_tracked(d) - d.update(w=w) - self._tracked(d) - - d = dict([(x, y), (z, 1)]) - self._not_tracked(d) - d = dict([(x, y), (z, w)]) - self._tracked(d) - d = dict() - d.update([(x, y), (z, 1)]) - self._not_tracked(d) - d.update([(x, y), (z, w)]) - self._tracked(d) - - @support.cpython_only - def test_track_subtypes(self): - # Dict subtypes are always tracked - class MyDict(dict): - pass - self._tracked(MyDict()) - def make_shared_key_dict(self, n): class C: pass diff --git a/Objects/dictobject.c b/Objects/dictobject.c index f28a92657834be..5813fdb550954c 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -883,6 +883,7 @@ new_dict(PyInterpreterState *interp, mp->ma_used = used; mp->_ma_watcher_tag = 0; ASSERT_CONSISTENT(mp); + _PyObject_GC_TRACK(mp); return (PyObject *)mp; } @@ -1578,64 +1579,6 @@ _PyDict_HasOnlyStringKeys(PyObject *dict) return 1; } -#define MAINTAIN_TRACKING(mp, key, value) \ - do { \ - if (!_PyObject_GC_IS_TRACKED(mp)) { \ - if (_PyObject_GC_MAY_BE_TRACKED(key) || \ - _PyObject_GC_MAY_BE_TRACKED(value)) { \ - _PyObject_GC_TRACK(mp); \ - } \ - } \ - } while(0) - -void -_PyDict_MaybeUntrack(PyObject *op) -{ - PyDictObject *mp; - PyObject *value; - Py_ssize_t i, numentries; - - ASSERT_WORLD_STOPPED_OR_DICT_LOCKED(op); - - if (!PyDict_CheckExact(op) || !_PyObject_GC_IS_TRACKED(op)) - return; - - mp = (PyDictObject *) op; - ASSERT_CONSISTENT(mp); - numentries = mp->ma_keys->dk_nentries; - if (_PyDict_HasSplitTable(mp)) { - for (i = 0; i < numentries; i++) { - if ((value = mp->ma_values->values[i]) == NULL) - continue; - if (_PyObject_GC_MAY_BE_TRACKED(value)) { - return; - } - } - } - else { - if (DK_IS_UNICODE(mp->ma_keys)) { - PyDictUnicodeEntry *ep0 = DK_UNICODE_ENTRIES(mp->ma_keys); - for (i = 0; i < numentries; i++) { - if ((value = ep0[i].me_value) == NULL) - continue; - if (_PyObject_GC_MAY_BE_TRACKED(value)) - return; - } - } - else { - PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys); - for (i = 0; i < numentries; i++) { - if ((value = ep0[i].me_value) == NULL) - continue; - if (_PyObject_GC_MAY_BE_TRACKED(value) || - _PyObject_GC_MAY_BE_TRACKED(ep0[i].me_key)) - return; - } - } - } - _PyObject_GC_UNTRACK(op); -} - void _PyDict_EnablePerThreadRefcounting(PyObject *op) { @@ -1761,7 +1704,6 @@ insert_split_value(PyInterpreterState *interp, PyDictObject *mp, PyObject *key, { assert(PyUnicode_CheckExact(key)); ASSERT_DICT_LOCKED(mp); - MAINTAIN_TRACKING(mp, key, value); PyObject *old_value = mp->ma_values->values[ix]; if (old_value == NULL) { _PyDict_NotifyEvent(interp, PyDict_EVENT_ADDED, mp, key, value); @@ -1818,8 +1760,6 @@ insertdict(PyInterpreterState *interp, PyDictObject *mp, if (ix == DKIX_ERROR) goto Fail; - MAINTAIN_TRACKING(mp, key, value); - if (ix == DKIX_EMPTY) { assert(!_PyDict_HasSplitTable(mp)); /* Insert into new slot. */ @@ -1878,8 +1818,6 @@ insert_to_emptydict(PyInterpreterState *interp, PyDictObject *mp, /* We don't decref Py_EMPTY_KEYS here because it is immortal. */ assert(mp->ma_values == NULL); - MAINTAIN_TRACKING(mp, key, value); - size_t hashpos = (size_t)hash & (PyDict_MINSIZE-1); dictkeys_set_index(newkeys, hashpos, 0); if (unicode) { @@ -3770,11 +3708,6 @@ dict_dict_merge(PyInterpreterState *interp, PyDictObject *mp, PyDictObject *othe STORE_USED(mp, other->ma_used); ASSERT_CONSISTENT(mp); - if (_PyObject_GC_IS_TRACKED(other) && !_PyObject_GC_IS_TRACKED(mp)) { - /* Maintain tracking. */ - _PyObject_GC_TRACK(mp); - } - return 0; } } @@ -4024,8 +3957,7 @@ copy_lock_held(PyObject *o) split_copy->ma_used = mp->ma_used; split_copy->_ma_watcher_tag = 0; dictkeys_incref(mp->ma_keys); - if (_PyObject_GC_IS_TRACKED(mp)) - _PyObject_GC_TRACK(split_copy); + _PyObject_GC_TRACK(split_copy); return (PyObject *)split_copy; } @@ -4060,10 +3992,6 @@ copy_lock_held(PyObject *o) new->ma_used = mp->ma_used; ASSERT_CONSISTENT(new); - if (_PyObject_GC_IS_TRACKED(mp)) { - /* Maintain tracking. */ - _PyObject_GC_TRACK(new); - } return (PyObject *)new; } @@ -4350,8 +4278,6 @@ dict_setdefault_ref_lock_held(PyObject *d, PyObject *key, PyObject *default_valu *result = NULL; } } - - MAINTAIN_TRACKING(mp, key, value); STORE_USED(mp, mp->ma_used + 1); assert(mp->ma_keys->dk_usable >= 0); ASSERT_CONSISTENT(mp); @@ -4801,15 +4727,8 @@ dict_new(PyTypeObject *type, PyObject *args, PyObject *kwds) d->ma_values = NULL; ASSERT_CONSISTENT(d); - if (type != &PyDict_Type) { - // Don't track if a subclass tp_alloc is PyType_GenericAlloc() - if (!_PyObject_GC_IS_TRACKED(d)) { - _PyObject_GC_TRACK(d); - } - } - else { - // _PyType_AllocNoTrack() does not track the created object - assert(!_PyObject_GC_IS_TRACKED(d)); + if (!_PyObject_GC_IS_TRACKED(d)) { + _PyObject_GC_TRACK(d); } return self; } @@ -6755,9 +6674,6 @@ make_dict_from_instance_attributes(PyInterpreterState *interp, } } PyDictObject *res = (PyDictObject *)new_dict(interp, keys, values, used, 0); - if (track && res) { - _PyObject_GC_TRACK(res); - } return res; } diff --git a/Objects/moduleobject.c b/Objects/moduleobject.c index 535b0d068f064f..a8d64c9aefae6b 100644 --- a/Objects/moduleobject.c +++ b/Objects/moduleobject.c @@ -107,8 +107,6 @@ static void track_module(PyModuleObject *m) { _PyDict_EnablePerThreadRefcounting(m->md_dict); - PyObject_GC_Track(m->md_dict); - _PyObject_SetDeferredRefcount((PyObject *)m); PyObject_GC_Track(m); } diff --git a/Python/bytecodes.c b/Python/bytecodes.c index fa98af12c69aef..9731de69dea145 100644 --- a/Python/bytecodes.c +++ b/Python/bytecodes.c @@ -2316,10 +2316,6 @@ dummy_func( DEOPT_IF(ep->me_key != name); PyObject *old_value = ep->me_value; DEOPT_IF(old_value == NULL); - /* Ensure dict is GC tracked if it needs to be */ - if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { - _PyObject_GC_TRACK(dict); - } _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); ep->me_value = PyStackRef_AsPyObjectSteal(value); // old_value should be DECREFed after GC track checking is done, if not, it could raise a segmentation fault, diff --git a/Python/executor_cases.c.h b/Python/executor_cases.c.h index ff4a0a52a0b445..321c2db80b4e49 100644 --- a/Python/executor_cases.c.h +++ b/Python/executor_cases.c.h @@ -2905,10 +2905,6 @@ UOP_STAT_INC(uopcode, miss); JUMP_TO_JUMP_TARGET(); } - /* Ensure dict is GC tracked if it needs to be */ - if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { - _PyObject_GC_TRACK(dict); - } _PyFrame_SetStackPointer(frame, stack_pointer); _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); diff --git a/Python/gc.c b/Python/gc.c index 7a7e6069ceb683..c9d753beb897a7 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -726,7 +726,7 @@ move_unreachable(PyGC_Head *young, PyGC_Head *unreachable) } static void -untrack_tuples_and_dicts(PyGC_Head *head) +untrack_tuples(PyGC_Head *head) { PyGC_Head *next, *gc = GC_NEXT(head); while (gc != head) { @@ -735,9 +735,6 @@ untrack_tuples_and_dicts(PyGC_Head *head) if (PyTuple_CheckExact(op)) { _PyTuple_MaybeUntrack(op); } - else if (PyDict_CheckExact(op)) { - _PyDict_MaybeUntrack(op); - } gc = next; } } @@ -1295,7 +1292,7 @@ gc_collect_young(PyThreadState *tstate, GCState *gcstate = &tstate->interp->gc; PyGC_Head *young = &gcstate->young.head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - untrack_tuples_and_dicts(&gcstate->young.head); + untrack_tuples(&gcstate->young.head); GC_STAT_ADD(0, collections, 1); #ifdef Py_STATS { @@ -1576,7 +1573,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); gcstate->work_to_do -= objects_marked; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); - untrack_tuples_and_dicts(&gcstate->young.head); + untrack_tuples(&gcstate->young.head); gc_list_merge(&gcstate->young.head, &increment); gcstate->young.count = 0; gc_list_validate_space(&increment, gcstate->visited_space); @@ -1621,7 +1618,7 @@ gc_collect_full(PyThreadState *tstate, PyGC_Head *young = &gcstate->young.head; PyGC_Head *pending = &gcstate->old[gcstate->visited_space^1].head; PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; - untrack_tuples_and_dicts(&gcstate->young.head); + untrack_tuples(&gcstate->young.head); /* merge all generations into pending */ gc_list_validate_space(young, 1-gcstate->visited_space); gc_list_merge(young, pending); diff --git a/Python/generated_cases.c.h b/Python/generated_cases.c.h index 632cbc7790a4d8..6d4247d008db7d 100644 --- a/Python/generated_cases.c.h +++ b/Python/generated_cases.c.h @@ -7389,10 +7389,6 @@ DEOPT_IF(ep->me_key != name, STORE_ATTR); PyObject *old_value = ep->me_value; DEOPT_IF(old_value == NULL, STORE_ATTR); - /* Ensure dict is GC tracked if it needs to be */ - if (!_PyObject_GC_IS_TRACKED(dict) && _PyObject_GC_MAY_BE_TRACKED(PyStackRef_AsPyObjectBorrow(value))) { - _PyObject_GC_TRACK(dict); - } _PyFrame_SetStackPointer(frame, stack_pointer); _PyDict_NotifyEvent(tstate->interp, PyDict_EVENT_MODIFIED, dict, name, PyStackRef_AsPyObjectBorrow(value)); stack_pointer = _PyFrame_GetStackPointer(frame); From 94da963899a527163bd80ccce294a6b4df311f11 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 6 Nov 2024 14:41:30 +0000 Subject: [PATCH 09/33] Remove lazy dict tracking from no-gil build --- Include/internal/pycore_dict.h | 2 -- Python/gc.c | 2 +- Python/gc_free_threading.c | 9 +-------- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h index c5399ad8e0497f..f7d747cc6c62a1 100644 --- a/Include/internal/pycore_dict.h +++ b/Include/internal/pycore_dict.h @@ -43,8 +43,6 @@ extern int _PyDict_Next( extern int _PyDict_HasOnlyStringKeys(PyObject *mp); -extern void _PyDict_MaybeUntrack(PyObject *mp); - // Export for '_ctypes' shared extension PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *); diff --git a/Python/gc.c b/Python/gc.c index c9d753beb897a7..86cc7217b9714c 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -5,7 +5,7 @@ #include "Python.h" #include "pycore_ceval.h" // _Py_set_eval_breaker_bit() #include "pycore_context.h" -#include "pycore_dict.h" // _PyDict_MaybeUntrack() +#include "pycore_dict.h" // _PyInlineValuesSize() #include "pycore_initconfig.h" #include "pycore_interp.h" // PyInterpreterState.gc #include "pycore_object.h" diff --git a/Python/gc_free_threading.c b/Python/gc_free_threading.c index 1969ed608ea524..eb6fe752f46668 100644 --- a/Python/gc_free_threading.c +++ b/Python/gc_free_threading.c @@ -3,7 +3,7 @@ #include "pycore_brc.h" // struct _brc_thread_state #include "pycore_ceval.h" // _Py_set_eval_breaker_bit() #include "pycore_context.h" -#include "pycore_dict.h" // _PyDict_MaybeUntrack() +#include "pycore_dict.h" // _PyInlineValuesSize() #include "pycore_freelist.h" // _PyObject_ClearFreeLists() #include "pycore_initconfig.h" #include "pycore_interp.h" // PyInterpreterState.gc @@ -484,13 +484,6 @@ update_refs(const mi_heap_t *heap, const mi_heap_area_t *area, return true; } } - else if (PyDict_CheckExact(op)) { - _PyDict_MaybeUntrack(op); - if (!_PyObject_GC_IS_TRACKED(op)) { - gc_restore_refs(op); - return true; - } - } } // We repurpose ob_tid to compute "gc_refs", the number of external From 659fd1e638283a4a498b0d7cf3e72438f0105408 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 6 Nov 2024 15:19:26 +0000 Subject: [PATCH 10/33] Remove unused variable --- Objects/dictobject.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 5813fdb550954c..bb13a867aad645 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -6664,13 +6664,11 @@ make_dict_from_instance_attributes(PyInterpreterState *interp, { dictkeys_incref(keys); Py_ssize_t used = 0; - Py_ssize_t track = 0; size_t size = shared_keys_usable_size(keys); for (size_t i = 0; i < size; i++) { PyObject *val = values->values[i]; if (val != NULL) { used += 1; - track += _PyObject_GC_MAY_BE_TRACKED(val); } } PyDictObject *res = (PyDictObject *)new_dict(interp, keys, values, used, 0); From 4cfbc4f45dd219af5362383d526f8c4f395dec21 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 6 Nov 2024 15:22:42 +0000 Subject: [PATCH 11/33] Add news --- .../2024-11-06-15-22-34.gh-issue-126491.n9VyZc.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2024-11-06-15-22-34.gh-issue-126491.n9VyZc.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2024-11-06-15-22-34.gh-issue-126491.n9VyZc.rst b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-06-15-22-34.gh-issue-126491.n9VyZc.rst new file mode 100644 index 00000000000000..f69977852c994a --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2024-11-06-15-22-34.gh-issue-126491.n9VyZc.rst @@ -0,0 +1,4 @@ +Add a marking phase to the GC. All objects that can be transitively +reached from builtin modules or the stacks are marked as reachable +before cycle detection. This reduces the amount of work done by the +GC by approximately half. From 8c92ca6483d2da199d1f0d2795e100475032e14a Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 6 Nov 2024 16:55:10 +0000 Subject: [PATCH 12/33] Fix use after free --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index bb13a867aad645..c581690dff625b 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7173,6 +7173,7 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) PyDictValues *values = copy_values(mp->ma_values); + mp->ma_values = values; if (values == NULL) { /* Out of memory. Clear the dict */ PyInterpreterState *interp = _PyInterpreterState_GET(); @@ -7183,7 +7184,6 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) PyErr_NoMemory(); return -1; } - mp->ma_values = values; FT_ATOMIC_STORE_UINT8(_PyObject_InlineValues(obj)->valid, 0); From 12d7f7cac333a5d6c8b319dd72b9ee86421a6589 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 7 Nov 2024 11:55:08 +0000 Subject: [PATCH 13/33] Attempt more careful fix of use-after-free --- Objects/dictobject.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index c581690dff625b..9db4e507a51e7d 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7117,6 +7117,7 @@ _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict) // since we locked it. dict = _PyObject_ManagedDictPointer(obj)->dict; err = _PyDict_DetachFromObject(dict, obj); + assert(err == 0 || new_dict == NULL); if (err == 0) { FT_ATOMIC_STORE_PTR(_PyObject_ManagedDictPointer(obj)->dict, (PyDictObject *)Py_XNewRef(new_dict)); @@ -7149,7 +7150,21 @@ void PyObject_ClearManagedDict(PyObject *obj) { if (_PyObject_SetManagedDict(obj, NULL) < 0) { + /* Must be out of memory */ + assert(PyErr_Occurred() == PyExc_MemoryError); PyErr_WriteUnraisable(NULL); + /* Clear the dict */ + PyDictObject *dict = _PyObject_GetManagedDict(obj); + Py_BEGIN_CRITICAL_SECTION2(dict, obj); + dict = _PyObject_ManagedDictPointer(obj)->dict; + PyInterpreterState *interp = _PyInterpreterState_GET(); + PyDictKeysObject *oldkeys = dict->ma_keys; + set_keys(dict, Py_EMPTY_KEYS); + dict->ma_values = NULL; + dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp)); + STORE_USED(dict, 0); + set_dict_inline_values(obj, NULL); + Py_END_CRITICAL_SECTION2(); } } @@ -7173,17 +7188,11 @@ _PyDict_DetachFromObject(PyDictObject *mp, PyObject *obj) PyDictValues *values = copy_values(mp->ma_values); - mp->ma_values = values; if (values == NULL) { - /* Out of memory. Clear the dict */ - PyInterpreterState *interp = _PyInterpreterState_GET(); - PyDictKeysObject *oldkeys = mp->ma_keys; - set_keys(mp, Py_EMPTY_KEYS); - dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp)); - STORE_USED(mp, 0); PyErr_NoMemory(); return -1; } + mp->ma_values = values; FT_ATOMIC_STORE_UINT8(_PyObject_InlineValues(obj)->valid, 0); From 1f619d7f3aeb98803feb0452c75d0a96ea6820d2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 7 Nov 2024 12:04:03 +0000 Subject: [PATCH 14/33] Typo --- Objects/dictobject.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Objects/dictobject.c b/Objects/dictobject.c index 9db4e507a51e7d..8e9231bcc4ba62 100644 --- a/Objects/dictobject.c +++ b/Objects/dictobject.c @@ -7161,7 +7161,7 @@ PyObject_ClearManagedDict(PyObject *obj) PyDictKeysObject *oldkeys = dict->ma_keys; set_keys(dict, Py_EMPTY_KEYS); dict->ma_values = NULL; - dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(mp)); + dictkeys_decref(interp, oldkeys, IS_DICT_SHARED(dict)); STORE_USED(dict, 0); set_dict_inline_values(obj, NULL); Py_END_CRITICAL_SECTION2(); From b55fe37afc9f76558a38b9ee6acc0709192af5f5 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 7 Nov 2024 12:24:59 +0000 Subject: [PATCH 15/33] Fix use of uninitialized variable --- Python/gc.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 86cc7217b9714c..f4f7c7b21fd612 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1495,13 +1495,15 @@ mark_stacks(PyInterpreterState *interp, PyGC_Head *visited, int visited_space, b while (ts) { _PyInterpreterFrame *frame = ts->current_frame; while (frame) { + if (frame->owner == FRAME_OWNED_BY_CSTACK) { + frame = frame->previous; + continue; + } _PyStackRef *locals = frame->localsplus; _PyStackRef *sp = frame->stackpointer; - if (frame->owner != FRAME_OWNED_BY_CSTACK) { - objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); - PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); - objects_marked += move_to_reachable(func, &reachable, visited_space); - } + objects_marked += move_to_reachable(frame->f_locals, &reachable, visited_space); + PyObject *func = PyStackRef_AsPyObjectBorrow(frame->f_funcobj); + objects_marked += move_to_reachable(func, &reachable, visited_space); while (sp > locals) { sp--; if (PyStackRef_IsNull(*sp)) { From 73b7f52b6755c86b1fd0d655bdd41e9912be2aed Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 7 Nov 2024 13:50:10 +0000 Subject: [PATCH 16/33] Fix compiler warnings --- Python/gc.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index f4f7c7b21fd612..1e9ff7b28b1dfc 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1347,7 +1347,7 @@ IS_IN_VISITED(PyGC_Head *gc, int visited_space) struct container_and_flag { PyGC_Head *container; int visited_space; - uintptr_t size; + Py_ssize_t size; }; /* A traversal callback for adding to container) */ @@ -1370,7 +1370,7 @@ visit_add_to_container(PyObject *op, void *arg) return 0; } -static uintptr_t +static Py_ssize_t expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) { validate_list(container, collecting_clear_unreachable_clear); @@ -1420,7 +1420,7 @@ completed_cycle(GCState *gcstate) gcstate->phase = GC_PHASE_MARK; } -static int +static Py_ssize_t move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) { if (op != NULL && !_Py_IsImmortal(op) && _PyObject_IS_GC(op)) { @@ -1435,7 +1435,7 @@ move_to_reachable(PyObject *op, PyGC_Head *reachable, int visited_space) return 0; } -static int +static Py_ssize_t mark_all_reachable(PyGC_Head *reachable, PyGC_Head *visited, int visited_space) { // Transitively traverse all objects from reachable, until empty From 33f6386474fea82d63edfe506ce1465e5c9831de Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 7 Nov 2024 13:53:48 +0000 Subject: [PATCH 17/33] Tweak test --- Lib/test/test_gc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 2efd80d99a80ba..23592668de616f 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1142,7 +1142,7 @@ def make_ll(depth): collected = young['collected'] + incremental['collected'] + old['collected'] count += CORRECTION live = count - collected - self.assertLess(live, 25000) + self.assertLess(live, 27000) del olds[:] if not enabled: gc.disable() From 8574d0051e99d0c130b4a285f74ff0a5d4224744 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 11 Nov 2024 16:41:48 +0000 Subject: [PATCH 18/33] Add section to internal docs --- InternalDocs/garbage_collector.md | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index d624cf4befd31a..21b14970ba3130 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -351,6 +351,29 @@ follows these steps in order: the reference counts fall to 0, triggering the destruction of all unreachable objects. + +Optimization: marking +===================== + +Only objects that cannot be reached, can be garbage. +To avoid performing the complex algorithm above on the whole heap, we first +mark all objects that can be reached either from a frame stack or from global +objects like the modules or builtin classes. + +This marking step does much less work per object, so reduces the time spent +performing garbage collection by at least half. + +This mark phase marks all object that are transitively reachable from the +roots as follows: +* All objects directly referred by any builtin class, the `sys` module, the `builtins` +module or any frame stack are added to a working set of reachable objects. +* Until this working set is empty: + * Pop an object from the set and move it to the reachable set + * For each object directly reachable from that object: + * If it is not already reachable and it is a GC object, then move it to + the working set + + Optimization: generations ========================= @@ -588,9 +611,9 @@ heap. be more difficult. -> [!NOTE] +> [!NOTE] > **Document history** -> +> > Pablo Galindo Salgado - Original author -> +> > Irit Katriel - Convert to Markdown From 70007b00bb31b1723549371c8ad36981e14b576f Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 11 Nov 2024 16:44:48 +0000 Subject: [PATCH 19/33] Rephrase new docs --- InternalDocs/garbage_collector.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 21b14970ba3130..35d9e0bdf454a8 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -355,9 +355,10 @@ follows these steps in order: Optimization: marking ===================== -Only objects that cannot be reached, can be garbage. +An object cannot be garbage if it can be reached. + To avoid performing the complex algorithm above on the whole heap, we first -mark all objects that can be reached either from a frame stack or from global +mark all objects that can be reached from any frame stack or from global objects like the modules or builtin classes. This marking step does much less work per object, so reduces the time spent From f043080d0549efdb0ffd36fc6c7b398cf5372100 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 13 Nov 2024 10:42:42 +0000 Subject: [PATCH 20/33] Use symbolic constant --- Include/internal/pycore_runtime_init.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h index 60faa703858490..2b07d568078a5a 100644 --- a/Include/internal/pycore_runtime_init.h +++ b/Include/internal/pycore_runtime_init.h @@ -132,7 +132,7 @@ extern PyTypeObject _PyExc_MemoryError; { .threshold = 0, }, \ }, \ .work_to_do = -5000, \ - .phase = 0, \ + .phase = GC_PHASE_MARK, \ }, \ .qsbr = { \ .wr_seq = QSBR_INITIAL, \ From db2e173e7667d17ffab1526cb062597617b99e3d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 13 Nov 2024 10:50:35 +0000 Subject: [PATCH 21/33] Update section on untracking --- InternalDocs/garbage_collector.md | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 35d9e0bdf454a8..fbf15e744b6de2 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -520,8 +520,8 @@ of `PyGC_Head` discussed in the `Memory layout and object structure`_ section: currently in. Instead, when that's needed, ad hoc tricks (like the `NEXT_MASK_UNREACHABLE` flag) are employed. -Optimization: delay tracking containers -======================================= +Optimization: delayed untracking containers +=========================================== Certain types of containers cannot participate in a reference cycle, and so do not need to be tracked by the garbage collector. Untracking these objects @@ -536,8 +536,8 @@ a container: As a general rule, instances of atomic types aren't tracked and instances of non-atomic types (containers, user-defined objects...) are. However, some type-specific optimizations can be present in order to suppress the garbage -collector footprint of simple instances. Some examples of native types that -benefit from delayed tracking: +collector footprint of simple instances. Historically, both dictionaries and +tuples were untracked during garbage collection. Now it is only tuples: - Tuples containing only immutable objects (integers, strings etc, and recursively, tuples of immutable objects) do not need to be tracked. The @@ -546,14 +546,8 @@ benefit from delayed tracking: tuples at creation time. Instead, all tuples except the empty tuple are tracked when created. During garbage collection it is determined whether any surviving tuples can be untracked. A tuple can be untracked if all of its contents are - already not tracked. Tuples are examined for untracking in all garbage collection - cycles. It may take more than one cycle to untrack a tuple. - -- Dictionaries containing only immutable objects also do not need to be tracked. - Dictionaries are untracked when created. If a tracked item is inserted into a - dictionary (either as a key or value), the dictionary becomes tracked. During a - full garbage collection (all generations), the collector will untrack any dictionaries - whose contents are not tracked. + already not tracked. Tuples are examined for untracking when moved from the + young to the old generation. The garbage collector module provides the Python function `is_tracked(obj)`, which returns the current tracking status of the object. Subsequent garbage collections may change the @@ -566,11 +560,9 @@ tracking status of the object. False >>> gc.is_tracked([]) True - >>> gc.is_tracked({}) + >>> gc.is_tracked(("a": 1)) False >>> gc.is_tracked({"a": 1}) - False - >>> gc.is_tracked({"a": []}) True ``` From b9467ec138ca1eeac2b40165bcd4151ad7a227eb Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 14 Nov 2024 10:34:05 +0000 Subject: [PATCH 22/33] Update docs --- InternalDocs/garbage_collector.md | 60 +++++++++++++++++++------------ 1 file changed, 37 insertions(+), 23 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 85ad2989e441b5..03aa0a320f7607 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -352,29 +352,6 @@ follows these steps in order: objects. -Optimization: marking -===================== - -An object cannot be garbage if it can be reached. - -To avoid performing the complex algorithm above on the whole heap, we first -mark all objects that can be reached from any frame stack or from global -objects like the modules or builtin classes. - -This marking step does much less work per object, so reduces the time spent -performing garbage collection by at least half. - -This mark phase marks all object that are transitively reachable from the -roots as follows: -* All objects directly referred by any builtin class, the `sys` module, the `builtins` -module or any frame stack are added to a working set of reachable objects. -* Until this working set is empty: - * Pop an object from the set and move it to the reachable set - * For each object directly reachable from that object: - * If it is not already reachable and it is a GC object, then move it to - the working set - - Optimization: incremental collection ==================================== @@ -508,6 +485,43 @@ specifically in a generation by calling `gc.collect(generation=NUM)`. ``` +Optimization: visiting reachable objects +======================================== + +An object cannot be garbage if it can be reached. + +To avoid having to identify reference cycles across the whole heap, we can +reduce the amount of work done considerably by first moving most reachable objects +to the `visited` space. Empirically, most reachable objects can be reached from a +small set of global objects and local variables. +This step does much less work per object, so reduces the time spent +performing garbage collection by at least half. + +> [!NOTE] +> Objects that are not determined to be reachable by this pass are not necessarily +> unreachable. We still need to perform the main algorithm to determine which objects +> are actually unreachable. + +We use the same technique of forming a transitive closure as the incremental +collector does to find reachable objects, seeding the list with some global +objects and the current frame of each stack. + +This mark phase moves all objects `visited` space, as follows: + +1. All objects directly referred by any builtin class, the `sys` module, the `builtins` +module and all objects directly referred to from stack frames are added to a working +set of reachable objects. +2. Until this working set is empty: + 1. Pop an object from the set and move it to the `visited` space + 2. For each object directly reachable from that object: + * If it is not already in `visited` space and it is a GC object, + then move it to the working set + +Before each increment of collection is performed, any stack frames that have been created +since the last increment are added to the working set and above algorithm is repeated, +starting from step 2. + + Optimization: reusing fields to save memory =========================================== From 14ae8d73238376e584479f0ee819c3c45f823028 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 14 Nov 2024 10:43:49 +0000 Subject: [PATCH 23/33] A few more edits --- InternalDocs/garbage_collector.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 03aa0a320f7607..0c1f3a8ae0d014 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -506,20 +506,20 @@ We use the same technique of forming a transitive closure as the incremental collector does to find reachable objects, seeding the list with some global objects and the current frame of each stack. -This mark phase moves all objects `visited` space, as follows: +This phase moves objects to the `visited` space, as follows: -1. All objects directly referred by any builtin class, the `sys` module, the `builtins` +1. All objects directly referred to by any builtin class, the `sys` module, the `builtins` module and all objects directly referred to from stack frames are added to a working set of reachable objects. 2. Until this working set is empty: 1. Pop an object from the set and move it to the `visited` space 2. For each object directly reachable from that object: * If it is not already in `visited` space and it is a GC object, - then move it to the working set + then add it to the working set -Before each increment of collection is performed, any stack frames that have been created -since the last increment are added to the working set and above algorithm is repeated, -starting from step 2. +Before each increment of collection is performed, the working set is updated to +include any new stack frames that have been created since the last increment. +Then the above algorithm is repeated, starting from step 2. Optimization: reusing fields to save memory From 3337512fee8b6de13e30683340bed84dd1b4b091 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 14 Nov 2024 11:08:31 +0000 Subject: [PATCH 24/33] Update comment --- Include/internal/pycore_gc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index 05aefad98fb113..ea1b5ec4ede68c 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -10,11 +10,11 @@ extern "C" { /* GC information is stored BEFORE the object structure. */ typedef struct { - // Pointer to next object in the list. + // Tagged pointer to next object in the list. // 0 means the object is not tracked uintptr_t _gc_next; - // Pointer to previous object in the list. + // Tagged pointer to previous object in the list. // Lowest two bits are used for flags documented later. uintptr_t _gc_prev; } PyGC_Head; From 3ae87fa285c25a2e6ada98d894931fbbacf750ce Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 14 Nov 2024 12:27:50 +0000 Subject: [PATCH 25/33] Address doc review comments --- InternalDocs/garbage_collector.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 0c1f3a8ae0d014..796b14a249847d 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -504,7 +504,7 @@ performing garbage collection by at least half. We use the same technique of forming a transitive closure as the incremental collector does to find reachable objects, seeding the list with some global -objects and the current frame of each stack. +objects and the currently executing frames. This phase moves objects to the `visited` space, as follows: @@ -515,10 +515,13 @@ set of reachable objects. 1. Pop an object from the set and move it to the `visited` space 2. For each object directly reachable from that object: * If it is not already in `visited` space and it is a GC object, - then add it to the working set + add it to the working set -Before each increment of collection is performed, the working set is updated to -include any new stack frames that have been created since the last increment. + +Before each increment of collection is performed, the stacks are scanned +to check for any new stack frames that have been created since the last +increment. All objects directly referred to from those stack frames are +added to the working set. Then the above algorithm is repeated, starting from step 2. @@ -570,8 +573,8 @@ of `PyGC_Head` discussed in the `Memory layout and object structure`_ section: currently in. Instead, when that's needed, ad hoc tricks (like the `NEXT_MASK_UNREACHABLE` flag) are employed. -Optimization: delayed untracking containers -=========================================== +Optimization: delayed untracking of containers +============================================== Certain types of containers cannot participate in a reference cycle, and so do not need to be tracked by the garbage collector. Untracking these objects From 1452378663e4f3632c26f96921af84b884c98617 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 15 Nov 2024 11:38:19 +0000 Subject: [PATCH 26/33] Avoid repeated collection of the young gen --- Python/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/gc.c b/Python/gc.c index 5ac03d02611df8..7e9f8d415ef580 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1565,6 +1565,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) GC_STAT_ADD(1, collections, 1); GCState *gcstate = &tstate->interp->gc; gcstate->work_to_do += gcstate->young.count; + gcstate->young.count = 0; if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); @@ -1585,7 +1586,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_set_space(&gcstate->young.head, gcstate->visited_space); untrack_tuples(&gcstate->young.head); gc_list_merge(&gcstate->young.head, &increment); - gcstate->young.count = 0; gc_list_validate_space(&increment, gcstate->visited_space); Py_ssize_t increment_size = 0; while (increment_size < gcstate->work_to_do) { From 595b14cd0d508e5c53cd61a770e4056e3a23f4d9 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 15 Nov 2024 17:00:27 +0000 Subject: [PATCH 27/33] Clearer calculation of work to do. --- Include/internal/pycore_gc.h | 1 + Lib/test/test_gc.py | 26 +++++++------------- Modules/_testinternalcapi.c | 7 ++++++ Python/gc.c | 47 ++++++++++++++++++++++++++---------- 4 files changed, 51 insertions(+), 30 deletions(-) diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h index ea1b5ec4ede68c..c81e7a1de4b727 100644 --- a/Include/internal/pycore_gc.h +++ b/Include/internal/pycore_gc.h @@ -330,6 +330,7 @@ struct _gc_runtime_state { /* a list of callbacks to be invoked when collection is performed */ PyObject *callbacks; + Py_ssize_t prior_heap_size; Py_ssize_t heap_size; Py_ssize_t work_to_do; /* Which of the old spaces is the visited space */ diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 23592668de616f..4ca440b00ba49c 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -31,6 +31,11 @@ def __new__(cls, *args, **kwargs): return C ContainerNoGC = None +try: + import _testinternalcapi +except ImportError: + _testinternalcapi = None + ### Support code ############################################################################### @@ -1092,6 +1097,7 @@ def setUp(self): def tearDown(self): gc.disable() + @unittest.skipIf(_testinternalcapi is None, "requires _testinternalcapi") @requires_gil_enabled("Free threading does not support incremental GC") # Use small increments to emulate longer running process in a shorter time @gc_threshold(200, 10) @@ -1117,32 +1123,18 @@ def make_ll(depth): return head head = make_ll(1000) - count = 1000 - - # There will be some objects we aren't counting, - # e.g. the gc stats dicts. This test checks - # that the counts don't grow, so we try to - # correct for the uncounted objects - # This is just an estimate. - CORRECTION = 20 enabled = gc.isenabled() gc.enable() olds = [] + initial_heap_size = _testinternalcapi.get_heap_size() for i in range(20_000): newhead = make_ll(20) - count += 20 newhead.surprise = head olds.append(newhead) if len(olds) == 20: - stats = gc.get_stats() - young = stats[0] - incremental = stats[1] - old = stats[2] - collected = young['collected'] + incremental['collected'] + old['collected'] - count += CORRECTION - live = count - collected - self.assertLess(live, 27000) + new_objects = _testinternalcapi.get_heap_size() - initial_heap_size + self.assertLess(new_objects, 27_000) del olds[:] if not enabled: gc.disable() diff --git a/Modules/_testinternalcapi.c b/Modules/_testinternalcapi.c index b02f794d27d5bd..35cb215418e0cd 100644 --- a/Modules/_testinternalcapi.c +++ b/Modules/_testinternalcapi.c @@ -2077,6 +2077,12 @@ has_deferred_refcount(PyObject *self, PyObject *op) } +static PyObject * +get_heap_size(PyObject *self, PyObject *Py_UNUSED(ignored)) +{ + return PyLong_FromInt64(PyInterpreterState_Get()->gc.heap_size); +} + static PyMethodDef module_functions[] = { {"get_configs", get_configs, METH_NOARGS}, {"get_recursion_depth", get_recursion_depth, METH_NOARGS}, @@ -2174,6 +2180,7 @@ static PyMethodDef module_functions[] = { {"get_static_builtin_types", get_static_builtin_types, METH_NOARGS}, {"identify_type_slot_wrappers", identify_type_slot_wrappers, METH_NOARGS}, {"has_deferred_refcount", has_deferred_refcount, METH_O}, + {"get_heap_size", get_heap_size, METH_NOARGS}, {NULL, NULL} /* sentinel */ }; diff --git a/Python/gc.c b/Python/gc.c index 7e9f8d415ef580..c43ed85dcbb1cd 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -185,6 +185,7 @@ _PyGC_Init(PyInterpreterState *interp) if (gcstate->callbacks == NULL) { return _PyStatus_NO_MEMORY(); } + gcstate->prior_heap_size = 0; gcstate->heap_size = 0; return _PyStatus_OK(); @@ -1330,16 +1331,11 @@ gc_collect_young(PyThreadState *tstate, survivor_count++; } } - (void)survivor_count; // Silence compiler warning gc_list_merge(&survivors, visited); validate_old(gcstate); gcstate->young.count = 0; gcstate->old[gcstate->visited_space].count++; - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + gcstate->work_to_do += survivor_count * 4; add_stats(gcstate, 0, stats); } @@ -1559,13 +1555,43 @@ mark_at_start(PyThreadState *tstate) return objects_marked; } +static Py_ssize_t +assess_work_to_do(GCState *gcstate) +{ + /* The amount of work we want to do depends on two things. + * 1. The number of new objects created + * 2. The growth in heap size since the last collection + * 3. The heap size (up to the number of new objects, to avoid quadratic effects) + */ + Py_ssize_t scale_factor = gcstate->old[0].threshold; + if (scale_factor < 2) { + scale_factor = 2; + } + Py_ssize_t new_objects = gcstate->young.count; + Py_ssize_t growth = gcstate->heap_size - gcstate->prior_heap_size; + if (growth < 0) { + growth = 0; + } + if (gcstate->heap_size < new_objects * scale_factor) { + // Small heap: ignore growth + growth = 0; + } + Py_ssize_t heap_fraction = gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; + if (heap_fraction > new_objects) { + heap_fraction = new_objects; + } + gcstate->young.count = 0; + gcstate->prior_heap_size = gcstate->heap_size; + return new_objects*2 + growth*2 + heap_fraction; +} + static void gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) { GC_STAT_ADD(1, collections, 1); GCState *gcstate = &tstate->interp->gc; - gcstate->work_to_do += gcstate->young.count; - gcstate->young.count = 0; + + gcstate->work_to_do += assess_work_to_do(gcstate); if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); @@ -1576,10 +1602,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) PyGC_Head *visited = &gcstate->old[gcstate->visited_space].head; PyGC_Head increment; gc_list_init(&increment); - Py_ssize_t scale_factor = gcstate->old[0].threshold; - if (scale_factor < 1) { - scale_factor = 1; - } Py_ssize_t objects_marked = mark_stacks(tstate->interp, visited, gcstate->visited_space, false); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); gcstate->work_to_do -= objects_marked; @@ -1607,7 +1629,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); - gcstate->work_to_do += gcstate->heap_size / SCAN_RATE_DIVISOR / scale_factor; gcstate->work_to_do -= increment_size; validate_old(gcstate); From 278059b79277200bed9597e73d1828163ab5bceb Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 15 Nov 2024 18:27:13 +0000 Subject: [PATCH 28/33] Make sure tuples are untracked and avoid quadratic time validation --- Python/gc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index c43ed85dcbb1cd..56ae3a1a6777da 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1377,7 +1377,6 @@ visit_add_to_container(PyObject *op, void *arg) static Py_ssize_t expand_region_transitively_reachable(PyGC_Head *container, PyGC_Head *gc, GCState *gcstate) { - validate_list(container, collecting_clear_unreachable_clear); struct container_and_flag arg = { .container = container, .visited_space = gcstate->visited_space, @@ -1592,6 +1591,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) GCState *gcstate = &tstate->interp->gc; gcstate->work_to_do += assess_work_to_do(gcstate); + untrack_tuples(&gcstate->young.head); if (gcstate->phase == GC_PHASE_MARK) { Py_ssize_t objects_marked = mark_at_start(tstate); GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); @@ -1606,7 +1606,6 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) GC_STAT_ADD(1, objects_transitively_reachable, objects_marked); gcstate->work_to_do -= objects_marked; gc_list_set_space(&gcstate->young.head, gcstate->visited_space); - untrack_tuples(&gcstate->young.head); gc_list_merge(&gcstate->young.head, &increment); gc_list_validate_space(&increment, gcstate->visited_space); Py_ssize_t increment_size = 0; From f186b4a810c01f6c0ce927334c184976505f86f4 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 18 Nov 2024 09:43:39 +0000 Subject: [PATCH 29/33] Update InternalDocs/garbage_collector.md Co-authored-by: Alex Waygood --- InternalDocs/garbage_collector.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/InternalDocs/garbage_collector.md b/InternalDocs/garbage_collector.md index 796b14a249847d..ea21928b50e34a 100644 --- a/InternalDocs/garbage_collector.md +++ b/InternalDocs/garbage_collector.md @@ -613,7 +613,7 @@ tracking status of the object. False >>> gc.is_tracked([]) True - >>> gc.is_tracked(("a": 1)) + >>> gc.is_tracked(("a", 1)) False >>> gc.is_tracked({"a": 1}) True From 5f6d04e7a5ba50d6584da66208332904648db050 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 18 Nov 2024 10:11:56 +0000 Subject: [PATCH 30/33] Remove unused variable --- Python/gc.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/Python/gc.c b/Python/gc.c index 56ae3a1a6777da..47ce370ca11924 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1244,15 +1244,10 @@ handle_resurrected_objects(PyGC_Head *unreachable, PyGC_Head* still_unreachable, gc_list_merge(resurrected, old_generation); } - -#define UNTRACK_TUPLES 1 -#define UNTRACK_DICTS 2 - static void gc_collect_region(PyThreadState *tstate, PyGC_Head *from, PyGC_Head *to, - int untrack, struct gc_collection_stats *stats); static inline Py_ssize_t @@ -1316,7 +1311,7 @@ gc_collect_young(PyThreadState *tstate, PyGC_Head survivors; gc_list_init(&survivors); gc_list_set_space(young, gcstate->visited_space); - gc_collect_region(tstate, young, &survivors, UNTRACK_TUPLES, stats); + gc_collect_region(tstate, young, &survivors, stats); Py_ssize_t survivor_count = 0; if (gcstate->visited_space) { /* objects in visited space have bit set, so we set it here */ @@ -1624,7 +1619,7 @@ gc_collect_increment(PyThreadState *tstate, struct gc_collection_stats *stats) gc_list_validate_space(&increment, gcstate->visited_space); PyGC_Head survivors; gc_list_init(&survivors); - gc_collect_region(tstate, &increment, &survivors, UNTRACK_TUPLES, stats); + gc_collect_region(tstate, &increment, &survivors, stats); gc_list_validate_space(&survivors, gcstate->visited_space); gc_list_merge(&survivors, visited); assert(gc_list_is_empty(&increment)); @@ -1662,9 +1657,7 @@ gc_collect_full(PyThreadState *tstate, GC_STAT_ADD(2, objects_not_transitively_reachable, gc_list_size(pending)); gcstate->young.count = 0; gc_list_set_space(pending, gcstate->visited_space); - gc_collect_region(tstate, pending, visited, - UNTRACK_TUPLES | UNTRACK_DICTS, - stats); + gc_collect_region(tstate, pending, visited, stats); gcstate->young.count = 0; gcstate->old[0].count = 0; gcstate->old[1].count = 0; @@ -1681,7 +1674,6 @@ static void gc_collect_region(PyThreadState *tstate, PyGC_Head *from, PyGC_Head *to, - int untrack, struct gc_collection_stats *stats) { PyGC_Head unreachable; /* non-problematic unreachable trash */ From 9cfb5f0cede47be373bb601689d8c59c03a67ace Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 18 Nov 2024 11:54:29 +0000 Subject: [PATCH 31/33] Tweak work to do calculation --- Lib/test/test_gc.py | 2 +- Python/gc.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/test/test_gc.py b/Lib/test/test_gc.py index 4ca440b00ba49c..6b2a6ec41fd39d 100644 --- a/Lib/test/test_gc.py +++ b/Lib/test/test_gc.py @@ -1134,7 +1134,7 @@ def make_ll(depth): olds.append(newhead) if len(olds) == 20: new_objects = _testinternalcapi.get_heap_size() - initial_heap_size - self.assertLess(new_objects, 27_000) + self.assertLess(new_objects, 25_000) del olds[:] if not enabled: gc.disable() diff --git a/Python/gc.c b/Python/gc.c index 47ce370ca11924..d4d09dbcffc5ad 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1576,7 +1576,7 @@ assess_work_to_do(GCState *gcstate) } gcstate->young.count = 0; gcstate->prior_heap_size = gcstate->heap_size; - return new_objects*2 + growth*2 + heap_fraction; + return new_objects*3/2 + growth*2 + heap_fraction*3/2; } static void From c7683a4b06de9de0a80309912120f4a515efd5a6 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 18 Nov 2024 12:46:28 +0000 Subject: [PATCH 32/33] Explain work to do calculation --- Python/gc.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/Python/gc.c b/Python/gc.c index d4d09dbcffc5ad..60b41377654af4 100644 --- a/Python/gc.c +++ b/Python/gc.c @@ -1552,10 +1552,17 @@ mark_at_start(PyThreadState *tstate) static Py_ssize_t assess_work_to_do(GCState *gcstate) { - /* The amount of work we want to do depends on two things. + /* The amount of work we want to do depends on three things. * 1. The number of new objects created * 2. The growth in heap size since the last collection * 3. The heap size (up to the number of new objects, to avoid quadratic effects) + * + * For a steady state heap, the amount of work to do is three times the number + * of new objects added to the heap. This ensures that we stay ahead in the + * worst case of all new objects being garbage. + * + * This could be improved by tracking survival rates, but it is still a + * large improvement on the non-marking approach. */ Py_ssize_t scale_factor = gcstate->old[0].threshold; if (scale_factor < 2) { From 170ea6d9ac4907e4112a76babf36027fd7e4e303 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Mon, 18 Nov 2024 13:43:01 +0000 Subject: [PATCH 33/33] Initialize field to prevent code analyzer warning. --- Python/ceval.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Python/ceval.c b/Python/ceval.c index 9a608f06966688..3360e50d57fbcb 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -821,6 +821,7 @@ _PyEval_EvalFrameDefault(PyThreadState *tstate, _PyInterpreterFrame *frame, int entry_frame.instr_ptr = (_Py_CODEUNIT *)_Py_INTERPRETER_TRAMPOLINE_INSTRUCTIONS + 1; entry_frame.stackpointer = entry_frame.localsplus; entry_frame.owner = FRAME_OWNED_BY_CSTACK; + entry_frame.visited = 0; entry_frame.return_offset = 0; /* Push frame */ entry_frame.previous = tstate->current_frame;