From 6f6b4cd405f119fde88e3ee24d5d12063a04dc67 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sat, 22 Mar 2025 20:05:29 +0000 Subject: [PATCH 01/36] gh-131591: Implement PEP 768 --- Include/cpython/initconfig.h | 1 + Include/cpython/pystate.h | 9 + Include/internal/pycore_debug_offsets.h | 17 + .../pycore_global_objects_fini_generated.h | 1 + Include/internal/pycore_global_strings.h | 1 + Include/internal/pycore_interp_structs.h | 1 - .../internal/pycore_runtime_init_generated.h | 1 + Include/internal/pycore_sysmodule.h | 2 + .../internal/pycore_unicodeobject_generated.h | 4 + Makefile.pre.in | 1 + Python/ceval_gil.c | 27 + Python/clinic/sysmodule.c.h | 95 ++- Python/initconfig.c | 35 + Python/remote_debugging.c | 639 ++++++++++++++++++ Python/sysmodule.c | 93 +++ configure | 30 + configure.ac | 20 + pyconfig.h.in | 3 + 18 files changed, 978 insertions(+), 2 deletions(-) create mode 100644 Python/remote_debugging.c diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h index 8ef19f677066c2..2932fa6c9809e9 100644 --- a/Include/cpython/initconfig.h +++ b/Include/cpython/initconfig.h @@ -143,6 +143,7 @@ typedef struct PyConfig { int faulthandler; int tracemalloc; int perf_profiling; + int remote_debug; int import_time; int code_debug_ranges; int show_ref_count; diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 65bc11ca0f5ba9..7a04c1409fdeba 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -29,6 +29,14 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); #define PyTrace_C_RETURN 6 #define PyTrace_OPCODE 7 +/* Remote debugger support */ +# define MAX_SCRIPT_PATH_SIZE 512 +typedef struct _remote_debugger_support { + int enabled; + int debugger_pending_call; + char debugger_script_path[MAX_SCRIPT_PATH_SIZE]; +} _PyRemoteDebuggerSupport; + typedef struct _err_stackitem { /* This struct represents a single execution context where we might * be currently handling an exception. It is a per-coroutine state @@ -202,6 +210,7 @@ struct _ts { The PyThreadObject must hold the only reference to this value. */ PyObject *threading_local_sentinel; + _PyRemoteDebuggerSupport remote_debugger_support; }; # define Py_C_RECURSION_LIMIT 5000 diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index a61096c17f143b..47c66c70ae455f 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -206,6 +206,15 @@ typedef struct _Py_DebugOffsets { uint64_t gi_iframe; uint64_t gi_frame_state; } gen_object; + + struct _debugger_support { + uint64_t eval_breaker; + uint64_t remote_debugger_support; + uint64_t remote_debugging_enabled; + uint64_t debugger_pending_call; + uint64_t debugger_script_path; + uint64_t debugger_script_path_size; + } debugger_support; } _Py_DebugOffsets; @@ -326,6 +335,14 @@ typedef struct _Py_DebugOffsets { .gi_iframe = offsetof(PyGenObject, gi_iframe), \ .gi_frame_state = offsetof(PyGenObject, gi_frame_state), \ }, \ + .debugger_support = { \ + .eval_breaker = offsetof(PyThreadState, eval_breaker), \ + .remote_debugger_support = offsetof(PyThreadState, remote_debugger_support), \ + .remote_debugging_enabled = offsetof(PyInterpreterState, config.remote_debug), \ + .debugger_pending_call = offsetof(_PyRemoteDebuggerSupport, debugger_pending_call), \ + .debugger_script_path = offsetof(_PyRemoteDebuggerSupport, debugger_script_path), \ + .debugger_script_path_size = MAX_SCRIPT_PATH_SIZE, \ + }, \ } diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h index 90214a314031d1..91cf2ec0c7ac66 100644 --- a/Include/internal/pycore_global_objects_fini_generated.h +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -1185,6 +1185,7 @@ _PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) { _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(salt)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sched_priority)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(scheduler)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(script)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(second)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(security_attributes)); _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(seek)); diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h index 5056128dc97ca0..2e31ce61967f55 100644 --- a/Include/internal/pycore_global_strings.h +++ b/Include/internal/pycore_global_strings.h @@ -676,6 +676,7 @@ struct _Py_global_strings { STRUCT_FOR_ID(salt) STRUCT_FOR_ID(sched_priority) STRUCT_FOR_ID(scheduler) + STRUCT_FOR_ID(script) STRUCT_FOR_ID(second) STRUCT_FOR_ID(security_attributes) STRUCT_FOR_ID(seek) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index ee92a1e6f2dec9..26aac26382fe49 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -936,7 +936,6 @@ struct _is { _PyThreadStateImpl _initial_thread; // _initial_thread should be the last field of PyInterpreterState. // See https://github.com/python/cpython/issues/127117. - #if !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) uint64_t next_stackref; _Py_hashtable_t *open_stackrefs_table; diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h index 4f928cc050bf8e..dbf44308ebaa94 100644 --- a/Include/internal/pycore_runtime_init_generated.h +++ b/Include/internal/pycore_runtime_init_generated.h @@ -1183,6 +1183,7 @@ extern "C" { INIT_ID(salt), \ INIT_ID(sched_priority), \ INIT_ID(scheduler), \ + INIT_ID(script), \ INIT_ID(second), \ INIT_ID(security_attributes), \ INIT_ID(seek), \ diff --git a/Include/internal/pycore_sysmodule.h b/Include/internal/pycore_sysmodule.h index 9536579e965f7b..008a2da0d04fa7 100644 --- a/Include/internal/pycore_sysmodule.h +++ b/Include/internal/pycore_sysmodule.h @@ -24,6 +24,8 @@ extern int _PySys_ClearAttrString(PyInterpreterState *interp, extern int _PySys_SetFlagObj(Py_ssize_t pos, PyObject *new_value); extern int _PySys_SetIntMaxStrDigits(int maxdigits); +extern int _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path); + #ifdef __cplusplus } #endif diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h index 5b78d038fc1192..afa22ef9748b5f 100644 --- a/Include/internal/pycore_unicodeobject_generated.h +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -2492,6 +2492,10 @@ _PyUnicode_InitStaticStrings(PyInterpreterState *interp) { _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(script); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); string = &_Py_ID(second); _PyUnicode_InternStatic(interp, &string); assert(_PyUnicode_CheckConsistency(string, 1)); diff --git a/Makefile.pre.in b/Makefile.pre.in index 9658bfa44b98e4..949d7555dd5547 100644 --- a/Makefile.pre.in +++ b/Makefile.pre.in @@ -506,6 +506,7 @@ PYTHON_OBJS= \ Python/suggestions.o \ Python/perf_trampoline.o \ Python/perf_jit_trampoline.o \ + Python/remote_debugging.o \ Python/$(DYNLOADFILE) \ $(LIBOBJS) \ $(MACHDEP_OBJS) \ diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 77632b283b256c..265e6d4a0a626c 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1319,5 +1319,32 @@ _Py_HandlePending(PyThreadState *tstate) return -1; } } + +#ifdef Py_REMOTE_DEBUG + const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp); + if (config->remote_debug) { + if (tstate->remote_debugger_support.debugger_pending_call) { + tstate->remote_debugger_support.debugger_pending_call = 0; + const char *path = tstate->remote_debugger_support.debugger_script_path; + if (*path) { + if (0 != PySys_Audit("debugger_script", "%s", path)) { + PyErr_Clear(); + } else { + FILE* f = fopen(path, "r"); + if (!f) { + PyErr_SetFromErrno(PyExc_OSError); + } else { + PyRun_AnyFile(f, path); + fclose(f); + } + if (PyErr_Occurred()) { + PyErr_FormatUnraisable("Error executing debugger script %s", path); + } + } + } + } + } +#endif + return 0; } diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 1e53624d4d45d7..901d4b2095f4ac 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -1511,6 +1511,99 @@ sys_is_stack_trampoline_active(PyObject *module, PyObject *Py_UNUSED(ignored)) return sys_is_stack_trampoline_active_impl(module); } +PyDoc_STRVAR(sys_is_remote_debug_enabled__doc__, +"is_remote_debug_enabled($module, /)\n" +"--\n" +"\n" +"Return True if remote debugging is enabled, False otherwise.\n" +"\n" +"If no stack profiler is activated, this function has no effect."); + +#define SYS_IS_REMOTE_DEBUG_ENABLED_METHODDEF \ + {"is_remote_debug_enabled", (PyCFunction)sys_is_remote_debug_enabled, METH_NOARGS, sys_is_remote_debug_enabled__doc__}, + +static PyObject * +sys_is_remote_debug_enabled_impl(PyObject *module); + +static PyObject * +sys_is_remote_debug_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) +{ + return sys_is_remote_debug_enabled_impl(module); +} + +PyDoc_STRVAR(sys_remote_exec__doc__, +"remote_exec($module, /, pid, script)\n" +"--\n" +"\n" +"Executes a file containing Python code in a given remote Python process.\n" +"\n" +"This function returns immediately, and the code will be executed by the\n" +"target process\'s main thread at the next available opportunity, similarly\n" +"to how signals are handled. There is no interface to determine when the\n" +"code has been executed. The caller is responsible for making sure that\n" +"the file still exists whenever the remote process tries to read it and that\n" +"it hasn\'t been overwritten.\n" +"\n" +"Args:\n" +" pid (int): The process ID of the target Python process.\n" +" script (str|bytes|PathLike): The path to a file containing\n" +" the Python code to be executed."); + +#define SYS_REMOTE_EXEC_METHODDEF \ + {"remote_exec", _PyCFunction_CAST(sys_remote_exec), METH_FASTCALL|METH_KEYWORDS, sys_remote_exec__doc__}, + +static PyObject * +sys_remote_exec_impl(PyObject *module, int pid, PyObject *script); + +static PyObject * +sys_remote_exec(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 2 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(pid), &_Py_ID(script), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"pid", "script", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "remote_exec", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[2]; + int pid; + PyObject *script; + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, + /*minpos*/ 2, /*maxpos*/ 2, /*minkw*/ 0, /*varpos*/ 0, argsbuf); + if (!args) { + goto exit; + } + pid = PyLong_AsInt(args[0]); + if (pid == -1 && PyErr_Occurred()) { + goto exit; + } + script = args[1]; + return_value = sys_remote_exec_impl(module, pid, script); + +exit: + return return_value; +} + PyDoc_STRVAR(sys__dump_tracelets__doc__, "_dump_tracelets($module, /, outpath)\n" "--\n" @@ -1754,4 +1847,4 @@ sys__is_gil_enabled(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=1e5f608092c12636 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c62c63e3430ae73b input=a9049054013a1b77]*/ diff --git a/Python/initconfig.c b/Python/initconfig.c index f73fbe76a96507..7a149388b84161 100644 --- a/Python/initconfig.c +++ b/Python/initconfig.c @@ -162,6 +162,7 @@ static const PyConfigSpec PYCONFIG_SPEC[] = { SPEC(parse_argv, BOOL, READ_ONLY, NO_SYS), SPEC(pathconfig_warnings, BOOL, READ_ONLY, NO_SYS), SPEC(perf_profiling, UINT, READ_ONLY, NO_SYS), + SPEC(remote_debug, BOOL, READ_ONLY, NO_SYS), SPEC(program_name, WSTR, READ_ONLY, NO_SYS), SPEC(run_command, WSTR_OPT, READ_ONLY, NO_SYS), SPEC(run_filename, WSTR_OPT, READ_ONLY, NO_SYS), @@ -317,6 +318,7 @@ The following implementation-specific options are available:\n\ -X perf: support the Linux \"perf\" profiler; also PYTHONPERFSUPPORT=1\n\ -X perf_jit: support the Linux \"perf\" profiler with DWARF support;\n\ also PYTHON_PERF_JIT_SUPPORT=1\n\ +-X disable-remote-debug: disable remote debugging; also PYTHON_DISABLE_REMOTE_DEBUG\n\ " #ifdef Py_DEBUG "-X presite=MOD: import this module before site; also PYTHON_PRESITE\n" @@ -994,6 +996,7 @@ _PyConfig_InitCompatConfig(PyConfig *config) config->faulthandler = -1; config->tracemalloc = -1; config->perf_profiling = -1; + config->remote_debug = -1; config->module_search_paths_set = 0; config->parse_argv = 0; config->site_import = -1; @@ -1986,6 +1989,28 @@ config_init_perf_profiling(PyConfig *config) } +static PyStatus +config_init_remote_debug(PyConfig *config) +{ +#ifndef Py_REMOTE_DEBUG + config->remote_debug = 0; +#else + int active = 1; + const char *env = Py_GETENV("PYTHON_DISABLE_REMOTE_DEBUG"); + if (env) { + active = 0; + } + const wchar_t *xoption = config_get_xoption(config, L"disable-remote-debug"); + if (xoption) { + active = 0; + } + + config->remote_debug = active; +#endif + return _PyStatus_OK(); + +} + static PyStatus config_init_tracemalloc(PyConfig *config) { @@ -2170,6 +2195,13 @@ config_read_complex_options(PyConfig *config) } } + if (config->remote_debug < 0) { + status = config_init_remote_debug(config); + if (_PyStatus_EXCEPTION(status)) { + return status; + } + } + if (config->int_max_str_digits < 0) { status = config_init_int_max_str_digits(config); if (_PyStatus_EXCEPTION(status)) { @@ -2531,6 +2563,9 @@ config_read(PyConfig *config, int compute_path_config) if (config->perf_profiling < 0) { config->perf_profiling = 0; } + if (config->remote_debug < 0) { + config->remote_debug = -1; + } if (config->use_hash_seed < 0) { config->use_hash_seed = 0; config->hash_seed = 0; diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c new file mode 100644 index 00000000000000..9e8cc8505697a4 --- /dev/null +++ b/Python/remote_debugging.c @@ -0,0 +1,639 @@ +#define _GNU_SOURCE + +#ifdef __linux__ +# include +# include +# if INTPTR_MAX == INT64_MAX +# define Elf_Ehdr Elf64_Ehdr +# define Elf_Shdr Elf64_Shdr +# define Elf_Phdr Elf64_Phdr +# else +# define Elf_Ehdr Elf32_Ehdr +# define Elf_Shdr Elf32_Shdr +# define Elf_Phdr Elf32_Phdr +# endif +# include +#endif + +#if defined(__APPLE__) +# include +// Older macOS SDKs do not define TARGET_OS_OSX +# if !defined(TARGET_OS_OSX) +# define TARGET_OS_OSX 1 +# endif +# if TARGET_OS_OSX +# include +# include +# include +# include +# include +# include +# include +# include +# include +# include +# endif +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef Py_BUILD_CORE_BUILTIN +# define Py_BUILD_CORE_MODULE 1 +#endif +#include "Python.h" +#include +#include + +#ifndef HAVE_PROCESS_VM_READV +# define HAVE_PROCESS_VM_READV 0 +#endif + +/*[clinic input] +module _pdb +[clinic start generated code]*/ +/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7fb1cf2618bcf972]*/ + +#if defined(__APPLE__) && TARGET_OS_OSX +static uintptr_t +return_section_address(const char* section, mach_port_t proc_ref, uintptr_t base, void* map) +{ + struct mach_header_64* hdr = (struct mach_header_64*)map; + int ncmds = hdr->ncmds; + + int cmd_cnt = 0; + struct segment_command_64* cmd = map + sizeof(struct mach_header_64); + + mach_vm_size_t size = 0; + mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t); + mach_vm_address_t address = (mach_vm_address_t)base; + vm_region_basic_info_data_64_t region_info; + mach_port_t object_name; + uintptr_t vmaddr = 0; + + for (int i = 0; cmd_cnt < 2 && i < ncmds; i++) { + if (cmd->cmd == LC_SEGMENT_64 && strcmp(cmd->segname, "__TEXT") == 0) { + vmaddr = cmd->vmaddr; + } + if (cmd->cmd == LC_SEGMENT_64 && strcmp(cmd->segname, "__DATA") == 0) { + while (cmd->filesize != size) { + address += size; + if (mach_vm_region( + proc_ref, + &address, + &size, + VM_REGION_BASIC_INFO_64, + (vm_region_info_t)®ion_info, // cppcheck-suppress [uninitvar] + &count, + &object_name) + != KERN_SUCCESS) + { + PyErr_SetString(PyExc_RuntimeError, "Cannot get any more VM maps.\n"); + return 0; + } + } + + int nsects = cmd->nsects; + struct section_64* sec = + (struct section_64*)((void*)cmd + sizeof(struct segment_command_64)); + for (int j = 0; j < nsects; j++) { + if (strcmp(sec[j].sectname, section) == 0) { + return base + sec[j].addr - vmaddr; + } + } + cmd_cnt++; + } + + cmd = (struct segment_command_64*)((void*)cmd + cmd->cmdsize); + } + return 0; +} + +static uintptr_t +search_section_in_file(const char* secname, char* path, uintptr_t base, mach_vm_size_t size, mach_port_t proc_ref) +{ + int fd = open(path, O_RDONLY); + if (fd == -1) { + PyErr_Format(PyExc_RuntimeError, "Cannot open binary %s\n", path); + return 0; + } + + struct stat fs; + if (fstat(fd, &fs) == -1) { + PyErr_Format(PyExc_RuntimeError, "Cannot get size of binary %s\n", path); + close(fd); + return 0; + } + + void* map = mmap(0, fs.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (map == MAP_FAILED) { + PyErr_Format(PyExc_RuntimeError, "Cannot map binary %s\n", path); + close(fd); + return 0; + } + + uintptr_t result = 0; + + struct mach_header_64* hdr = (struct mach_header_64*)map; + switch (hdr->magic) { + case MH_MAGIC: + case MH_CIGAM: + case FAT_MAGIC: + case FAT_CIGAM: + PyErr_SetString(PyExc_RuntimeError, "32-bit Mach-O binaries are not supported"); + break; + case MH_MAGIC_64: + case MH_CIGAM_64: + result = return_section_address(secname, proc_ref, base, map); + break; + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic"); + break; + } + + munmap(map, fs.st_size); + if (close(fd) != 0) { + PyErr_SetFromErrno(PyExc_OSError); + } + return result; +} + +static mach_port_t +pid_to_task(pid_t pid) +{ + mach_port_t task; + kern_return_t result; + + result = task_for_pid(mach_task_self(), pid, &task); + if (result != KERN_SUCCESS) { + PyErr_Format(PyExc_PermissionError, "Cannot get task for PID %d", pid); + return 0; + } + return task; +} + +static uintptr_t +search_map_for_section(pid_t pid, const char* secname, const char* substr) { + mach_vm_address_t address = 0; + mach_vm_size_t size = 0; + mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t); + vm_region_basic_info_data_64_t region_info; + mach_port_t object_name; + + mach_port_t proc_ref = pid_to_task(pid); + if (proc_ref == 0) { + PyErr_SetString(PyExc_PermissionError, "Cannot get task for PID"); + return 0; + } + + int match_found = 0; + char map_filename[MAXPATHLEN + 1]; + while (mach_vm_region( + proc_ref, + &address, + &size, + VM_REGION_BASIC_INFO_64, + (vm_region_info_t)®ion_info, + &count, + &object_name) + == KERN_SUCCESS) + { + int path_len = proc_regionfilename(pid, address, map_filename, MAXPATHLEN); + if (path_len == 0) { + address += size; + continue; + } + + char* filename = strrchr(map_filename, '/'); + if (filename != NULL) { + filename++; // Move past the '/' + } else { + filename = map_filename; // No path, use the whole string + } + + if (!match_found && strncmp(filename, substr, strlen(substr)) == 0) { + match_found = 1; + return search_section_in_file(secname, map_filename, address, size, proc_ref); + } + + address += size; + } + return 0; +} + +#endif + +#ifdef __linux__ +static uintptr_t +find_map_start_address(pid_t pid, char* result_filename, const char* map) +{ + char maps_file_path[64]; + sprintf(maps_file_path, "/proc/%d/maps", pid); + + FILE* maps_file = fopen(maps_file_path, "r"); + if (maps_file == NULL) { + PyErr_SetFromErrno(PyExc_OSError); + return 0; + } + + int match_found = 0; + + char line[256]; + char map_filename[PATH_MAX]; + uintptr_t result_address = 0; + while (fgets(line, sizeof(line), maps_file) != NULL) { + unsigned long start_address = 0; + sscanf(line, "%lx-%*x %*s %*s %*s %*s %s", &start_address, map_filename); + char* filename = strrchr(map_filename, '/'); + if (filename != NULL) { + filename++; // Move past the '/' + } else { + filename = map_filename; // No path, use the whole string + } + + if (!match_found && strncmp(filename, map, strlen(map)) == 0) { + match_found = 1; + result_address = start_address; + strcpy(result_filename, map_filename); + break; + } + } + + fclose(maps_file); + + if (!match_found) { + map_filename[0] = '\0'; + } + + return result_address; +} + +static uintptr_t +search_map_for_section(pid_t pid, const char* secname, const char* map) +{ + char elf_file[256]; + uintptr_t start_address = find_map_start_address(pid, elf_file, map); + + if (start_address == 0) { + return 0; + } + + uintptr_t result = 0; + void* file_memory = NULL; + + int fd = open(elf_file, O_RDONLY); + if (fd < 0) { + PyErr_SetFromErrno(PyExc_OSError); + goto exit; + } + + struct stat file_stats; + if (fstat(fd, &file_stats) != 0) { + PyErr_SetFromErrno(PyExc_OSError); + goto exit; + } + + file_memory = mmap(NULL, file_stats.st_size, PROT_READ, MAP_PRIVATE, fd, 0); + if (file_memory == MAP_FAILED) { + PyErr_SetFromErrno(PyExc_OSError); + goto exit; + } + + Elf_Ehdr* elf_header = (Elf_Ehdr*)file_memory; + + Elf_Shdr* section_header_table = (Elf_Shdr*)(file_memory + elf_header->e_shoff); + + Elf_Shdr* shstrtab_section = §ion_header_table[elf_header->e_shstrndx]; + char* shstrtab = (char*)(file_memory + shstrtab_section->sh_offset); + + Elf_Shdr* section = NULL; + for (int i = 0; i < elf_header->e_shnum; i++) { + char* this_sec_name = shstrtab + section_header_table[i].sh_name; + // Move 1 character to account for the leading "." + this_sec_name += 1; + if (strcmp(secname, this_sec_name) == 0) { + section = §ion_header_table[i]; + break; + } + } + + Elf_Phdr* program_header_table = (Elf_Phdr*)(file_memory + elf_header->e_phoff); + // Find the first PT_LOAD segment + Elf_Phdr* first_load_segment = NULL; + for (int i = 0; i < elf_header->e_phnum; i++) { + if (program_header_table[i].p_type == PT_LOAD) { + first_load_segment = &program_header_table[i]; + break; + } + } + + if (section != NULL && first_load_segment != NULL) { + uintptr_t elf_load_addr = first_load_segment->p_vaddr + - (first_load_segment->p_vaddr % first_load_segment->p_align); + result = start_address + (uintptr_t)section->sh_addr - elf_load_addr; + } + +exit: + if (close(fd) != 0) { + PyErr_SetFromErrno(PyExc_OSError); + } + if (file_memory != NULL) { + munmap(file_memory, file_stats.st_size); + } + return result; +} + +#endif + +static uintptr_t +get_py_runtime(pid_t pid) +{ + uintptr_t address = search_map_for_section(pid, "PyRuntime", "libpython"); + if (address == 0) { + address = search_map_for_section(pid, "PyRuntime", "python"); + } + return address; +} + +static ssize_t +read_memory(pid_t pid, uintptr_t remote_address, size_t len, void* dst) +{ + ssize_t total_bytes = 0; +#if defined(__linux__) && HAVE_PROCESS_VM_READV + struct iovec local[1]; + struct iovec remote[1]; + ssize_t result = 0; + ssize_t read = 0; + + do { + local[0].iov_base = dst + result; + local[0].iov_len = len - result; + remote[0].iov_base = (void*)(remote_address + result); + remote[0].iov_len = len - result; + + read = process_vm_readv(pid, local, 1, remote, 1, 0); + if (read < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return -1; + } + + result += read; + } while ((size_t)read != local[0].iov_len); + total_bytes = result; +#elif defined(__APPLE__) && TARGET_OS_OSX + ssize_t result = -1; + kern_return_t kr = mach_vm_read_overwrite( + pid_to_task(pid), + (mach_vm_address_t)remote_address, + len, + (mach_vm_address_t)dst, + (mach_vm_size_t*)&result); + + if (kr != KERN_SUCCESS) { + switch (kr) { + case KERN_PROTECTION_FAILURE: + PyErr_SetString(PyExc_PermissionError, "Not enough permissions to read memory"); + break; + case KERN_INVALID_ARGUMENT: + PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_read_overwrite"); + break; + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown error reading memory"); + } + return -1; + } + total_bytes = len; +#else + return -1; +#endif + return total_bytes; +} + +ssize_t +write_memory(pid_t pid, uintptr_t remote_address, size_t len, const void* src) +{ + ssize_t total_bytes_written = 0; +#if defined(__linux__) && HAVE_PROCESS_VM_READV + struct iovec local[1]; + struct iovec remote[1]; + ssize_t result = 0; + ssize_t written = 0; + + do { + local[0].iov_base = (void*)((char*)src + result); + local[0].iov_len = len - result; + remote[0].iov_base = (void*)((char*)remote_address + result); + remote[0].iov_len = len - result; + + written = process_vm_writev(pid, local, 1, remote, 1, 0); + if (written < 0) { + PyErr_SetFromErrno(PyExc_OSError); + return -1; + } + + result += written; + } while ((size_t)written != local[0].iov_len); + total_bytes_written = result; +#elif defined(__APPLE__) && TARGET_OS_OSX + kern_return_t kr = mach_vm_write( + pid_to_task(pid), + (mach_vm_address_t)remote_address, + (vm_offset_t)src, + (mach_msg_type_number_t)len); + + if (kr != KERN_SUCCESS) { + switch (kr) { + case KERN_PROTECTION_FAILURE: + PyErr_SetString(PyExc_PermissionError, "Not enough permissions to write memory"); + break; + case KERN_INVALID_ARGUMENT: + PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_write"); + break; + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown error writing memory"); + } + return -1; + } + total_bytes_written = len; +#else + PyErr_Format(PyExc_RuntimeError, "Writing memory is not supported on this platform"); + return -1; +#endif + return total_bytes_written; +} + +static int +read_offsets( + int pid, + uintptr_t *runtime_start_address, + _Py_DebugOffsets* debug_offsets +) { + *runtime_start_address = get_py_runtime(pid); + if (!*runtime_start_address) { + if (!PyErr_Occurred()) { + PyErr_SetString( + PyExc_RuntimeError, "Failed to get .PyRuntime address"); + } + return -1; + } + size_t size = sizeof(struct _Py_DebugOffsets); + ssize_t bytes = read_memory( + pid, *runtime_start_address, size, debug_offsets); + if (bytes == -1) { + return -1; + } + return 0; +} + +int +_PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) +{ +#if (!defined(__linux__) && !defined(__APPLE__)) || (defined(__linux__) && !HAVE_PROCESS_VM_READV) + PyErr_SetString(PyExc_RuntimeError, "get_stack_trace is not supported on this platform"); + return -1; +#endif + if (debugger_script_path != NULL && strlen(debugger_script_path) > PATH_MAX) { + PyErr_SetString(PyExc_ValueError, "Debugger script path is too long"); + return -1; + } + + uintptr_t runtime_start_address = get_py_runtime(pid); + struct _Py_DebugOffsets local_debug_offsets; + + if (read_offsets(pid, &runtime_start_address, &local_debug_offsets)) { + return -1; + } + + off_t interpreter_state_list_head = local_debug_offsets.runtime_state.interpreters_head; + + uintptr_t address_of_interpreter_state; + int bytes = read_memory( + pid, + runtime_start_address + interpreter_state_list_head, + sizeof(void*), + &address_of_interpreter_state); + if (bytes == -1) { + return -1; + } + + if (address_of_interpreter_state == 0) { + PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + return -1; + } + + int is_remote_debugging_enabled = 0; + bytes = read_memory( + pid, + address_of_interpreter_state + local_debug_offsets.debugger_support.remote_debugging_enabled, + sizeof(int), + &is_remote_debugging_enabled); + if (bytes == -1) { + return -1; + } + if (is_remote_debugging_enabled == 0) { + PyErr_SetString(PyExc_RuntimeError, "Remote debugging is not enabled in the remote process"); + return -1; + } + + uintptr_t address_of_thread; + bytes = read_memory( + pid, + address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_head, + sizeof(void*), + &address_of_thread); + if (bytes == -1) { + return -1; + } + + pid_t this_tid = 0; + if (tid != 0) { + while (address_of_thread != 0) { + bytes = read_memory( + pid, + address_of_thread + local_debug_offsets.thread_state.native_thread_id, + sizeof(pid_t), + &this_tid); + if (bytes == -1) { + return -1; + } + if (this_tid == tid) { + break; + } + bytes = read_memory( + pid, + address_of_thread + local_debug_offsets.thread_state.next, + sizeof(void*), + &address_of_thread); + if (bytes == -1) { + return -1; + } + } + } + + if (address_of_thread == 0) { + PyErr_SetString(PyExc_RuntimeError, "No thread state found"); + return -1; + } + + uintptr_t eval_breaker; + bytes = read_memory( + pid, + address_of_thread + local_debug_offsets.debugger_support.eval_breaker, + sizeof(uintptr_t), + &eval_breaker); + if (bytes == -1) { + return -1; + } + eval_breaker |= _PY_EVAL_PLEASE_STOP_BIT; + + bytes = write_memory( + pid, + address_of_thread + local_debug_offsets.debugger_support.eval_breaker, + sizeof(uintptr_t), + &eval_breaker); + + if (bytes == -1) { + return -1; + } + + int pending_call = 1; + uintptr_t debugger_pending_call_addr = ( + address_of_thread + + local_debug_offsets.debugger_support.remote_debugger_support + + local_debug_offsets.debugger_support.debugger_pending_call); + bytes = write_memory( + pid, + debugger_pending_call_addr, + sizeof(int), + &pending_call); + + if (bytes == -1) { + return -1; + } + + if (debugger_script_path != NULL) { + uintptr_t debugger_script_path_addr = ( + address_of_thread + + local_debug_offsets.debugger_support.remote_debugger_support + + local_debug_offsets.debugger_support.debugger_script_path); + bytes = write_memory( + pid, + debugger_script_path_addr, + strlen(debugger_script_path) + 1, + debugger_script_path); + if (bytes == -1) { + return -1; + } + } + + return 0; +} \ No newline at end of file diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 1b2019a9f74d42..50f82f7aba1452 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2421,6 +2421,97 @@ sys_is_stack_trampoline_active_impl(PyObject *module) Py_RETURN_FALSE; } + +/*[clinic input] +sys.is_remote_debug_enabled + +Return True if remote debugging is enabled, False otherwise. + +If no stack profiler is activated, this function has no effect. +[clinic start generated code]*/ + +static PyObject * +sys_is_remote_debug_enabled_impl(PyObject *module) +/*[clinic end generated code: output=7ca3d38bdd5935eb input=c8b9354cb7a08ed6]*/ +{ +#ifndef Py_REMOTE_DEBUG + Py_RETURN_FALSE; +#else + const PyConfig *config = _Py_GetConfig(); + return PyBool_FromLong(config->remote_debug); +#endif +} + +/*[clinic input] +sys.remote_exec + + pid: int + script: object + +Executes a file containing Python code in a given remote Python process. + +This function returns immediately, and the code will be executed by the +target process's main thread at the next available opportunity, similarly +to how signals are handled. There is no interface to determine when the +code has been executed. The caller is responsible for making sure that +the file still exists whenever the remote process tries to read it and that +it hasn't been overwritten. + +Args: + pid (int): The process ID of the target Python process. + script (str|bytes|PathLike): The path to a file containing + the Python code to be executed. +[clinic start generated code]*/ + +static PyObject * +sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) +/*[clinic end generated code: output=7d94c56afe4a52c0 input=5749b0253d5b588c]*/ +{ + const char *debugger_script_path = PyUnicode_AsUTF8(script); + if (debugger_script_path == NULL) { + return NULL; + } + +#ifdef MS_WINDOWS + DWORD attr = GetFileAttributesA(debugger_script_path); + if (attr == INVALID_FILE_ATTRIBUTES) { + DWORD err = GetLastError(); + if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) { + PyErr_SetString(PyExc_FileNotFoundError, "Script file does not exist"); + } + else if (err == ERROR_ACCESS_DENIED) { + PyErr_SetString(PyExc_PermissionError, "Script file cannot be read"); + } + else { + PyErr_SetFromWindowsErr(0); + } + return NULL; + } +#else + if (access(debugger_script_path, F_OK | R_OK) != 0) { + switch (errno) { + case ENOENT: + PyErr_SetString(PyExc_FileNotFoundError, "Script file does not exist"); + break; + case EACCES: + PyErr_SetString(PyExc_PermissionError, "Script file cannot be read"); + break; + default: + PyErr_SetFromErrno(PyExc_OSError); + } + return NULL; + } +#endif + + if (_PySysRemoteDebug_SendExec(pid, 0, debugger_script_path) < 0) { + return NULL; + } + + Py_RETURN_NONE; +} + + + /*[clinic input] sys._dump_tracelets @@ -2695,6 +2786,8 @@ static PyMethodDef sys_methods[] = { SYS_ACTIVATE_STACK_TRAMPOLINE_METHODDEF SYS_DEACTIVATE_STACK_TRAMPOLINE_METHODDEF SYS_IS_STACK_TRAMPOLINE_ACTIVE_METHODDEF + SYS_IS_REMOTE_DEBUG_ENABLED_METHODDEF + SYS_REMOTE_EXEC_METHODDEF SYS_UNRAISABLEHOOK_METHODDEF SYS_GET_INT_MAX_STR_DIGITS_METHODDEF SYS_SET_INT_MAX_STR_DIGITS_METHODDEF diff --git a/configure b/configure index a058553480ca5a..d7153914fe7b5e 100755 --- a/configure +++ b/configure @@ -1123,6 +1123,7 @@ with_wheel_pkg_dir with_readline with_computed_gotos with_tail_call_interp +with_remote_debug with_ensurepip with_openssl with_openssl_rpath @@ -1932,6 +1933,7 @@ Optional Packages: default on supported compilers) --with-tail-call-interp enable tail-calling interpreter in evaluation loop and rest of CPython + --with-remote-debug enable remote debugging support (default is yes) --with-ensurepip[=install|upgrade|no] "install" or "upgrade" using bundled pip (default is upgrade) @@ -29302,6 +29304,34 @@ esac fi +# Check for --with-remote-debug +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for --with-remote-debug" >&5 +printf %s "checking for --with-remote-debug... " >&6; } + +# Check whether --with-remote-debug was given. +if test ${with_remote_debug+y} +then : + withval=$with_remote_debug; +else case e in #( + e) with_remote_debug=yes ;; +esac +fi + + +if test "$with_remote_debug" = yes; then + +printf "%s\n" "#define Py_REMOTE_DEBUG 1" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +else + +printf "%s\n" "#define Py_REMOTE_DEBUG 0" >>confdefs.h + + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +fi + case $ac_sys_system in AIX*) diff --git a/configure.ac b/configure.ac index 23bd81ed4431b9..4e24930662c1f8 100644 --- a/configure.ac +++ b/configure.ac @@ -7034,6 +7034,26 @@ fi ], [AC_MSG_RESULT([no value specified])]) +# Check for --with-remote-debug +AC_MSG_CHECKING([for --with-remote-debug]) +AC_ARG_WITH( + [remote-debug], + [AS_HELP_STRING( + [--with-remote-debug], + [enable remote debugging support (default is yes)])], + [], + [with_remote_debug=yes]) + +if test "$with_remote_debug" = yes; then + AC_DEFINE([Py_REMOTE_DEBUG], [1], + [Define if you want to enable remote debugging support.]) + AC_MSG_RESULT([yes]) +else + AC_DEFINE([Py_REMOTE_DEBUG], [0], + [Define if you want to enable remote debugging support.]) + AC_MSG_RESULT([no]) +fi + case $ac_sys_system in AIX*) diff --git a/pyconfig.h.in b/pyconfig.h.in index dbf7865447bc2e..aa086d49e90a5b 100644 --- a/pyconfig.h.in +++ b/pyconfig.h.in @@ -1718,6 +1718,9 @@ /* Define if year with century should be normalized for strftime. */ #undef Py_NORMALIZE_CENTURY +/* Define if you want to enable remote debugging support. */ +#undef Py_REMOTE_DEBUG + /* Define if rl_startup_hook takes arguments */ #undef Py_RL_STARTUP_HOOK_TAKES_ARGS From 9b860221c2dbdd779ada04e501bb874ae32ec065 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sun, 23 Mar 2025 00:39:03 +0000 Subject: [PATCH 02/36] Add tests and use io.open_code --- Lib/test/test_sys.py | 191 ++++++++++++++++++++++++++++++++++++++ Python/ceval_gil.c | 27 +++++- Python/remote_debugging.c | 46 +++++---- Python/sysmodule.c | 2 +- 4 files changed, 241 insertions(+), 25 deletions(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 87c0106ad30840..8d617af2e3745b 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -12,12 +12,15 @@ import sys import sysconfig import test.support +from io import StringIO +from unittest import mock from test import support from test.support import os_helper from test.support.script_helper import assert_python_ok, assert_python_failure from test.support import threading_helper from test.support import import_helper from test.support import force_not_colorized +from test.support import SHORT_TIMEOUT try: from test.support import interpreters except ImportError: @@ -1923,5 +1926,193 @@ def write(self, s): self.assertEqual(out, b"") self.assertEqual(err, b"") + +def _supports_remote_attaching(): + PROCESS_VM_READV_SUPPORTED = False + + try: + from _testexternalinspection import PROCESS_VM_READV_SUPPORTED + except ImportError: + pass + + return PROCESS_VM_READV_SUPPORTED + +@unittest.skipIf(not sys.is_remote_debug_enabled(), "Remote debugging is not enabled") +@unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux", + "Test only runs on Linux and MacOS") +@unittest.skipIf(sys.platform == "linux" and not _supports_remote_attaching(), + "Test only runs on Linux with process_vm_readv support") +class TestRemoteExec(unittest.TestCase): + def tearDown(self): + test.support.reap_children() + + def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologue=''): + # Create the script that will be remotely executed + script = os_helper.TESTFN + '_remote.py' + self.addCleanup(os_helper.unlink, script) + + with open(script, 'w') as f: + f.write(script_code) + + # Create and run the target process + target = os_helper.TESTFN + '_target.py' + self.addCleanup(os_helper.unlink, target) + + with os_helper.temp_dir() as work_dir: + fifo = f"{work_dir}/the_fifo" + os.mkfifo(fifo) + self.addCleanup(os_helper.unlink, fifo) + + with open(target, 'w') as f: + f.write(f''' +import sys +import time + +with open("{fifo}", "w") as fifo: + fifo.write("ready") + +{prologue} + +print("Target process running...") + +# Wait for remote script to be executed +# (the execution will happen as the following +# code is processed as soon as the read() call +# unblocks) +with open("{fifo}", "r") as fifo: + fifo.read() + +# Write confirmation back +with open("{fifo}", "w") as fifo: + fifo.write("executed") +''') + + # Start the target process and capture its output + cmd = [sys.executable] + if python_args: + cmd.extend(python_args) + cmd.append(target) + + with subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env) as proc: + try: + # Wait for process to be ready + with open(fifo, "r") as fifo_file: + response = fifo_file.read() + self.assertEqual(response, "ready") + + # Try remote exec on the target process + sys.remote_exec(proc.pid, script) + + # Signal script to continue + with open(fifo, "w") as fifo_file: + fifo_file.write("continue") + + # Wait for execution confirmation + with open(fifo, "r") as fifo_file: + response = fifo_file.read() + self.assertEqual(response, "executed") + + # Return output for test verification + stdout, stderr = proc.communicate(timeout=1.0) + return proc.returncode, stdout, stderr + except PermissionError: + self.skipTest("Insufficient permissions to execute code in remote process") + finally: + proc.kill() + proc.terminate() + proc.wait(timeout=SHORT_TIMEOUT) + + def test_remote_exec(self): + """Test basic remote exec functionality""" + script = ''' +print("Remote script executed successfully!") +''' + returncode, stdout, stderr = self._run_remote_exec_test(script) + self.assertEqual(returncode, 0) + self.assertIn(b"Remote script executed successfully!", stdout) + self.assertEqual(stderr, b"") + + def test_remote_exec_with_self_process(self): + """Test remote exec with the target process being the same as the test process""" + + code = 'import sys;print("Remote script executed successfully!", file=sys.stderr)' + file = os_helper.TESTFN + '_remote.py' + with open(file, 'w') as f: + f.write(code) + self.addCleanup(os_helper.unlink, file) + with mock.patch('sys.stderr', new_callable=StringIO) as mock_stderr: + with mock.patch('sys.stdout', new_callable=StringIO) as mock_stdout: + sys.remote_exec(os.getpid(), file) + print("Done") + self.assertEqual(mock_stderr.getvalue(), "Remote script executed successfully!\n") + self.assertEqual(mock_stdout.getvalue(), "Done\n") + + def test_remote_exec_raises_audit_event(self): + """Test remote exec raises an audit event""" + prologue = '''\ +import sys +def audit_hook(event, arg): + print(f"Audit event: {event}, arg: {arg}") +sys.addaudithook(audit_hook) +''' + script = ''' +print("Remote script executed successfully!") +''' + returncode, stdout, stderr = self._run_remote_exec_test(script, prologue=prologue) + self.assertEqual(returncode, 0) + self.assertIn(b"Remote script executed successfully!", stdout) + self.assertIn(b"Audit event: remote_debugger_script, arg: ", stdout) + self.assertEqual(stderr, b"") + + def test_remote_exec_with_exception(self): + """Test remote exec with an exception raised in the target process + + The exception should be raised in the main thread of the target process + but not crash the target process. + """ + script = ''' +raise Exception("Remote script exception") +''' + returncode, stdout, stderr = self._run_remote_exec_test(script) + self.assertEqual(returncode, 0) + self.assertIn(b"Remote script exception", stderr) + self.assertEqual(stdout, b"Target process running...\n") + + def test_remote_exec_disabled_by_env(self): + """Test remote exec is disabled when PYTHON_DISABLE_REMOTE_DEBUG is set""" + env = os.environ.copy() + env['PYTHON_DISABLE_REMOTE_DEBUG'] = '1' + with self.assertRaisesRegex(RuntimeError, "Remote debugging is not enabled in the remote process"): + self._run_remote_exec_test("print('should not run')", env=env) + + def test_remote_exec_disabled_by_xoption(self): + """Test remote exec is disabled with -Xdisable-remote-debug""" + with self.assertRaisesRegex(RuntimeError, "Remote debugging is not enabled in the remote process"): + self._run_remote_exec_test("print('should not run')", python_args=['-Xdisable-remote-debug']) + + def test_remote_exec_invalid_pid(self): + """Test remote exec with invalid process ID""" + with self.assertRaises(OSError): + sys.remote_exec(999999, "print('should not run')") + + def test_remote_exec_syntax_error(self): + """Test remote exec with syntax error in script""" + script = ''' +this is invalid python code +''' + returncode, stdout, stderr = self._run_remote_exec_test(script) + self.assertEqual(returncode, 0) + self.assertIn(b"SyntaxError", stderr) + self.assertEqual(stdout, b"Target process running...\n") + + def test_remote_exec_invalid_script_path(self): + """Test remote exec with invalid script path""" + with self.assertRaises(OSError): + sys.remote_exec(os.getpid(), "invalid_script_path") + + if __name__ == "__main__": unittest.main() diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 265e6d4a0a626c..94f9ee8d21c9c0 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1327,19 +1327,38 @@ _Py_HandlePending(PyThreadState *tstate) tstate->remote_debugger_support.debugger_pending_call = 0; const char *path = tstate->remote_debugger_support.debugger_script_path; if (*path) { - if (0 != PySys_Audit("debugger_script", "%s", path)) { - PyErr_Clear(); + if (0 != PySys_Audit("remote_debugger_script", "s", path)) { + PyErr_FormatUnraisable("Error when auditing remote debugger script %s", path); } else { - FILE* f = fopen(path, "r"); + // Open the debugger script with the open code hook. Unfortunately this forces us to handle + // the resulting Python object, which is a file object and therefore we need to call + // Python methods on it instead of the simpler C equivalents. + PyObject* fileobj = PyFile_OpenCode(path); + if (!fileobj) { + PyErr_FormatUnraisable("Error when opening debugger script %s", path); + return 0; + } + int fd = PyObject_AsFileDescriptor(fileobj); + if (fd == -1) { + PyErr_FormatUnraisable("Error when getting file descriptor for debugger script %s", path); + return 0; + } + FILE* f = fdopen(fd, "r"); if (!f) { PyErr_SetFromErrno(PyExc_OSError); } else { PyRun_AnyFile(f, path); - fclose(f); } if (PyErr_Occurred()) { PyErr_FormatUnraisable("Error executing debugger script %s", path); } + PyObject* res = PyObject_CallMethod(fileobj, "close", ""); + if (!res) { + PyErr_FormatUnraisable("Error when closing debugger script %s", path); + } else { + Py_DECREF(res); + } + Py_DECREF(fileobj); } } } diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 9e8cc8505697a4..3d7a659ea5408f 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -595,16 +595,27 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) } eval_breaker |= _PY_EVAL_PLEASE_STOP_BIT; - bytes = write_memory( - pid, - address_of_thread + local_debug_offsets.debugger_support.eval_breaker, - sizeof(uintptr_t), - &eval_breaker); - - if (bytes == -1) { + // Ensure our path is not too long + if (local_debug_offsets.debugger_support.debugger_script_path_size <= strlen(debugger_script_path)) { + PyErr_SetString(PyExc_ValueError, "Debugger script path is too long"); return -1; } + if (debugger_script_path != NULL) { + uintptr_t debugger_script_path_addr = ( + address_of_thread + + local_debug_offsets.debugger_support.remote_debugger_support + + local_debug_offsets.debugger_support.debugger_script_path); + bytes = write_memory( + pid, + debugger_script_path_addr, + strlen(debugger_script_path) + 1, + debugger_script_path); + if (bytes == -1) { + return -1; + } + } + int pending_call = 1; uintptr_t debugger_pending_call_addr = ( address_of_thread + @@ -620,19 +631,14 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) return -1; } - if (debugger_script_path != NULL) { - uintptr_t debugger_script_path_addr = ( - address_of_thread + - local_debug_offsets.debugger_support.remote_debugger_support + - local_debug_offsets.debugger_support.debugger_script_path); - bytes = write_memory( - pid, - debugger_script_path_addr, - strlen(debugger_script_path) + 1, - debugger_script_path); - if (bytes == -1) { - return -1; - } + bytes = write_memory( + pid, + address_of_thread + local_debug_offsets.debugger_support.eval_breaker, + sizeof(uintptr_t), + &eval_breaker); + + if (bytes == -1) { + return -1; } return 0; diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 50f82f7aba1452..662d410b68cfd1 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2459,7 +2459,7 @@ it hasn't been overwritten. Args: pid (int): The process ID of the target Python process. - script (str|bytes|PathLike): The path to a file containing + script (str|bytes): The path to a file containing the Python code to be executed. [clinic start generated code]*/ From af84100ced5a390a15e1e970e86bff55a5c4bddd Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sun, 23 Mar 2025 01:05:12 +0000 Subject: [PATCH 03/36] Some fixes --- Lib/test/test_sys.py | 2 +- Python/remote_debugging.c | 73 ++++++++++++++++++++++++++++----------- 2 files changed, 53 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 8d617af2e3745b..a00ebd03e11fa3 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -2096,7 +2096,7 @@ def test_remote_exec_disabled_by_xoption(self): def test_remote_exec_invalid_pid(self): """Test remote exec with invalid process ID""" with self.assertRaises(OSError): - sys.remote_exec(999999, "print('should not run')") + sys.remote_exec(99999, "print('should not run')") def test_remote_exec_syntax_error(self): """Test remote exec with syntax error in script""" diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 3d7a659ea5408f..934354801c5a93 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -65,8 +65,12 @@ module _pdb #if defined(__APPLE__) && TARGET_OS_OSX static uintptr_t -return_section_address(const char* section, mach_port_t proc_ref, uintptr_t base, void* map) -{ +return_section_address( + const char* section, + mach_port_t proc_ref, + uintptr_t base, + void* map +) { struct mach_header_64* hdr = (struct mach_header_64*)map; int ncmds = hdr->ncmds; @@ -76,35 +80,37 @@ return_section_address(const char* section, mach_port_t proc_ref, uintptr_t base mach_vm_size_t size = 0; mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t); mach_vm_address_t address = (mach_vm_address_t)base; - vm_region_basic_info_data_64_t region_info; + vm_region_basic_info_data_64_t r_info; mach_port_t object_name; uintptr_t vmaddr = 0; for (int i = 0; cmd_cnt < 2 && i < ncmds; i++) { if (cmd->cmd == LC_SEGMENT_64 && strcmp(cmd->segname, "__TEXT") == 0) { - vmaddr = cmd->vmaddr; + vmaddr = cmd->vmaddr; } if (cmd->cmd == LC_SEGMENT_64 && strcmp(cmd->segname, "__DATA") == 0) { while (cmd->filesize != size) { address += size; - if (mach_vm_region( - proc_ref, - &address, - &size, - VM_REGION_BASIC_INFO_64, - (vm_region_info_t)®ion_info, // cppcheck-suppress [uninitvar] - &count, - &object_name) - != KERN_SUCCESS) - { - PyErr_SetString(PyExc_RuntimeError, "Cannot get any more VM maps.\n"); + kern_return_t ret = mach_vm_region( + proc_ref, + &address, + &size, + VM_REGION_BASIC_INFO_64, + (vm_region_info_t)&r_info, // cppcheck-suppress [uninitvar] + &count, + &object_name + ); + if (ret != KERN_SUCCESS) { + PyErr_SetString( + PyExc_RuntimeError, "Cannot get any more VM maps.\n"); return 0; } } int nsects = cmd->nsects; - struct section_64* sec = - (struct section_64*)((void*)cmd + sizeof(struct segment_command_64)); + struct section_64* sec = (struct section_64*)( + (void*)cmd + sizeof(struct segment_command_64) + ); for (int j = 0; j < nsects; j++) { if (strcmp(sec[j].sectname, section) == 0) { return base + sec[j].addr - vmaddr; @@ -115,6 +121,10 @@ return_section_address(const char* section, mach_port_t proc_ref, uintptr_t base cmd = (struct segment_command_64*)((void*)cmd + cmd->cmdsize); } + + // We should not be here, but if we are there, we should say about this + PyErr_SetString( + PyExc_RuntimeError, "Cannot find section address.\n"); return 0; } @@ -204,10 +214,16 @@ search_map_for_section(pid_t pid, const char* secname, const char* substr) { VM_REGION_BASIC_INFO_64, (vm_region_info_t)®ion_info, &count, - &object_name) - == KERN_SUCCESS) + &object_name) == KERN_SUCCESS) { - int path_len = proc_regionfilename(pid, address, map_filename, MAXPATHLEN); + if ((region_info.protection & VM_PROT_READ) == 0 + || (region_info.protection & VM_PROT_EXECUTE) == 0) { + address += size; + continue; + } + + int path_len = proc_regionfilename( + pid, address, map_filename, MAXPATHLEN); if (path_len == 0) { address += size; continue; @@ -222,11 +238,15 @@ search_map_for_section(pid_t pid, const char* secname, const char* substr) { if (!match_found && strncmp(filename, substr, strlen(substr)) == 0) { match_found = 1; - return search_section_in_file(secname, map_filename, address, size, proc_ref); + return search_section_in_file( + secname, map_filename, address, size, proc_ref); } address += size; } + + PyErr_SetString(PyExc_RuntimeError, + "mach_vm_region failed to find the section"); return 0; } @@ -359,6 +379,8 @@ get_py_runtime(pid_t pid) { uintptr_t address = search_map_for_section(pid, "PyRuntime", "libpython"); if (address == 0) { + // TODO: Differentiate between not found and error + PyErr_Clear(); address = search_map_for_section(pid, "PyRuntime", "python"); } return address; @@ -413,6 +435,9 @@ read_memory(pid_t pid, uintptr_t remote_address, size_t len, void* dst) } total_bytes = len; #else + PyErr_SetString( + PyExc_RuntimeError, + "Memory reading is not supported on this platform"); return -1; #endif return total_bytes; @@ -507,6 +532,12 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) } uintptr_t runtime_start_address = get_py_runtime(pid); + if (runtime_start_address == 0) { + if (!PyErr_Occurred()) { + PyErr_SetString(PyExc_RuntimeError, "Failed to get .PyRuntime address"); + } + return -1; + } struct _Py_DebugOffsets local_debug_offsets; if (read_offsets(pid, &runtime_start_address, &local_debug_offsets)) { From 19ef7ae234f35e7099c024c5dbcb31e01c10c011 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 24 Mar 2025 19:52:04 +0000 Subject: [PATCH 04/36] Prepare for windows support --- Include/internal/pycore_debug_offsets.h | 2 + Lib/test/test_sys.py | 125 ++++++----- Python/ceval_gil.c | 19 ++ Python/remote_debugging.c | 266 +++++++++++++++++------- Python/sysmodule.c | 27 ++- 5 files changed, 299 insertions(+), 140 deletions(-) diff --git a/Include/internal/pycore_debug_offsets.h b/Include/internal/pycore_debug_offsets.h index 47c66c70ae455f..124b104e4ba8ae 100644 --- a/Include/internal/pycore_debug_offsets.h +++ b/Include/internal/pycore_debug_offsets.h @@ -73,6 +73,7 @@ typedef struct _Py_DebugOffsets { uint64_t id; uint64_t next; uint64_t threads_head; + uint64_t threads_main; uint64_t gc; uint64_t imports_modules; uint64_t sysdict; @@ -232,6 +233,7 @@ typedef struct _Py_DebugOffsets { .id = offsetof(PyInterpreterState, id), \ .next = offsetof(PyInterpreterState, next), \ .threads_head = offsetof(PyInterpreterState, threads.head), \ + .threads_main = offsetof(PyInterpreterState, threads.main), \ .gc = offsetof(PyInterpreterState, gc), \ .imports_modules = offsetof(PyInterpreterState, imports.modules), \ .sysdict = offsetof(PyInterpreterState, sysdict), \ diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index a00ebd03e11fa3..8e0d62d6ffe0d5 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -11,6 +11,7 @@ import subprocess import sys import sysconfig +import socket import test.support from io import StringIO from unittest import mock @@ -1938,7 +1939,7 @@ def _supports_remote_attaching(): return PROCESS_VM_READV_SUPPORTED @unittest.skipIf(not sys.is_remote_debug_enabled(), "Remote debugging is not enabled") -@unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux", +@unittest.skipIf(sys.platform != "darwin" and sys.platform != "linux" and sys.platform != "win32", "Test only runs on Linux and MacOS") @unittest.skipIf(sys.platform == "linux" and not _supports_remote_attaching(), "Test only runs on Linux with process_vm_readv support") @@ -1958,18 +1959,23 @@ def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologu target = os_helper.TESTFN + '_target.py' self.addCleanup(os_helper.unlink, target) - with os_helper.temp_dir() as work_dir: - fifo = f"{work_dir}/the_fifo" - os.mkfifo(fifo) - self.addCleanup(os_helper.unlink, fifo) + # Find an available port for the socket + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('localhost', 0)) + port = s.getsockname()[1] - with open(target, 'w') as f: - f.write(f''' + with open(target, 'w') as f: + f.write(f''' import sys import time +import socket -with open("{fifo}", "w") as fifo: - fifo.write("ready") +# Connect to the test process +sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) +sock.connect(('localhost', {port})) + +# Signal that the process is ready +sock.sendall(b"ready") {prologue} @@ -1977,53 +1983,61 @@ def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologu # Wait for remote script to be executed # (the execution will happen as the following -# code is processed as soon as the read() call +# code is processed as soon as the recv call # unblocks) -with open("{fifo}", "r") as fifo: - fifo.read() +sock.recv(1024) # Write confirmation back -with open("{fifo}", "w") as fifo: - fifo.write("executed") +sock.sendall(b"executed") +sock.close() ''') - # Start the target process and capture its output - cmd = [sys.executable] - if python_args: - cmd.extend(python_args) - cmd.append(target) + # Start the target process and capture its output + cmd = [sys.executable] + if python_args: + cmd.extend(python_args) + cmd.append(target) - with subprocess.Popen(cmd, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=env) as proc: - try: - # Wait for process to be ready - with open(fifo, "r") as fifo_file: - response = fifo_file.read() - self.assertEqual(response, "ready") - - # Try remote exec on the target process - sys.remote_exec(proc.pid, script) - - # Signal script to continue - with open(fifo, "w") as fifo_file: - fifo_file.write("continue") - - # Wait for execution confirmation - with open(fifo, "r") as fifo_file: - response = fifo_file.read() - self.assertEqual(response, "executed") - - # Return output for test verification - stdout, stderr = proc.communicate(timeout=1.0) - return proc.returncode, stdout, stderr - except PermissionError: - self.skipTest("Insufficient permissions to execute code in remote process") - finally: - proc.kill() - proc.terminate() - proc.wait(timeout=SHORT_TIMEOUT) + # Create a socket server to communicate with the target process + server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + server_socket.bind(('localhost', port)) + server_socket.settimeout(10.0) # Set a timeout to prevent hanging + server_socket.listen(1) + + with subprocess.Popen(cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=env) as proc: + try: + # Accept connection from target process + client_socket, _ = server_socket.accept() + + # Wait for process to be ready + response = client_socket.recv(1024) + self.assertEqual(response, b"ready") + + # Try remote exec on the target process + sys.remote_exec(proc.pid, script) + + # Signal script to continue + client_socket.sendall(b"continue") + + # Wait for execution confirmation + response = client_socket.recv(1024) + self.assertEqual(response, b"executed") + + # Return output for test verification + stdout, stderr = proc.communicate(timeout=10.0) + return proc.returncode, stdout, stderr + except PermissionError: + self.skipTest("Insufficient permissions to execute code in remote process") + finally: + if 'client_socket' in locals(): + client_socket.close() + server_socket.close() + proc.kill() + proc.terminate() + proc.wait(timeout=SHORT_TIMEOUT) def test_remote_exec(self): """Test basic remote exec functionality""" @@ -2031,7 +2045,7 @@ def test_remote_exec(self): print("Remote script executed successfully!") ''' returncode, stdout, stderr = self._run_remote_exec_test(script) - self.assertEqual(returncode, 0) + # self.assertEqual(returncode, 0) self.assertIn(b"Remote script executed successfully!", stdout) self.assertEqual(stderr, b"") @@ -2039,13 +2053,13 @@ def test_remote_exec_with_self_process(self): """Test remote exec with the target process being the same as the test process""" code = 'import sys;print("Remote script executed successfully!", file=sys.stderr)' - file = os_helper.TESTFN + '_remote.py' + file = os_helper.TESTFN + '_remote_self.py' with open(file, 'w') as f: f.write(code) self.addCleanup(os_helper.unlink, file) with mock.patch('sys.stderr', new_callable=StringIO) as mock_stderr: with mock.patch('sys.stdout', new_callable=StringIO) as mock_stdout: - sys.remote_exec(os.getpid(), file) + sys.remote_exec(os.getpid(), os.path.abspath(file)) print("Done") self.assertEqual(mock_stderr.getvalue(), "Remote script executed successfully!\n") self.assertEqual(mock_stdout.getvalue(), "Done\n") @@ -2079,7 +2093,7 @@ def test_remote_exec_with_exception(self): returncode, stdout, stderr = self._run_remote_exec_test(script) self.assertEqual(returncode, 0) self.assertIn(b"Remote script exception", stderr) - self.assertEqual(stdout, b"Target process running...\n") + self.assertEqual(stdout.strip(), b"Target process running...") def test_remote_exec_disabled_by_env(self): """Test remote exec is disabled when PYTHON_DISABLE_REMOTE_DEBUG is set""" @@ -2106,13 +2120,12 @@ def test_remote_exec_syntax_error(self): returncode, stdout, stderr = self._run_remote_exec_test(script) self.assertEqual(returncode, 0) self.assertIn(b"SyntaxError", stderr) - self.assertEqual(stdout, b"Target process running...\n") + self.assertEqual(stdout.strip(), b"Target process running...") def test_remote_exec_invalid_script_path(self): """Test remote exec with invalid script path""" with self.assertRaises(OSError): sys.remote_exec(os.getpid(), "invalid_script_path") - if __name__ == "__main__": unittest.main() diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 94f9ee8d21c9c0..361a01d2c5c181 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1338,17 +1338,36 @@ _Py_HandlePending(PyThreadState *tstate) PyErr_FormatUnraisable("Error when opening debugger script %s", path); return 0; } +#ifdef MS_WINDOWS + PyObject* path_obj = PyUnicode_FromString(path); + if (!path_obj) { + PyErr_FormatUnraisable("Error when converting remote debugger script path %s to Unicode", path); + return 0; + } + wchar_t* wpath = PyUnicode_AsWideCharString(path_obj, NULL); + Py_DECREF(path_obj); + if (!wpath) { + PyErr_FormatUnraisable("Error when converting remote debugger script path %s to wide char", path); + return 0; + } + FILE* f = _wfopen(wpath, L"r"); +#else int fd = PyObject_AsFileDescriptor(fileobj); if (fd == -1) { PyErr_FormatUnraisable("Error when getting file descriptor for debugger script %s", path); return 0; } FILE* f = fdopen(fd, "r"); +#endif if (!f) { PyErr_SetFromErrno(PyExc_OSError); } else { PyRun_AnyFile(f, path); } +#ifdef MS_WINDOWS + PyMem_Free(wpath); + fclose(f); +#endif if (PyErr_Occurred()) { PyErr_FormatUnraisable("Error executing debugger script %s", path); } diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 934354801c5a93..d158517f646ddc 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -1,4 +1,5 @@ #define _GNU_SOURCE +#include "pyconfig.h" #ifdef __linux__ # include @@ -35,6 +36,13 @@ # endif #endif +#ifdef MS_WINDOWS + // Windows includes and definitions + #include + #include + #include +#endif + #include #include #include @@ -42,10 +50,12 @@ #include #include #include -#include -#include -#include -#include +#ifndef MS_WINDOWS + #include + #include + #include + #include +#endif #ifndef Py_BUILD_CORE_BUILTIN # define Py_BUILD_CORE_MODULE 1 @@ -58,6 +68,42 @@ # define HAVE_PROCESS_VM_READV 0 #endif +// Define a platform-independent process handle structure +typedef struct { + pid_t pid; +#ifdef MS_WINDOWS + HANDLE hProcess; +#endif +} proc_handle_t; + +// Initialize the process handle +static int +init_proc_handle(proc_handle_t *handle, pid_t pid) { + handle->pid = pid; +#ifdef MS_WINDOWS + handle->hProcess = OpenProcess( + PROCESS_VM_READ | PROCESS_VM_WRITE | PROCESS_VM_OPERATION | PROCESS_QUERY_INFORMATION, + FALSE, pid); + if (handle->hProcess == NULL) { + PyErr_SetFromWindowsErr(0); + return -1; + } +#endif + return 0; +} + +// Clean up the process handle +static void +cleanup_proc_handle(proc_handle_t *handle) { +#ifdef MS_WINDOWS + if (handle->hProcess != NULL) { + CloseHandle(handle->hProcess); + handle->hProcess = NULL; + } +#endif + handle->pid = 0; +} + /*[clinic input] module _pdb [clinic start generated code]*/ @@ -192,14 +238,14 @@ pid_to_task(pid_t pid) } static uintptr_t -search_map_for_section(pid_t pid, const char* secname, const char* substr) { +search_map_for_section(proc_handle_t *handle, const char* secname, const char* substr) { mach_vm_address_t address = 0; mach_vm_size_t size = 0; mach_msg_type_number_t count = sizeof(vm_region_basic_info_data_64_t); vm_region_basic_info_data_64_t region_info; mach_port_t object_name; - mach_port_t proc_ref = pid_to_task(pid); + mach_port_t proc_ref = pid_to_task(handle->pid); if (proc_ref == 0) { PyErr_SetString(PyExc_PermissionError, "Cannot get task for PID"); return 0; @@ -223,7 +269,7 @@ search_map_for_section(pid_t pid, const char* secname, const char* substr) { } int path_len = proc_regionfilename( - pid, address, map_filename, MAXPATHLEN); + handle->pid, address, map_filename, MAXPATHLEN); if (path_len == 0) { address += size; continue; @@ -250,14 +296,14 @@ search_map_for_section(pid_t pid, const char* secname, const char* substr) { return 0; } -#endif +#endif // (__APPLE__ && TARGET_OS_OSX) #ifdef __linux__ static uintptr_t -find_map_start_address(pid_t pid, char* result_filename, const char* map) +find_map_start_address(proc_handle_t *handle, char* result_filename, const char* map) { char maps_file_path[64]; - sprintf(maps_file_path, "/proc/%d/maps", pid); + sprintf(maps_file_path, "/proc/%d/maps", handle->pid); FILE* maps_file = fopen(maps_file_path, "r"); if (maps_file == NULL) { @@ -298,10 +344,10 @@ find_map_start_address(pid_t pid, char* result_filename, const char* map) } static uintptr_t -search_map_for_section(pid_t pid, const char* secname, const char* map) +search_map_for_section(proc_handle_t *handle, const char* secname, const char* map) { char elf_file[256]; - uintptr_t start_address = find_map_start_address(pid, elf_file, map); + uintptr_t start_address = find_map_start_address(handle, elf_file, map); if (start_address == 0) { return 0; @@ -363,7 +409,7 @@ search_map_for_section(pid_t pid, const char* secname, const char* map) } exit: - if (close(fd) != 0) { + if (fd >= 0 && close(fd) != 0) { PyErr_SetFromErrno(PyExc_OSError); } if (file_memory != NULL) { @@ -372,49 +418,67 @@ search_map_for_section(pid_t pid, const char* secname, const char* map) return result; } -#endif +#endif // __linux__ +#ifdef MS_WINDOWS + +static uintptr_t static uintptr_t -get_py_runtime(pid_t pid) +search_map_for_section(proc_handle_t *handle, const char* secname, const char* substr) { + //TODO: Implement this function + PyErr_SetString(PyExc_RuntimeError, "search_map_for_section not implemented on Windows"); + return 0; +} + +#endif // MS_WINDOWS + +// Get the PyRuntime section address for any platform +static uintptr_t +get_py_runtime(proc_handle_t *handle) { - uintptr_t address = search_map_for_section(pid, "PyRuntime", "libpython"); + // Try libpython first, then fall back to python + uintptr_t address = search_map_for_section(handle, "PyRuntime", "libpython"); if (address == 0) { // TODO: Differentiate between not found and error PyErr_Clear(); - address = search_map_for_section(pid, "PyRuntime", "python"); + address = search_map_for_section(handle, "PyRuntime", "python"); } return address; } -static ssize_t -read_memory(pid_t pid, uintptr_t remote_address, size_t len, void* dst) +// Platform-independent memory read function +static Py_ssize_t +read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* dst) { - ssize_t total_bytes = 0; -#if defined(__linux__) && HAVE_PROCESS_VM_READV +#ifdef MS_WINDOWS + // TODO: Implement this function + PyErr_SetString(PyExc_RuntimeError, "Memory reading is not supported on Windows"); + return -1; +#elif defined(__linux__) && HAVE_PROCESS_VM_READV struct iovec local[1]; struct iovec remote[1]; - ssize_t result = 0; - ssize_t read = 0; + Py_ssize_t result = 0; + Py_ssize_t read_bytes = 0; do { - local[0].iov_base = dst + result; + local[0].iov_base = (char*)dst + result; local[0].iov_len = len - result; remote[0].iov_base = (void*)(remote_address + result); remote[0].iov_len = len - result; - read = process_vm_readv(pid, local, 1, remote, 1, 0); - if (read < 0) { + read_bytes = process_vm_readv(handle->pid, local, 1, remote, 1, 0); + if (read_bytes < 0) { PyErr_SetFromErrno(PyExc_OSError); return -1; } - result += read; - } while ((size_t)read != local[0].iov_len); - total_bytes = result; + result += read_bytes; + } while ((size_t)read_bytes != local[0].iov_len); + return result; #elif defined(__APPLE__) && TARGET_OS_OSX - ssize_t result = -1; + Py_ssize_t result = -1; kern_return_t kr = mach_vm_read_overwrite( - pid_to_task(pid), + pid_to_task(handle->pid), (mach_vm_address_t)remote_address, len, (mach_vm_address_t)dst, @@ -433,25 +497,28 @@ read_memory(pid_t pid, uintptr_t remote_address, size_t len, void* dst) } return -1; } - total_bytes = len; + return len; #else PyErr_SetString( PyExc_RuntimeError, "Memory reading is not supported on this platform"); return -1; #endif - return total_bytes; } -ssize_t -write_memory(pid_t pid, uintptr_t remote_address, size_t len, const void* src) +// Platform-independent memory write function +static Py_ssize_t +write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) { - ssize_t total_bytes_written = 0; -#if defined(__linux__) && HAVE_PROCESS_VM_READV +#ifdef MS_WINDOWS + // TODO: Implement this function + PyErr_SetString(PyExc_RuntimeError, "Memory writing is not supported on Windows"); + return -1; +#elif defined(__linux__) && HAVE_PROCESS_VM_READV struct iovec local[1]; struct iovec remote[1]; - ssize_t result = 0; - ssize_t written = 0; + Py_ssize_t result = 0; + Py_ssize_t written = 0; do { local[0].iov_base = (void*)((char*)src + result); @@ -459,7 +526,7 @@ write_memory(pid_t pid, uintptr_t remote_address, size_t len, const void* src) remote[0].iov_base = (void*)((char*)remote_address + result); remote[0].iov_len = len - result; - written = process_vm_writev(pid, local, 1, remote, 1, 0); + written = process_vm_writev(handle->pid, local, 1, remote, 1, 0); if (written < 0) { PyErr_SetFromErrno(PyExc_OSError); return -1; @@ -467,10 +534,10 @@ write_memory(pid_t pid, uintptr_t remote_address, size_t len, const void* src) result += written; } while ((size_t)written != local[0].iov_len); - total_bytes_written = result; + return result; #elif defined(__APPLE__) && TARGET_OS_OSX kern_return_t kr = mach_vm_write( - pid_to_task(pid), + pid_to_task(handle->pid), (mach_vm_address_t)remote_address, (vm_offset_t)src, (mach_msg_type_number_t)len); @@ -488,31 +555,30 @@ write_memory(pid_t pid, uintptr_t remote_address, size_t len, const void* src) } return -1; } - total_bytes_written = len; + return len; #else PyErr_Format(PyExc_RuntimeError, "Writing memory is not supported on this platform"); return -1; #endif - return total_bytes_written; } static int read_offsets( - int pid, + proc_handle_t *handle, uintptr_t *runtime_start_address, _Py_DebugOffsets* debug_offsets ) { - *runtime_start_address = get_py_runtime(pid); + *runtime_start_address = get_py_runtime(handle); if (!*runtime_start_address) { if (!PyErr_Occurred()) { PyErr_SetString( - PyExc_RuntimeError, "Failed to get .PyRuntime address"); + PyExc_RuntimeError, "Failed to get PyRuntime address"); } return -1; } size_t size = sizeof(struct _Py_DebugOffsets); - ssize_t bytes = read_memory( - pid, *runtime_start_address, size, debug_offsets); + Py_ssize_t bytes = read_memory( + handle, *runtime_start_address, size, debug_offsets); if (bytes == -1) { return -1; } @@ -522,113 +588,143 @@ read_offsets( int _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) { -#if (!defined(__linux__) && !defined(__APPLE__)) || (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString(PyExc_RuntimeError, "get_stack_trace is not supported on this platform"); +#if (!defined(__linux__) && !defined(__APPLE__) && !defined(MS_WINDOWS)) || (defined(__linux__) && !HAVE_PROCESS_VM_READV) + PyErr_SetString(PyExc_RuntimeError, "Remote debugging is not supported on this platform"); return -1; #endif - if (debugger_script_path != NULL && strlen(debugger_script_path) > PATH_MAX) { - PyErr_SetString(PyExc_ValueError, "Debugger script path is too long"); + + proc_handle_t handle; + if (init_proc_handle(&handle, pid) < 0) { return -1; } - uintptr_t runtime_start_address = get_py_runtime(pid); - if (runtime_start_address == 0) { - if (!PyErr_Occurred()) { - PyErr_SetString(PyExc_RuntimeError, "Failed to get .PyRuntime address"); - } +#ifdef MS_WINDOWS + if (debugger_script_path != NULL && strlen(debugger_script_path) > MAX_PATH) { +#else + if (debugger_script_path != NULL && strlen(debugger_script_path) > PATH_MAX) { +#endif + PyErr_SetString(PyExc_ValueError, "Debugger script path is too long"); + cleanup_proc_handle(&handle); return -1; } - struct _Py_DebugOffsets local_debug_offsets; - if (read_offsets(pid, &runtime_start_address, &local_debug_offsets)) { + uintptr_t runtime_start_address; + struct _Py_DebugOffsets local_debug_offsets; + + if (read_offsets(&handle, &runtime_start_address, &local_debug_offsets)) { + cleanup_proc_handle(&handle); return -1; } - off_t interpreter_state_list_head = local_debug_offsets.runtime_state.interpreters_head; + uintptr_t interpreter_state_list_head = local_debug_offsets.runtime_state.interpreters_head; uintptr_t address_of_interpreter_state; - int bytes = read_memory( - pid, + Py_ssize_t bytes = read_memory( + &handle, runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); if (bytes == -1) { + cleanup_proc_handle(&handle); return -1; } if (address_of_interpreter_state == 0) { PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + cleanup_proc_handle(&handle); return -1; } int is_remote_debugging_enabled = 0; bytes = read_memory( - pid, + &handle, address_of_interpreter_state + local_debug_offsets.debugger_support.remote_debugging_enabled, sizeof(int), &is_remote_debugging_enabled); if (bytes == -1) { + cleanup_proc_handle(&handle); return -1; } + if (is_remote_debugging_enabled == 0) { PyErr_SetString(PyExc_RuntimeError, "Remote debugging is not enabled in the remote process"); + cleanup_proc_handle(&handle); return -1; } uintptr_t address_of_thread; - bytes = read_memory( - pid, - address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_head, - sizeof(void*), - &address_of_thread); - if (bytes == -1) { - return -1; - } - pid_t this_tid = 0; + if (tid != 0) { + bytes = read_memory( + &handle, + address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_head, + sizeof(void*), + &address_of_thread); + if (bytes == -1) { + cleanup_proc_handle(&handle); + return -1; + } while (address_of_thread != 0) { bytes = read_memory( - pid, + &handle, address_of_thread + local_debug_offsets.thread_state.native_thread_id, sizeof(pid_t), &this_tid); if (bytes == -1) { + cleanup_proc_handle(&handle); return -1; } + if (this_tid == tid) { break; } + bytes = read_memory( - pid, + &handle, address_of_thread + local_debug_offsets.thread_state.next, sizeof(void*), &address_of_thread); if (bytes == -1) { + cleanup_proc_handle(&handle); return -1; } } + } else { + bytes = read_memory( + &handle, + address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_main, + sizeof(void*), + &address_of_thread); + if (bytes == -1) { + cleanup_proc_handle(&handle); + return -1; + } } if (address_of_thread == 0) { PyErr_SetString(PyExc_RuntimeError, "No thread state found"); + cleanup_proc_handle(&handle); return -1; } uintptr_t eval_breaker; bytes = read_memory( - pid, + &handle, address_of_thread + local_debug_offsets.debugger_support.eval_breaker, sizeof(uintptr_t), &eval_breaker); if (bytes == -1) { + cleanup_proc_handle(&handle); return -1; } + eval_breaker |= _PY_EVAL_PLEASE_STOP_BIT; // Ensure our path is not too long if (local_debug_offsets.debugger_support.debugger_script_path_size <= strlen(debugger_script_path)) { PyErr_SetString(PyExc_ValueError, "Debugger script path is too long"); + cleanup_proc_handle(&handle); return -1; } @@ -638,11 +734,12 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) local_debug_offsets.debugger_support.remote_debugger_support + local_debug_offsets.debugger_support.debugger_script_path); bytes = write_memory( - pid, + &handle, debugger_script_path_addr, strlen(debugger_script_path) + 1, debugger_script_path); if (bytes == -1) { + cleanup_proc_handle(&handle); return -1; } } @@ -653,24 +750,35 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) local_debug_offsets.debugger_support.remote_debugger_support + local_debug_offsets.debugger_support.debugger_pending_call); bytes = write_memory( - pid, + &handle, debugger_pending_call_addr, sizeof(int), &pending_call); if (bytes == -1) { + cleanup_proc_handle(&handle); return -1; } bytes = write_memory( - pid, + &handle, address_of_thread + local_debug_offsets.debugger_support.eval_breaker, sizeof(uintptr_t), &eval_breaker); if (bytes == -1) { + cleanup_proc_handle(&handle); return -1; } + bytes = read_memory( + &handle, + address_of_thread + local_debug_offsets.debugger_support.eval_breaker, + sizeof(uintptr_t), + &eval_breaker); + + printf("Eval breaker: %p\n", (void*)eval_breaker); + + cleanup_proc_handle(&handle); return 0; } \ No newline at end of file diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 662d410b68cfd1..e310b4fb7411d0 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2467,15 +2467,19 @@ static PyObject * sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) /*[clinic end generated code: output=7d94c56afe4a52c0 input=5749b0253d5b588c]*/ { - const char *debugger_script_path = PyUnicode_AsUTF8(script); - if (debugger_script_path == NULL) { +#ifdef MS_WINDOWS + // Get UTF-16 (wide char) version of the path for Windows + wchar_t *debugger_script_path_w = PyUnicode_AsWideCharString(script, NULL); + if (debugger_script_path_w == NULL) { return NULL; } -#ifdef MS_WINDOWS - DWORD attr = GetFileAttributesA(debugger_script_path); + // Check file attributes using wide character version (W) instead of ANSI (A) + DWORD attr = GetFileAttributesW(debugger_script_path_w); if (attr == INVALID_FILE_ATTRIBUTES) { DWORD err = GetLastError(); + PyMem_Free(debugger_script_path_w); + if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) { PyErr_SetString(PyExc_FileNotFoundError, "Script file does not exist"); } @@ -2487,7 +2491,21 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) } return NULL; } + + // Get UTF-8 version for the rest of the code + const char *debugger_script_path = PyUnicode_AsUTF8(script); + if (debugger_script_path == NULL) { + PyMem_Free(debugger_script_path_w); + return NULL; + } + + PyMem_Free(debugger_script_path_w); #else + const char *debugger_script_path = PyUnicode_AsUTF8(script); + if (debugger_script_path == NULL) { + return NULL; + } + if (access(debugger_script_path, F_OK | R_OK) != 0) { switch (errno) { case ENOENT: @@ -2502,7 +2520,6 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) return NULL; } #endif - if (_PySysRemoteDebug_SendExec(pid, 0, debugger_script_path) < 0) { return NULL; } From 444453c46fadf025f65f95c94083c0b74cab34e1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 24 Mar 2025 22:21:11 +0000 Subject: [PATCH 05/36] Update windows files --- PCbuild/_freeze_module.vcxproj | 1 + PCbuild/_freeze_module.vcxproj.filters | 3 +++ PCbuild/build.bat | 2 ++ PCbuild/pythoncore.vcxproj | 2 ++ PCbuild/pythoncore.vcxproj.filters | 3 +++ 5 files changed, 11 insertions(+) diff --git a/PCbuild/_freeze_module.vcxproj b/PCbuild/_freeze_module.vcxproj index 44292ee32b19fa..59d664dc1745c1 100644 --- a/PCbuild/_freeze_module.vcxproj +++ b/PCbuild/_freeze_module.vcxproj @@ -260,6 +260,7 @@ + diff --git a/PCbuild/_freeze_module.vcxproj.filters b/PCbuild/_freeze_module.vcxproj.filters index 3842f52e514bb4..0a64de1d4f0e88 100644 --- a/PCbuild/_freeze_module.vcxproj.filters +++ b/PCbuild/_freeze_module.vcxproj.filters @@ -406,6 +406,9 @@ Source Files + + Source Files + Source Files diff --git a/PCbuild/build.bat b/PCbuild/build.bat index 9fbc3e62ce75cc..db67ae72981345 100644 --- a/PCbuild/build.bat +++ b/PCbuild/build.bat @@ -95,6 +95,7 @@ if "%~1"=="--experimental-jit" (set UseJIT=true) & (set UseTIER2=1) & shift & go if "%~1"=="--experimental-jit-off" (set UseJIT=true) & (set UseTIER2=3) & shift & goto CheckOpts if "%~1"=="--experimental-jit-interpreter" (set UseTIER2=4) & shift & goto CheckOpts if "%~1"=="--experimental-jit-interpreter-off" (set UseTIER2=6) & shift & goto CheckOpts +if "%~1"=="--without-remote-debug" (set DisableRemoteDebug=true) & shift & goto CheckOpts if "%~1"=="--pystats" (set PyStats=1) & shift & goto CheckOpts if "%~1"=="--tail-call-interp" (set UseTailCallInterp=true) & shift & goto CheckOpts rem These use the actual property names used by MSBuild. We could just let @@ -192,6 +193,7 @@ echo on /p:UseTIER2=%UseTIER2%^ /p:PyStats=%PyStats%^ /p:UseTailCallInterp=%UseTailCallInterp%^ + /p:DisableRemoteDebug=%DisableRemoteDebug%^ %1 %2 %3 %4 %5 %6 %7 %8 %9 @echo off diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 2e639ddfc320f5..009e4f2302e311 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -108,6 +108,7 @@ _Py_TIER2=$(UseTIER2);%(PreprocessorDefinitions) Py_TAIL_CALL_INTERP=1;%(PreprocessorDefinitions) HAVE_COMPUTED_GOTOS;%(PreprocessorDefinitions) + Py_REMOTE_DEBUG;%(PreprocessorDefinitions) version.lib;ws2_32.lib;pathcch.lib;bcrypt.lib;%(AdditionalDependencies) @@ -640,6 +641,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 31064f50f5c8d7..134212662ab379 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -1490,6 +1490,9 @@ Python + + Python + Python From 1d3ad3c4b5650a12e9453258dfb7f43bd583ac2a Mon Sep 17 00:00:00 2001 From: Ivona99 Date: Tue, 25 Mar 2025 13:19:10 +0100 Subject: [PATCH 06/36] Fix syntax error --- Python/remote_debugging.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index d158517f646ddc..189bec25390640 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -423,8 +423,7 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* m #ifdef MS_WINDOWS static uintptr_t -static uintptr_t -search_map_for_section(proc_handle_t *handle, const char* secname, const char* substr) { +search_map_for_section(proc_handle_t* handle, const char* secname, const char* substr) { //TODO: Implement this function PyErr_SetString(PyExc_RuntimeError, "search_map_for_section not implemented on Windows"); return 0; @@ -781,4 +780,4 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) cleanup_proc_handle(&handle); return 0; -} \ No newline at end of file +} From fd993e3d59fab50e2f5e8e5febb84b30f166695c Mon Sep 17 00:00:00 2001 From: Ivona99 Date: Thu, 27 Mar 2025 11:03:10 +0100 Subject: [PATCH 07/36] Locate PyRuntime on Windows The PyRuntime section can be found in either the main executable (python.exe) or the Python DLL (pythonXY.dll, where X and Y represent the major and minor version numbers). Scan the modules of the process to locate the PyRuntime address in one of these modules. --- Python/remote_debugging.c | 111 ++++++++++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 9 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 189bec25390640..c67aa0c605a547 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -422,26 +422,119 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* m #ifdef MS_WINDOWS +static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* secname) { + HANDLE hFile = CreateFileW(mod_path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (hFile == INVALID_HANDLE_VALUE) { + PyErr_SetFromWindowsErr(0); + return NULL; + } + HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, 0); + if (!hMap) { + PyErr_SetFromWindowsErr(0); + CloseHandle(hFile); + return NULL; + } + + BYTE* mapView = (BYTE*)MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0); + if (!mapView) { + PyErr_SetFromWindowsErr(0); + CloseHandle(hMap); + CloseHandle(hFile); + return NULL; + } + + IMAGE_DOS_HEADER* pDOSHeader = (IMAGE_DOS_HEADER*)mapView; + if (pDOSHeader->e_magic != IMAGE_DOS_SIGNATURE) { + PyErr_SetString(PyExc_RuntimeError, "Invalid DOS signature."); + UnmapViewOfFile(mapView); + CloseHandle(hMap); + CloseHandle(hFile); + return NULL; + } + + IMAGE_NT_HEADERS* pNTHeaders = (IMAGE_NT_HEADERS*)(mapView + pDOSHeader->e_lfanew); + if (pNTHeaders->Signature != IMAGE_NT_SIGNATURE) { + PyErr_SetString(PyExc_RuntimeError, "Invalid NT signature."); + UnmapViewOfFile(mapView); + CloseHandle(hMap); + CloseHandle(hFile); + return NULL; + } + + IMAGE_SECTION_HEADER* pSection_header = (IMAGE_SECTION_HEADER*)(mapView + pDOSHeader->e_lfanew + sizeof(IMAGE_NT_HEADERS)); + void* runtime_addr = NULL; + + for (int i = 0; i < pNTHeaders->FileHeader.NumberOfSections; i++) { + const char* name = (const char*)pSection_header[i].Name; + if (strncmp(name, secname, IMAGE_SIZEOF_SHORT_NAME) == 0) { + runtime_addr = remote_base + pSection_header[i].VirtualAddress; + break; + } + } + + UnmapViewOfFile(mapView); + CloseHandle(hMap); + CloseHandle(hFile); + + return runtime_addr; +} + + static uintptr_t -search_map_for_section(proc_handle_t* handle, const char* secname, const char* substr) { - //TODO: Implement this function - PyErr_SetString(PyExc_RuntimeError, "search_map_for_section not implemented on Windows"); - return 0; +search_windows_map_for_section(proc_handle_t* handle, const char* secname, const wchar_t* substr) { + HANDLE hProcSnap; + do { + hProcSnap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, handle->pid); + } while (hProcSnap == INVALID_HANDLE_VALUE && GetLastError() == ERROR_BAD_LENGTH); + + if (hProcSnap == INVALID_HANDLE_VALUE) { + PyErr_SetString(PyExc_PermissionError, "Unable to create module snapshot. Check permissions or PID."); + return 0; + } + + MODULEENTRY32W moduleEntry; + moduleEntry.dwSize = sizeof(moduleEntry); + void* runtime_addr = NULL; + + for (BOOL hasModule = Module32FirstW(hProcSnap, &moduleEntry); hasModule; hasModule = Module32NextW(hProcSnap, &moduleEntry)) { + // Look for either python executable or DLL + if (wcsstr(moduleEntry.szModule, substr)) { + runtime_addr = analyze_pe(moduleEntry.szExePath, moduleEntry.modBaseAddr, secname); + if (runtime_addr != NULL) { + break; + } + } + } + + CloseHandle(hProcSnap); + return (uintptr_t)runtime_addr; } #endif // MS_WINDOWS // Get the PyRuntime section address for any platform static uintptr_t -get_py_runtime(proc_handle_t *handle) +get_py_runtime(proc_handle_t* handle) { - // Try libpython first, then fall back to python - uintptr_t address = search_map_for_section(handle, "PyRuntime", "libpython"); + uintptr_t address = 0; + +#ifndef MS_WINDOWS + // On non-Windows platforms, try libpython first, then fall back to python + address = search_map_for_section(handle, "PyRuntime", "libpython"); if (address == 0) { // TODO: Differentiate between not found and error PyErr_Clear(); address = search_map_for_section(handle, "PyRuntime", "python"); } +#else + // On Windows, search for 'python' in executable or DLL + address = search_windows_map_for_section(handle, "PyRuntime", L"python"); + if (address == 0) { + // Error out: 'python' substring covers both executable and DLL + PyErr_SetString(PyExc_RuntimeError, "Failed to find the PyRuntime section in the process."); + } +#endif + return address; } @@ -452,7 +545,7 @@ read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* ds #ifdef MS_WINDOWS // TODO: Implement this function PyErr_SetString(PyExc_RuntimeError, "Memory reading is not supported on Windows"); - return -1; + return -1; #elif defined(__linux__) && HAVE_PROCESS_VM_READV struct iovec local[1]; struct iovec remote[1]; @@ -512,7 +605,7 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const #ifdef MS_WINDOWS // TODO: Implement this function PyErr_SetString(PyExc_RuntimeError, "Memory writing is not supported on Windows"); - return -1; + return -1; #elif defined(__linux__) && HAVE_PROCESS_VM_READV struct iovec local[1]; struct iovec remote[1]; From 96798c359d03183df8fafaabb9ae2863d1728813 Mon Sep 17 00:00:00 2001 From: Ivona99 Date: Thu, 27 Mar 2025 11:07:52 +0100 Subject: [PATCH 08/36] Read process memory on Windows --- Python/remote_debugging.c | 235 ++++++++++++++++++++------------------ 1 file changed, 121 insertions(+), 114 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index c67aa0c605a547..efb17f4c341f33 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -38,9 +38,9 @@ #ifdef MS_WINDOWS // Windows includes and definitions - #include - #include - #include +#include +#include +#include #endif #include @@ -51,10 +51,10 @@ #include #include #ifndef MS_WINDOWS - #include - #include - #include - #include +#include +#include +#include +#include #endif #ifndef Py_BUILD_CORE_BUILTIN @@ -156,7 +156,7 @@ return_section_address( int nsects = cmd->nsects; struct section_64* sec = (struct section_64*)( (void*)cmd + sizeof(struct segment_command_64) - ); + ); for (int j = 0; j < nsects; j++) { if (strcmp(sec[j].sectname, section) == 0) { return base + sec[j].addr - vmaddr; @@ -201,19 +201,19 @@ search_section_in_file(const char* secname, char* path, uintptr_t base, mach_vm_ struct mach_header_64* hdr = (struct mach_header_64*)map; switch (hdr->magic) { - case MH_MAGIC: - case MH_CIGAM: - case FAT_MAGIC: - case FAT_CIGAM: - PyErr_SetString(PyExc_RuntimeError, "32-bit Mach-O binaries are not supported"); - break; - case MH_MAGIC_64: - case MH_CIGAM_64: - result = return_section_address(secname, proc_ref, base, map); - break; - default: - PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic"); - break; + case MH_MAGIC: + case MH_CIGAM: + case FAT_MAGIC: + case FAT_CIGAM: + PyErr_SetString(PyExc_RuntimeError, "32-bit Mach-O binaries are not supported"); + break; + case MH_MAGIC_64: + case MH_CIGAM_64: + result = return_section_address(secname, proc_ref, base, map); + break; + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown Mach-O magic"); + break; } munmap(map, fs.st_size); @@ -254,13 +254,13 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* s int match_found = 0; char map_filename[MAXPATHLEN + 1]; while (mach_vm_region( - proc_ref, - &address, - &size, - VM_REGION_BASIC_INFO_64, - (vm_region_info_t)®ion_info, - &count, - &object_name) == KERN_SUCCESS) + proc_ref, + &address, + &size, + VM_REGION_BASIC_INFO_64, + (vm_region_info_t)®ion_info, + &count, + &object_name) == KERN_SUCCESS) { if ((region_info.protection & VM_PROT_READ) == 0 || (region_info.protection & VM_PROT_EXECUTE) == 0) { @@ -404,7 +404,7 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* m if (section != NULL && first_load_segment != NULL) { uintptr_t elf_load_addr = first_load_segment->p_vaddr - - (first_load_segment->p_vaddr % first_load_segment->p_align); + - (first_load_segment->p_vaddr % first_load_segment->p_align); result = start_address + (uintptr_t)section->sh_addr - elf_load_addr; } @@ -543,9 +543,16 @@ static Py_ssize_t read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* dst) { #ifdef MS_WINDOWS - // TODO: Implement this function - PyErr_SetString(PyExc_RuntimeError, "Memory reading is not supported on Windows"); - return -1; + SIZE_T read_bytes = 0; + SIZE_T result = 0; + do { + if (!ReadProcessMemory(handle->hProcess, (LPCVOID)(remote_address + result), (char*)dst + result, len - result, &read_bytes)) { + PyErr_SetFromWindowsErr(0); + return -1; + } + result += read_bytes; + } while (result < len); + return (Py_ssize_t)result; #elif defined(__linux__) && HAVE_PROCESS_VM_READV struct iovec local[1]; struct iovec remote[1]; @@ -570,22 +577,22 @@ read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* ds #elif defined(__APPLE__) && TARGET_OS_OSX Py_ssize_t result = -1; kern_return_t kr = mach_vm_read_overwrite( - pid_to_task(handle->pid), - (mach_vm_address_t)remote_address, - len, - (mach_vm_address_t)dst, - (mach_vm_size_t*)&result); + pid_to_task(handle->pid), + (mach_vm_address_t)remote_address, + len, + (mach_vm_address_t)dst, + (mach_vm_size_t*)&result); if (kr != KERN_SUCCESS) { switch (kr) { - case KERN_PROTECTION_FAILURE: - PyErr_SetString(PyExc_PermissionError, "Not enough permissions to read memory"); - break; - case KERN_INVALID_ARGUMENT: - PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_read_overwrite"); - break; - default: - PyErr_SetString(PyExc_RuntimeError, "Unknown error reading memory"); + case KERN_PROTECTION_FAILURE: + PyErr_SetString(PyExc_PermissionError, "Not enough permissions to read memory"); + break; + case KERN_INVALID_ARGUMENT: + PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_read_overwrite"); + break; + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown error reading memory"); } return -1; } @@ -629,21 +636,21 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const return result; #elif defined(__APPLE__) && TARGET_OS_OSX kern_return_t kr = mach_vm_write( - pid_to_task(handle->pid), - (mach_vm_address_t)remote_address, - (vm_offset_t)src, - (mach_msg_type_number_t)len); + pid_to_task(handle->pid), + (mach_vm_address_t)remote_address, + (vm_offset_t)src, + (mach_msg_type_number_t)len); if (kr != KERN_SUCCESS) { switch (kr) { - case KERN_PROTECTION_FAILURE: - PyErr_SetString(PyExc_PermissionError, "Not enough permissions to write memory"); - break; - case KERN_INVALID_ARGUMENT: - PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_write"); - break; - default: - PyErr_SetString(PyExc_RuntimeError, "Unknown error writing memory"); + case KERN_PROTECTION_FAILURE: + PyErr_SetString(PyExc_PermissionError, "Not enough permissions to write memory"); + break; + case KERN_INVALID_ARGUMENT: + PyErr_SetString(PyExc_PermissionError, "Invalid argument to mach_vm_write"); + break; + default: + PyErr_SetString(PyExc_RuntimeError, "Unknown error writing memory"); } return -1; } @@ -702,7 +709,7 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) uintptr_t runtime_start_address; struct _Py_DebugOffsets local_debug_offsets; - + if (read_offsets(&handle, &runtime_start_address, &local_debug_offsets)) { cleanup_proc_handle(&handle); return -1; @@ -712,10 +719,10 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) uintptr_t address_of_interpreter_state; Py_ssize_t bytes = read_memory( - &handle, - runtime_start_address + interpreter_state_list_head, - sizeof(void*), - &address_of_interpreter_state); + &handle, + runtime_start_address + interpreter_state_list_head, + sizeof(void*), + &address_of_interpreter_state); if (bytes == -1) { cleanup_proc_handle(&handle); return -1; @@ -729,15 +736,15 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) int is_remote_debugging_enabled = 0; bytes = read_memory( - &handle, - address_of_interpreter_state + local_debug_offsets.debugger_support.remote_debugging_enabled, - sizeof(int), - &is_remote_debugging_enabled); + &handle, + address_of_interpreter_state + local_debug_offsets.debugger_support.remote_debugging_enabled, + sizeof(int), + &is_remote_debugging_enabled); if (bytes == -1) { cleanup_proc_handle(&handle); return -1; } - + if (is_remote_debugging_enabled == 0) { PyErr_SetString(PyExc_RuntimeError, "Remote debugging is not enabled in the remote process"); cleanup_proc_handle(&handle); @@ -746,37 +753,37 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) uintptr_t address_of_thread; pid_t this_tid = 0; - + if (tid != 0) { bytes = read_memory( - &handle, - address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_head, - sizeof(void*), - &address_of_thread); + &handle, + address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_head, + sizeof(void*), + &address_of_thread); if (bytes == -1) { cleanup_proc_handle(&handle); return -1; } while (address_of_thread != 0) { bytes = read_memory( - &handle, - address_of_thread + local_debug_offsets.thread_state.native_thread_id, - sizeof(pid_t), - &this_tid); + &handle, + address_of_thread + local_debug_offsets.thread_state.native_thread_id, + sizeof(pid_t), + &this_tid); if (bytes == -1) { cleanup_proc_handle(&handle); return -1; } - + if (this_tid == tid) { break; } - + bytes = read_memory( - &handle, - address_of_thread + local_debug_offsets.thread_state.next, - sizeof(void*), - &address_of_thread); + &handle, + address_of_thread + local_debug_offsets.thread_state.next, + sizeof(void*), + &address_of_thread); if (bytes == -1) { cleanup_proc_handle(&handle); return -1; @@ -784,10 +791,10 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) } } else { bytes = read_memory( - &handle, - address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_main, - sizeof(void*), - &address_of_thread); + &handle, + address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_main, + sizeof(void*), + &address_of_thread); if (bytes == -1) { cleanup_proc_handle(&handle); return -1; @@ -802,15 +809,15 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) uintptr_t eval_breaker; bytes = read_memory( - &handle, - address_of_thread + local_debug_offsets.debugger_support.eval_breaker, - sizeof(uintptr_t), - &eval_breaker); + &handle, + address_of_thread + local_debug_offsets.debugger_support.eval_breaker, + sizeof(uintptr_t), + &eval_breaker); if (bytes == -1) { cleanup_proc_handle(&handle); return -1; } - + eval_breaker |= _PY_EVAL_PLEASE_STOP_BIT; // Ensure our path is not too long @@ -822,14 +829,14 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) if (debugger_script_path != NULL) { uintptr_t debugger_script_path_addr = ( - address_of_thread + - local_debug_offsets.debugger_support.remote_debugger_support + - local_debug_offsets.debugger_support.debugger_script_path); + address_of_thread + + local_debug_offsets.debugger_support.remote_debugger_support + + local_debug_offsets.debugger_support.debugger_script_path); bytes = write_memory( - &handle, - debugger_script_path_addr, - strlen(debugger_script_path) + 1, - debugger_script_path); + &handle, + debugger_script_path_addr, + strlen(debugger_script_path) + 1, + debugger_script_path); if (bytes == -1) { cleanup_proc_handle(&handle); return -1; @@ -838,14 +845,14 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) int pending_call = 1; uintptr_t debugger_pending_call_addr = ( - address_of_thread + - local_debug_offsets.debugger_support.remote_debugger_support + - local_debug_offsets.debugger_support.debugger_pending_call); + address_of_thread + + local_debug_offsets.debugger_support.remote_debugger_support + + local_debug_offsets.debugger_support.debugger_pending_call); bytes = write_memory( - &handle, - debugger_pending_call_addr, - sizeof(int), - &pending_call); + &handle, + debugger_pending_call_addr, + sizeof(int), + &pending_call); if (bytes == -1) { cleanup_proc_handle(&handle); @@ -853,10 +860,10 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) } bytes = write_memory( - &handle, - address_of_thread + local_debug_offsets.debugger_support.eval_breaker, - sizeof(uintptr_t), - &eval_breaker); + &handle, + address_of_thread + local_debug_offsets.debugger_support.eval_breaker, + sizeof(uintptr_t), + &eval_breaker); if (bytes == -1) { cleanup_proc_handle(&handle); @@ -864,13 +871,13 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) } bytes = read_memory( - &handle, - address_of_thread + local_debug_offsets.debugger_support.eval_breaker, - sizeof(uintptr_t), - &eval_breaker); + &handle, + address_of_thread + local_debug_offsets.debugger_support.eval_breaker, + sizeof(uintptr_t), + &eval_breaker); printf("Eval breaker: %p\n", (void*)eval_breaker); cleanup_proc_handle(&handle); return 0; -} + } From 075ca6554773e72b541a15c3cf0679103b55481a Mon Sep 17 00:00:00 2001 From: Ivona99 Date: Thu, 27 Mar 2025 11:14:38 +0100 Subject: [PATCH 09/36] Write to process memory on Windows --- Python/remote_debugging.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index efb17f4c341f33..46f8f44a61844c 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -610,9 +610,16 @@ static Py_ssize_t write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) { #ifdef MS_WINDOWS - // TODO: Implement this function - PyErr_SetString(PyExc_RuntimeError, "Memory writing is not supported on Windows"); + SIZE_T written = 0; + SIZE_T result = 0; + do { + if (!WriteProcessMemory(handle->hProcess, (LPVOID)(remote_address + result), (const char*)src + result, len - result, &written)) { + PyErr_SetFromWindowsErr(0); return -1; + } + result += written; + } while (result < len); + return (Py_ssize_t)result; #elif defined(__linux__) && HAVE_PROCESS_VM_READV struct iovec local[1]; struct iovec remote[1]; From 45e73c59ff596069b518f442b97ae5bcfcea5c06 Mon Sep 17 00:00:00 2001 From: Ivona99 Date: Fri, 28 Mar 2025 14:32:20 +0100 Subject: [PATCH 10/36] Prevent attaching to a process with a different architecture Ensure that the caller and target processes have matching architectures before proceeding. Attaching to a 64-bit process from a 32-bit process will fail when using CreateToolhelp32Snapshot, and attempting to attach to a 32-bit process from a 64-bit process may cause issues during PE file parsing. To avoid potential errors abort the operation if the architectures differ. --- Python/remote_debugging.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 46f8f44a61844c..0f414e1b4ed1b5 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -422,6 +422,28 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* m #ifdef MS_WINDOWS +static BOOL is_process64Bit(HANDLE hProcess) { + BOOL isWow64 = FALSE; + if (IsWow64Process(hProcess, &isWow64)) { + return !isWow64; + } + else { + PyErr_SetString(PyExc_RuntimeError, "Failed to determine the architecture of the process."); + return FALSE; + } +} + +static +BOOL is_current_process64Bit() { +#if defined(_WIN64) + return TRUE; +#elif defined(_WIN32) + return is_process64Bit(GetCurrentProcess()); +#else + return FALSE; +#endif +} + static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* secname) { HANDLE hFile = CreateFileW(mod_path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (hFile == INVALID_HANDLE_VALUE) { @@ -482,6 +504,16 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* static uintptr_t search_windows_map_for_section(proc_handle_t* handle, const char* secname, const wchar_t* substr) { + // Check if the architecture of the current process matches the target process + BOOL currentProcess64Bit = is_current_process64Bit(); + BOOL targetProcess64Bit = is_process64Bit(handle->hProcess); + + // If the architectures of the current and target processes differ, abort + if (currentProcess64Bit != targetProcess64Bit) { + PyErr_SetString(PyExc_RuntimeError, "Bitness mismatch between current process and target process."); + return 0; + } + HANDLE hProcSnap; do { hProcSnap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, handle->pid); From ed2f32535e87a0b3af147f4dced00aa174c37a11 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 18:10:14 -0400 Subject: [PATCH 11/36] Revert "Prevent attaching to a process with a different architecture" This reverts commit 746ecfc5b197e28c492dc5db004e81a9bf6cf48d. That commit isn't correct. It conflates the return from `IsWow64Process` (whether the process is running under 32-bit emulation on a 64-bit process) with whether the process is 64-bit. A 32-bit process on 32-bit Windows would have `IsWow64Process` set our `isWow64` flag to `FALSE`, and we'd then incorrectly return `TRUE` from `is_process64Bit`. --- Python/remote_debugging.c | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 0f414e1b4ed1b5..46f8f44a61844c 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -422,28 +422,6 @@ search_map_for_section(proc_handle_t *handle, const char* secname, const char* m #ifdef MS_WINDOWS -static BOOL is_process64Bit(HANDLE hProcess) { - BOOL isWow64 = FALSE; - if (IsWow64Process(hProcess, &isWow64)) { - return !isWow64; - } - else { - PyErr_SetString(PyExc_RuntimeError, "Failed to determine the architecture of the process."); - return FALSE; - } -} - -static -BOOL is_current_process64Bit() { -#if defined(_WIN64) - return TRUE; -#elif defined(_WIN32) - return is_process64Bit(GetCurrentProcess()); -#else - return FALSE; -#endif -} - static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* secname) { HANDLE hFile = CreateFileW(mod_path, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (hFile == INVALID_HANDLE_VALUE) { @@ -504,16 +482,6 @@ static void* analyze_pe(const wchar_t* mod_path, BYTE* remote_base, const char* static uintptr_t search_windows_map_for_section(proc_handle_t* handle, const char* secname, const wchar_t* substr) { - // Check if the architecture of the current process matches the target process - BOOL currentProcess64Bit = is_current_process64Bit(); - BOOL targetProcess64Bit = is_process64Bit(handle->hProcess); - - // If the architectures of the current and target processes differ, abort - if (currentProcess64Bit != targetProcess64Bit) { - PyErr_SetString(PyExc_RuntimeError, "Bitness mismatch between current process and target process."); - return 0; - } - HANDLE hProcSnap; do { hProcSnap = CreateToolhelp32Snapshot(TH32CS_SNAPMODULE, handle->pid); From a9d3ea98346d92b589c4e33f09b57de2ff6bdc9d Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 19:02:02 -0400 Subject: [PATCH 12/36] Remove unused struct member --- Include/cpython/pystate.h | 1 - 1 file changed, 1 deletion(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index 7a04c1409fdeba..f9d1ca3dee0da5 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -32,7 +32,6 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); /* Remote debugger support */ # define MAX_SCRIPT_PATH_SIZE 512 typedef struct _remote_debugger_support { - int enabled; int debugger_pending_call; char debugger_script_path[MAX_SCRIPT_PATH_SIZE]; } _PyRemoteDebuggerSupport; From e235e624b65d12fcc2794f994127bf06e5eb48ff Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 19:03:18 -0400 Subject: [PATCH 13/36] Set an explicit size for a struct member This is useful because debuggers will write to this variable remotely, and it's helpful for it to have a well known size rather than one that could vary per platform. --- Include/cpython/pystate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h index f9d1ca3dee0da5..05b618797a63cc 100644 --- a/Include/cpython/pystate.h +++ b/Include/cpython/pystate.h @@ -32,7 +32,7 @@ typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); /* Remote debugger support */ # define MAX_SCRIPT_PATH_SIZE 512 typedef struct _remote_debugger_support { - int debugger_pending_call; + int32_t debugger_pending_call; char debugger_script_path[MAX_SCRIPT_PATH_SIZE]; } _PyRemoteDebuggerSupport; From d51dda089a87bd18c016360d20e7585666eb4271 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 19:04:16 -0400 Subject: [PATCH 14/36] Revert an accidental whitespace change --- Include/internal/pycore_interp_structs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Include/internal/pycore_interp_structs.h b/Include/internal/pycore_interp_structs.h index 26aac26382fe49..ee92a1e6f2dec9 100644 --- a/Include/internal/pycore_interp_structs.h +++ b/Include/internal/pycore_interp_structs.h @@ -936,6 +936,7 @@ struct _is { _PyThreadStateImpl _initial_thread; // _initial_thread should be the last field of PyInterpreterState. // See https://github.com/python/cpython/issues/127117. + #if !defined(Py_GIL_DISABLED) && defined(Py_STACKREF_DEBUG) uint64_t next_stackref; _Py_hashtable_t *open_stackrefs_table; From 38a4d51f679e5bed68de3396c91a26e50f9ef532 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 19:11:35 -0400 Subject: [PATCH 15/36] Factor running a debugger script into a helper function --- Python/ceval_gil.c | 118 +++++++++++++++++++++++++-------------------- 1 file changed, 66 insertions(+), 52 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 361a01d2c5c181..604445c4bae10f 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1192,6 +1192,71 @@ _PyEval_DisableGIL(PyThreadState *tstate) } #endif +#ifdef Py_REMOTE_DEBUG +// Note that this function is inline to avoid creating a PLT entry +// that would be an easy target for a ROP gadget. +static inline void run_remote_debugger_script(const char *path) +{ + if (0 != PySys_Audit("remote_debugger_script", "s", path)) { + PyErr_FormatUnraisable("Error when auditing remote debugger script %s", path); + return; + } + + // Open the debugger script with the open code hook. Unfortunately this forces us to handle + // the resulting Python object, which is a file object and therefore we need to call + // Python methods on it instead of the simpler C equivalents. + PyObject* fileobj = PyFile_OpenCode(path); + if (!fileobj) { + PyErr_FormatUnraisable("Error when opening debugger script %s", path); + return; + } + +#ifdef MS_WINDOWS + PyObject* path_obj = PyUnicode_FromString(path); + if (!path_obj) { + PyErr_FormatUnraisable("Error when converting remote debugger script path %s to Unicode", path); + return; + } + wchar_t* wpath = PyUnicode_AsWideCharString(path_obj, NULL); + Py_DECREF(path_obj); + if (!wpath) { + PyErr_FormatUnraisable("Error when converting remote debugger script path %s to wide char", path); + return; + } + FILE* f = _wfopen(wpath, L"r"); +#else + int fd = PyObject_AsFileDescriptor(fileobj); + if (fd == -1) { + PyErr_FormatUnraisable("Error when getting file descriptor for debugger script %s", path); + return; + } + FILE* f = fdopen(fd, "r"); +#endif + + if (!f) { + PyErr_SetFromErrno(PyExc_OSError); + } else { + PyRun_AnyFile(f, path); + } + +#ifdef MS_WINDOWS + PyMem_Free(wpath); + fclose(f); +#endif + + if (PyErr_Occurred()) { + PyErr_FormatUnraisable("Error executing debugger script %s", path); + } + + PyObject* res = PyObject_CallMethod(fileobj, "close", ""); + if (!res) { + PyErr_FormatUnraisable("Error when closing debugger script %s", path); + } else { + Py_DECREF(res); + } + Py_DECREF(fileobj); +} +#endif /* Do periodic things, like check for signals and async I/0. * We need to do reasonably frequently, but not too frequently. @@ -1327,58 +1392,7 @@ _Py_HandlePending(PyThreadState *tstate) tstate->remote_debugger_support.debugger_pending_call = 0; const char *path = tstate->remote_debugger_support.debugger_script_path; if (*path) { - if (0 != PySys_Audit("remote_debugger_script", "s", path)) { - PyErr_FormatUnraisable("Error when auditing remote debugger script %s", path); - } else { - // Open the debugger script with the open code hook. Unfortunately this forces us to handle - // the resulting Python object, which is a file object and therefore we need to call - // Python methods on it instead of the simpler C equivalents. - PyObject* fileobj = PyFile_OpenCode(path); - if (!fileobj) { - PyErr_FormatUnraisable("Error when opening debugger script %s", path); - return 0; - } -#ifdef MS_WINDOWS - PyObject* path_obj = PyUnicode_FromString(path); - if (!path_obj) { - PyErr_FormatUnraisable("Error when converting remote debugger script path %s to Unicode", path); - return 0; - } - wchar_t* wpath = PyUnicode_AsWideCharString(path_obj, NULL); - Py_DECREF(path_obj); - if (!wpath) { - PyErr_FormatUnraisable("Error when converting remote debugger script path %s to wide char", path); - return 0; - } - FILE* f = _wfopen(wpath, L"r"); -#else - int fd = PyObject_AsFileDescriptor(fileobj); - if (fd == -1) { - PyErr_FormatUnraisable("Error when getting file descriptor for debugger script %s", path); - return 0; - } - FILE* f = fdopen(fd, "r"); -#endif - if (!f) { - PyErr_SetFromErrno(PyExc_OSError); - } else { - PyRun_AnyFile(f, path); - } -#ifdef MS_WINDOWS - PyMem_Free(wpath); - fclose(f); -#endif - if (PyErr_Occurred()) { - PyErr_FormatUnraisable("Error executing debugger script %s", path); - } - PyObject* res = PyObject_CallMethod(fileobj, "close", ""); - if (!res) { - PyErr_FormatUnraisable("Error when closing debugger script %s", path); - } else { - Py_DECREF(res); - } - Py_DECREF(fileobj); - } + run_remote_debugger_script(path); } } } From a98898dfdaeafec8483e8bb657ff12c972489306 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 19:12:21 -0400 Subject: [PATCH 16/36] Harden remote debugging integration Require the flags for turning this on to be set to exactly 1 to avoid accidentally triggering remote debugging in the case of heap corruption. Make a heap copy of the script path before using it to avoid the buffer being overwritten while we're still using it by another debugger. --- Python/ceval_gil.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 604445c4bae10f..739e4a19197930 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1387,13 +1387,29 @@ _Py_HandlePending(PyThreadState *tstate) #ifdef Py_REMOTE_DEBUG const PyConfig *config = _PyInterpreterState_GetConfig(tstate->interp); - if (config->remote_debug) { - if (tstate->remote_debugger_support.debugger_pending_call) { - tstate->remote_debugger_support.debugger_pending_call = 0; - const char *path = tstate->remote_debugger_support.debugger_script_path; + if (config->remote_debug == 1 + && tstate->remote_debugger_support.debugger_pending_call == 1) + { + tstate->remote_debugger_support.debugger_pending_call = 0; + + // Immediately make a copy in case of a race with another debugger + // process that's trying to write to the buffer. At least this way + // we'll be internally consistent: what we audit is what we run. + const size_t pathsz + = sizeof(tstate->remote_debugger_support.debugger_script_path); + + char *path = PyMem_Malloc(pathsz); + if (path) { + // And don't assume the debugger correctly null terminated it. + memcpy( + path, + tstate->remote_debugger_support.debugger_script_path, + pathsz); + path[pathsz - 1] = '\0'; if (*path) { run_remote_debugger_script(path); } + PyMem_Free(path); } } #endif From 997b557ce769e9c92b5d59a4ae53a3504b9bf8e3 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 19:23:18 -0400 Subject: [PATCH 17/36] Use _fdopen to open the debugger script on Windows This avoids the need to reopen it by (wide character) path. --- Python/ceval_gil.c | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 739e4a19197930..914ea8a38e4d8f 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1211,25 +1211,14 @@ static inline void run_remote_debugger_script(const char *path) return; } -#ifdef MS_WINDOWS - PyObject* path_obj = PyUnicode_FromString(path); - if (!path_obj) { - PyErr_FormatUnraisable("Error when converting remote debugger script path %s to Unicode", path); - return; - } - wchar_t* wpath = PyUnicode_AsWideCharString(path_obj, NULL); - Py_DECREF(path_obj); - if (!wpath) { - PyErr_FormatUnraisable("Error when converting remote debugger script path %s to wide char", path); - return; - } - FILE* f = _wfopen(wpath, L"r"); -#else int fd = PyObject_AsFileDescriptor(fileobj); if (fd == -1) { PyErr_FormatUnraisable("Error when getting file descriptor for debugger script %s", path); return; } +#ifdef MS_WINDOWS + FILE* f = _fdopen(fd, "r"); +#else FILE* f = fdopen(fd, "r"); #endif @@ -1239,11 +1228,6 @@ static inline void run_remote_debugger_script(const char *path) PyRun_AnyFile(f, path); } -#ifdef MS_WINDOWS - PyMem_Free(wpath); - fclose(f); -#endif - if (PyErr_Occurred()) { PyErr_FormatUnraisable("Error executing debugger script %s", path); } From f6dec5964270de951350b0d57b433f867a4b36d3 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 19:58:29 -0400 Subject: [PATCH 18/36] Ensure the debugger script is always closed Previously it was leaked if `PyObject_AsFileDescriptor` failed. --- Python/ceval_gil.c | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/Python/ceval_gil.c b/Python/ceval_gil.c index 914ea8a38e4d8f..8fd8f48ce5c576 100644 --- a/Python/ceval_gil.c +++ b/Python/ceval_gil.c @@ -1198,43 +1198,42 @@ _PyEval_DisableGIL(PyThreadState *tstate) static inline void run_remote_debugger_script(const char *path) { if (0 != PySys_Audit("remote_debugger_script", "s", path)) { - PyErr_FormatUnraisable("Error when auditing remote debugger script %s", path); + PyErr_FormatUnraisable( + "Audit hook failed for remote debugger script %s", path); return; } - // Open the debugger script with the open code hook. Unfortunately this forces us to handle - // the resulting Python object, which is a file object and therefore we need to call - // Python methods on it instead of the simpler C equivalents. + // Open the debugger script with the open code hook, and reopen the + // resulting file object to get a C FILE* object. PyObject* fileobj = PyFile_OpenCode(path); if (!fileobj) { - PyErr_FormatUnraisable("Error when opening debugger script %s", path); + PyErr_FormatUnraisable("Can't open debugger script %s", path); return; } int fd = PyObject_AsFileDescriptor(fileobj); if (fd == -1) { - PyErr_FormatUnraisable("Error when getting file descriptor for debugger script %s", path); - return; - } + PyErr_FormatUnraisable("Can't find fd for debugger script %s", path); + } else { #ifdef MS_WINDOWS - FILE* f = _fdopen(fd, "r"); + FILE* f = _fdopen(fd, "r"); #else - FILE* f = fdopen(fd, "r"); + FILE* f = fdopen(fd, "r"); #endif + if (!f) { + PyErr_SetFromErrno(PyExc_OSError); + } else { + PyRun_AnyFile(f, path); + } - if (!f) { - PyErr_SetFromErrno(PyExc_OSError); - } else { - PyRun_AnyFile(f, path); - } - - if (PyErr_Occurred()) { - PyErr_FormatUnraisable("Error executing debugger script %s", path); + if (PyErr_Occurred()) { + PyErr_FormatUnraisable("Error executing debugger script %s", path); + } } PyObject* res = PyObject_CallMethod(fileobj, "close", ""); if (!res) { - PyErr_FormatUnraisable("Error when closing debugger script %s", path); + PyErr_FormatUnraisable("Error closing debugger script %s", path); } else { Py_DECREF(res); } From d273c5b44c22b9b65147f231ab7bd6b0adf34fb1 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 20:02:30 -0400 Subject: [PATCH 19/36] Fix incorrect docstring I think this was a copy paste error. --- Python/sysmodule.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index e310b4fb7411d0..9312aa7bb3ffc4 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2426,8 +2426,6 @@ sys_is_stack_trampoline_active_impl(PyObject *module) sys.is_remote_debug_enabled Return True if remote debugging is enabled, False otherwise. - -If no stack profiler is activated, this function has no effect. [clinic start generated code]*/ static PyObject * @@ -2479,7 +2477,7 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) if (attr == INVALID_FILE_ATTRIBUTES) { DWORD err = GetLastError(); PyMem_Free(debugger_script_path_w); - + if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) { PyErr_SetString(PyExc_FileNotFoundError, "Script file does not exist"); } @@ -2498,14 +2496,14 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) PyMem_Free(debugger_script_path_w); return NULL; } - + PyMem_Free(debugger_script_path_w); #else const char *debugger_script_path = PyUnicode_AsUTF8(script); if (debugger_script_path == NULL) { return NULL; } - + if (access(debugger_script_path, F_OK | R_OK) != 0) { switch (errno) { case ENOENT: From c9a2146a9991fa563f9342bd85320a6e6297a932 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 20:13:19 -0400 Subject: [PATCH 20/36] Document restrictions for remote_exec The remote process must be a compatible version. Explain our compatibility rules. --- Python/sysmodule.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 9312aa7bb3ffc4..09ea8ab91f149f 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2455,6 +2455,11 @@ code has been executed. The caller is responsible for making sure that the file still exists whenever the remote process tries to read it and that it hasn't been overwritten. +The remote process must be running a CPython interpreter of the same major +and minor version as the local process. If either the local or remote +interpreter is pre-release (alpha, beta, or release candidate) then the +local and remote interpreters must be the same exact version. + Args: pid (int): The process ID of the target Python process. script (str|bytes): The path to a file containing From 4af1744de1e825bc90b569d3f736037a4e8a78cc Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 20:34:11 -0400 Subject: [PATCH 21/36] Simplify handling of Windows paths We can clean this up by freeing strings as soon as we're done with them, and remove some duplicate code. --- Python/sysmodule.c | 25 ++++++++----------------- 1 file changed, 8 insertions(+), 17 deletions(-) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index 09ea8ab91f149f..d7106d728d9880 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2470,8 +2470,13 @@ static PyObject * sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) /*[clinic end generated code: output=7d94c56afe4a52c0 input=5749b0253d5b588c]*/ { + const char *debugger_script_path = PyUnicode_AsUTF8(script); + if (debugger_script_path == NULL) { + return NULL; + } + #ifdef MS_WINDOWS - // Get UTF-16 (wide char) version of the path for Windows + // Use UTF-16 (wide char) version of the path for permission checks wchar_t *debugger_script_path_w = PyUnicode_AsWideCharString(script, NULL); if (debugger_script_path_w == NULL) { return NULL; @@ -2479,10 +2484,9 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) // Check file attributes using wide character version (W) instead of ANSI (A) DWORD attr = GetFileAttributesW(debugger_script_path_w); + PyMem_Free(debugger_script_path_w); if (attr == INVALID_FILE_ATTRIBUTES) { DWORD err = GetLastError(); - PyMem_Free(debugger_script_path_w); - if (err == ERROR_FILE_NOT_FOUND || err == ERROR_PATH_NOT_FOUND) { PyErr_SetString(PyExc_FileNotFoundError, "Script file does not exist"); } @@ -2494,21 +2498,7 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) } return NULL; } - - // Get UTF-8 version for the rest of the code - const char *debugger_script_path = PyUnicode_AsUTF8(script); - if (debugger_script_path == NULL) { - PyMem_Free(debugger_script_path_w); - return NULL; - } - - PyMem_Free(debugger_script_path_w); #else - const char *debugger_script_path = PyUnicode_AsUTF8(script); - if (debugger_script_path == NULL) { - return NULL; - } - if (access(debugger_script_path, F_OK | R_OK) != 0) { switch (errno) { case ENOENT: @@ -2523,6 +2513,7 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) return NULL; } #endif + if (_PySysRemoteDebug_SendExec(pid, 0, debugger_script_path) < 0) { return NULL; } From 5c0b8b94b4a3ddf1a623240dec3d17f264dd010e Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 20:45:14 -0400 Subject: [PATCH 22/36] Make remote_exec accept `bytes` paths This was documented as working, but in fact we assumed that the path would always be a unicode string. Factor the version that only accepts a unicode string into a helper function, and have the caller call `os.fsdecode` on the input path, which accepts either a byte string or a unicode string and always returns a unicode string. --- Python/sysmodule.c | 72 ++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/Python/sysmodule.c b/Python/sysmodule.c index d7106d728d9880..dc7f45f02f128d 100644 --- a/Python/sysmodule.c +++ b/Python/sysmodule.c @@ -2440,35 +2440,8 @@ sys_is_remote_debug_enabled_impl(PyObject *module) #endif } -/*[clinic input] -sys.remote_exec - - pid: int - script: object - -Executes a file containing Python code in a given remote Python process. - -This function returns immediately, and the code will be executed by the -target process's main thread at the next available opportunity, similarly -to how signals are handled. There is no interface to determine when the -code has been executed. The caller is responsible for making sure that -the file still exists whenever the remote process tries to read it and that -it hasn't been overwritten. - -The remote process must be running a CPython interpreter of the same major -and minor version as the local process. If either the local or remote -interpreter is pre-release (alpha, beta, or release candidate) then the -local and remote interpreters must be the same exact version. - -Args: - pid (int): The process ID of the target Python process. - script (str|bytes): The path to a file containing - the Python code to be executed. -[clinic start generated code]*/ - static PyObject * -sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) -/*[clinic end generated code: output=7d94c56afe4a52c0 input=5749b0253d5b588c]*/ +sys_remote_exec_unicode_path(PyObject *module, int pid, PyObject *script) { const char *debugger_script_path = PyUnicode_AsUTF8(script); if (debugger_script_path == NULL) { @@ -2521,6 +2494,49 @@ sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) Py_RETURN_NONE; } +/*[clinic input] +sys.remote_exec + + pid: int + script: object + +Executes a file containing Python code in a given remote Python process. + +This function returns immediately, and the code will be executed by the +target process's main thread at the next available opportunity, similarly +to how signals are handled. There is no interface to determine when the +code has been executed. The caller is responsible for making sure that +the file still exists whenever the remote process tries to read it and that +it hasn't been overwritten. + +The remote process must be running a CPython interpreter of the same major +and minor version as the local process. If either the local or remote +interpreter is pre-release (alpha, beta, or release candidate) then the +local and remote interpreters must be the same exact version. + +Args: + pid (int): The process ID of the target Python process. + script (str|bytes): The path to a file containing + the Python code to be executed. +[clinic start generated code]*/ + +static PyObject * +sys_remote_exec_impl(PyObject *module, int pid, PyObject *script) +/*[clinic end generated code: output=7d94c56afe4a52c0 input=5749b0253d5b588c]*/ +{ + PyObject *ret = NULL; + PyObject *os = PyImport_ImportModule("os"); + if (os) { + PyObject *path = PyObject_CallMethod(os, "fsdecode", "O", script); + if (path) { + ret = sys_remote_exec_unicode_path(module, pid, path); + Py_DECREF(path); + } + Py_DECREF(os); + } + return ret; +} + /*[clinic input] From c8779cdbd153b2dc833e259f2e0d50258157f34c Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Wed, 26 Mar 2025 18:56:18 -0400 Subject: [PATCH 23/36] Refactor to avoid duplicate error handling --- Python/remote_debugging.c | 82 +++++++++++++++------------------------ 1 file changed, 32 insertions(+), 50 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 46f8f44a61844c..643e34cca1b3b4 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -691,34 +691,13 @@ read_offsets( return 0; } -int -_PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) +static int +send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_script_path) { -#if (!defined(__linux__) && !defined(__APPLE__) && !defined(MS_WINDOWS)) || (defined(__linux__) && !HAVE_PROCESS_VM_READV) - PyErr_SetString(PyExc_RuntimeError, "Remote debugging is not supported on this platform"); - return -1; -#endif - - proc_handle_t handle; - if (init_proc_handle(&handle, pid) < 0) { - return -1; - } - -#ifdef MS_WINDOWS - if (debugger_script_path != NULL && strlen(debugger_script_path) > MAX_PATH) { -#else - if (debugger_script_path != NULL && strlen(debugger_script_path) > PATH_MAX) { -#endif - PyErr_SetString(PyExc_ValueError, "Debugger script path is too long"); - cleanup_proc_handle(&handle); - return -1; - } - uintptr_t runtime_start_address; struct _Py_DebugOffsets local_debug_offsets; - if (read_offsets(&handle, &runtime_start_address, &local_debug_offsets)) { - cleanup_proc_handle(&handle); + if (read_offsets(handle, &runtime_start_address, &local_debug_offsets)) { return -1; } @@ -726,35 +705,31 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) uintptr_t address_of_interpreter_state; Py_ssize_t bytes = read_memory( - &handle, + handle, runtime_start_address + interpreter_state_list_head, sizeof(void*), &address_of_interpreter_state); if (bytes == -1) { - cleanup_proc_handle(&handle); return -1; } if (address_of_interpreter_state == 0) { PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); - cleanup_proc_handle(&handle); return -1; } int is_remote_debugging_enabled = 0; bytes = read_memory( - &handle, + handle, address_of_interpreter_state + local_debug_offsets.debugger_support.remote_debugging_enabled, sizeof(int), &is_remote_debugging_enabled); if (bytes == -1) { - cleanup_proc_handle(&handle); return -1; } if (is_remote_debugging_enabled == 0) { PyErr_SetString(PyExc_RuntimeError, "Remote debugging is not enabled in the remote process"); - cleanup_proc_handle(&handle); return -1; } @@ -763,22 +738,20 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) if (tid != 0) { bytes = read_memory( - &handle, + handle, address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_head, sizeof(void*), &address_of_thread); if (bytes == -1) { - cleanup_proc_handle(&handle); return -1; } while (address_of_thread != 0) { bytes = read_memory( - &handle, + handle, address_of_thread + local_debug_offsets.thread_state.native_thread_id, sizeof(pid_t), &this_tid); if (bytes == -1) { - cleanup_proc_handle(&handle); return -1; } @@ -787,41 +760,37 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) } bytes = read_memory( - &handle, + handle, address_of_thread + local_debug_offsets.thread_state.next, sizeof(void*), &address_of_thread); if (bytes == -1) { - cleanup_proc_handle(&handle); return -1; } } } else { bytes = read_memory( - &handle, + handle, address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_main, sizeof(void*), &address_of_thread); if (bytes == -1) { - cleanup_proc_handle(&handle); return -1; } } if (address_of_thread == 0) { PyErr_SetString(PyExc_RuntimeError, "No thread state found"); - cleanup_proc_handle(&handle); return -1; } uintptr_t eval_breaker; bytes = read_memory( - &handle, + handle, address_of_thread + local_debug_offsets.debugger_support.eval_breaker, sizeof(uintptr_t), &eval_breaker); if (bytes == -1) { - cleanup_proc_handle(&handle); return -1; } @@ -830,7 +799,6 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) // Ensure our path is not too long if (local_debug_offsets.debugger_support.debugger_script_path_size <= strlen(debugger_script_path)) { PyErr_SetString(PyExc_ValueError, "Debugger script path is too long"); - cleanup_proc_handle(&handle); return -1; } @@ -840,12 +808,11 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) local_debug_offsets.debugger_support.remote_debugger_support + local_debug_offsets.debugger_support.debugger_script_path); bytes = write_memory( - &handle, + handle, debugger_script_path_addr, strlen(debugger_script_path) + 1, debugger_script_path); if (bytes == -1) { - cleanup_proc_handle(&handle); return -1; } } @@ -856,35 +823,50 @@ _PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) local_debug_offsets.debugger_support.remote_debugger_support + local_debug_offsets.debugger_support.debugger_pending_call); bytes = write_memory( - &handle, + handle, debugger_pending_call_addr, sizeof(int), &pending_call); if (bytes == -1) { - cleanup_proc_handle(&handle); return -1; } bytes = write_memory( - &handle, + handle, address_of_thread + local_debug_offsets.debugger_support.eval_breaker, sizeof(uintptr_t), &eval_breaker); if (bytes == -1) { - cleanup_proc_handle(&handle); return -1; } bytes = read_memory( - &handle, + handle, address_of_thread + local_debug_offsets.debugger_support.eval_breaker, sizeof(uintptr_t), &eval_breaker); printf("Eval breaker: %p\n", (void*)eval_breaker); - cleanup_proc_handle(&handle); return 0; +} + +int +_PySysRemoteDebug_SendExec(int pid, int tid, const char *debugger_script_path) +{ +#if (!defined(__linux__) && !defined(__APPLE__) && !defined(MS_WINDOWS)) || (defined(__linux__) && !HAVE_PROCESS_VM_READV) + PyErr_SetString(PyExc_RuntimeError, "Remote debugging is not supported on this platform"); + return -1; +#endif + + proc_handle_t handle; + if (init_proc_handle(&handle, pid) < 0) { + return -1; } + + int rc = send_exec_to_proc_handle(&handle, tid, debugger_script_path); + cleanup_proc_handle(&handle); + return rc; +} From 68890423f02da9cb5fa963486d94a5db84fc762d Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Wed, 26 Mar 2025 19:52:08 -0400 Subject: [PATCH 24/36] Check for debug offset compatibility before using the offsets --- Python/remote_debugging.c | 71 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 643e34cca1b3b4..9ba533b11dc45e 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -668,6 +668,72 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const #endif } +static int +is_prerelease_version(uint64_t version) +{ + return (version & 0xF0) != 0xF0; +} + +static int +ensure_debug_offset_compatibility(const _Py_DebugOffsets* debug_offsets) +{ + if (memcmp(debug_offsets->cookie, _Py_Debug_Cookie, sizeof(debug_offsets->cookie)) != 0) { + // The remote is probably running a Python version predating debug offsets. + PyErr_SetString( + PyExc_RuntimeError, + "Can't determine the Python version of the remote process"); + return -1; + } + + // Assume debug offsets could change from one pre-release version to another, + // or one minor version to another, but are stable across patch versions. + if (is_prerelease_version(Py_Version) && Py_Version != debug_offsets->version) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't send commands from a pre-release Python interpreter" + " to a process running a different Python version"); + return -1; + } + + if (is_prerelease_version(debug_offsets->version) && Py_Version != debug_offsets->version) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't send commands to a pre-release Python interpreter" + " from a process running a different Python version"); + return -1; + } + + unsigned int remote_major = (debug_offsets->version >> 24) & 0xFF; + unsigned int remote_minor = (debug_offsets->version >> 16) & 0xFF; + + if (PY_MAJOR_VERSION != remote_major || PY_MINOR_VERSION != remote_minor) { + PyErr_Format( + PyExc_RuntimeError, + "Can't send commands from a Python %d.%d process to a Python %d.%d process", + PY_MAJOR_VERSION, PY_MINOR_VERSION, remote_major, remote_minor); + return -1; + } + + // The debug offsets differ between free threaded and non-free threaded builds. + if (_Py_Debug_Free_Threaded && !debug_offsets->free_threaded) { + PyErr_SetString( + PyExc_RuntimeError, + "Cannot send commands from a free-threaded Python process" + " to a process running a non-free-threaded version"); + return -1; + } + + if (!_Py_Debug_Free_Threaded && debug_offsets->free_threaded) { + PyErr_SetString( + PyExc_RuntimeError, + "Cannot send commands to a free-threaded Python process" + " from a process running a non-free-threaded version"); + return -1; + } + + return 0; +} + static int read_offsets( proc_handle_t *handle, @@ -685,7 +751,10 @@ read_offsets( size_t size = sizeof(struct _Py_DebugOffsets); Py_ssize_t bytes = read_memory( handle, *runtime_start_address, size, debug_offsets); - if (bytes == -1) { + if (bytes < 0 || (size_t)bytes != size) { + return -1; + } + if (ensure_debug_offset_compatibility(debug_offsets)) { return -1; } return 0; From 7f7aa8bb346ec5bd5f6bedca3695ac078f4b9987 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Wed, 26 Mar 2025 19:52:24 -0400 Subject: [PATCH 25/36] Give a some variables shorter names --- Python/remote_debugging.c | 54 +++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 9ba533b11dc45e..73f61eb2cc6d52 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -764,25 +764,25 @@ static int send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_script_path) { uintptr_t runtime_start_address; - struct _Py_DebugOffsets local_debug_offsets; + struct _Py_DebugOffsets debug_offsets; - if (read_offsets(handle, &runtime_start_address, &local_debug_offsets)) { + if (read_offsets(handle, &runtime_start_address, &debug_offsets)) { return -1; } - uintptr_t interpreter_state_list_head = local_debug_offsets.runtime_state.interpreters_head; + uintptr_t interpreter_state_list_head = debug_offsets.runtime_state.interpreters_head; - uintptr_t address_of_interpreter_state; + uintptr_t interpreter_state_addr; Py_ssize_t bytes = read_memory( handle, runtime_start_address + interpreter_state_list_head, sizeof(void*), - &address_of_interpreter_state); + &interpreter_state_addr); if (bytes == -1) { return -1; } - if (address_of_interpreter_state == 0) { + if (interpreter_state_addr == 0) { PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); return -1; } @@ -790,7 +790,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc int is_remote_debugging_enabled = 0; bytes = read_memory( handle, - address_of_interpreter_state + local_debug_offsets.debugger_support.remote_debugging_enabled, + interpreter_state_addr + debug_offsets.debugger_support.remote_debugging_enabled, sizeof(int), &is_remote_debugging_enabled); if (bytes == -1) { @@ -802,22 +802,22 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc return -1; } - uintptr_t address_of_thread; + uintptr_t thread_state_addr; pid_t this_tid = 0; if (tid != 0) { bytes = read_memory( handle, - address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_head, + interpreter_state_addr + debug_offsets.interpreter_state.threads_head, sizeof(void*), - &address_of_thread); + &thread_state_addr); if (bytes == -1) { return -1; } - while (address_of_thread != 0) { + while (thread_state_addr != 0) { bytes = read_memory( handle, - address_of_thread + local_debug_offsets.thread_state.native_thread_id, + thread_state_addr + debug_offsets.thread_state.native_thread_id, sizeof(pid_t), &this_tid); if (bytes == -1) { @@ -830,9 +830,9 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc bytes = read_memory( handle, - address_of_thread + local_debug_offsets.thread_state.next, + thread_state_addr + debug_offsets.thread_state.next, sizeof(void*), - &address_of_thread); + &thread_state_addr); if (bytes == -1) { return -1; } @@ -840,15 +840,15 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc } else { bytes = read_memory( handle, - address_of_interpreter_state + local_debug_offsets.interpreter_state.threads_main, + interpreter_state_addr + debug_offsets.interpreter_state.threads_main, sizeof(void*), - &address_of_thread); + &thread_state_addr); if (bytes == -1) { return -1; } } - if (address_of_thread == 0) { + if (thread_state_addr == 0) { PyErr_SetString(PyExc_RuntimeError, "No thread state found"); return -1; } @@ -856,7 +856,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc uintptr_t eval_breaker; bytes = read_memory( handle, - address_of_thread + local_debug_offsets.debugger_support.eval_breaker, + thread_state_addr + debug_offsets.debugger_support.eval_breaker, sizeof(uintptr_t), &eval_breaker); if (bytes == -1) { @@ -866,16 +866,16 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc eval_breaker |= _PY_EVAL_PLEASE_STOP_BIT; // Ensure our path is not too long - if (local_debug_offsets.debugger_support.debugger_script_path_size <= strlen(debugger_script_path)) { + if (debug_offsets.debugger_support.debugger_script_path_size <= strlen(debugger_script_path)) { PyErr_SetString(PyExc_ValueError, "Debugger script path is too long"); return -1; } if (debugger_script_path != NULL) { uintptr_t debugger_script_path_addr = ( - address_of_thread + - local_debug_offsets.debugger_support.remote_debugger_support + - local_debug_offsets.debugger_support.debugger_script_path); + thread_state_addr + + debug_offsets.debugger_support.remote_debugger_support + + debug_offsets.debugger_support.debugger_script_path); bytes = write_memory( handle, debugger_script_path_addr, @@ -888,9 +888,9 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc int pending_call = 1; uintptr_t debugger_pending_call_addr = ( - address_of_thread + - local_debug_offsets.debugger_support.remote_debugger_support + - local_debug_offsets.debugger_support.debugger_pending_call); + thread_state_addr + + debug_offsets.debugger_support.remote_debugger_support + + debug_offsets.debugger_support.debugger_pending_call); bytes = write_memory( handle, debugger_pending_call_addr, @@ -903,7 +903,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc bytes = write_memory( handle, - address_of_thread + local_debug_offsets.debugger_support.eval_breaker, + thread_state_addr + debug_offsets.debugger_support.eval_breaker, sizeof(uintptr_t), &eval_breaker); @@ -913,7 +913,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc bytes = read_memory( handle, - address_of_thread + local_debug_offsets.debugger_support.eval_breaker, + thread_state_addr + debug_offsets.debugger_support.eval_breaker, sizeof(uintptr_t), &eval_breaker); From fbecfdb62f548ebfbf8be0b1a6f169d2e6bfd937 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Wed, 26 Mar 2025 20:21:47 -0400 Subject: [PATCH 26/36] Have read_memory/write_memory return 0 on success Previously we were returning the number of bytes, which we didn't need and would never be different than the argument that was passed into the function. --- Python/remote_debugging.c | 155 +++++++++++++++++++------------------- 1 file changed, 77 insertions(+), 78 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 73f61eb2cc6d52..34980fb6ec6faa 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -539,7 +539,7 @@ get_py_runtime(proc_handle_t* handle) } // Platform-independent memory read function -static Py_ssize_t +static int read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* dst) { #ifdef MS_WINDOWS @@ -552,7 +552,7 @@ read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* ds } result += read_bytes; } while (result < len); - return (Py_ssize_t)result; + return 0; #elif defined(__linux__) && HAVE_PROCESS_VM_READV struct iovec local[1]; struct iovec remote[1]; @@ -573,7 +573,7 @@ read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* ds result += read_bytes; } while ((size_t)read_bytes != local[0].iov_len); - return result; + return 0; #elif defined(__APPLE__) && TARGET_OS_OSX Py_ssize_t result = -1; kern_return_t kr = mach_vm_read_overwrite( @@ -596,7 +596,7 @@ read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* ds } return -1; } - return len; + return 0; #else PyErr_SetString( PyExc_RuntimeError, @@ -606,7 +606,7 @@ read_memory(proc_handle_t *handle, uint64_t remote_address, size_t len, void* ds } // Platform-independent memory write function -static Py_ssize_t +static int write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const void* src) { #ifdef MS_WINDOWS @@ -619,7 +619,7 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const } result += written; } while (result < len); - return (Py_ssize_t)result; + return 0; #elif defined(__linux__) && HAVE_PROCESS_VM_READV struct iovec local[1]; struct iovec remote[1]; @@ -640,7 +640,7 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const result += written; } while ((size_t)written != local[0].iov_len); - return result; + return 0; #elif defined(__APPLE__) && TARGET_OS_OSX kern_return_t kr = mach_vm_write( pid_to_task(handle->pid), @@ -661,7 +661,7 @@ write_memory(proc_handle_t *handle, uintptr_t remote_address, size_t len, const } return -1; } - return len; + return 0; #else PyErr_Format(PyExc_RuntimeError, "Writing memory is not supported on this platform"); return -1; @@ -749,9 +749,7 @@ read_offsets( return -1; } size_t size = sizeof(struct _Py_DebugOffsets); - Py_ssize_t bytes = read_memory( - handle, *runtime_start_address, size, debug_offsets); - if (bytes < 0 || (size_t)bytes != size) { + if (0 != read_memory(handle, *runtime_start_address, size, debug_offsets)) { return -1; } if (ensure_debug_offset_compatibility(debug_offsets)) { @@ -773,12 +771,12 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc uintptr_t interpreter_state_list_head = debug_offsets.runtime_state.interpreters_head; uintptr_t interpreter_state_addr; - Py_ssize_t bytes = read_memory( - handle, - runtime_start_address + interpreter_state_list_head, - sizeof(void*), - &interpreter_state_addr); - if (bytes == -1) { + if (0 != read_memory( + handle, + runtime_start_address + interpreter_state_list_head, + sizeof(void*), + &interpreter_state_addr)) + { return -1; } @@ -788,12 +786,12 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc } int is_remote_debugging_enabled = 0; - bytes = read_memory( - handle, - interpreter_state_addr + debug_offsets.debugger_support.remote_debugging_enabled, - sizeof(int), - &is_remote_debugging_enabled); - if (bytes == -1) { + if (0 != read_memory( + handle, + interpreter_state_addr + debug_offsets.debugger_support.remote_debugging_enabled, + sizeof(int), + &is_remote_debugging_enabled)) + { return -1; } @@ -806,21 +804,21 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc pid_t this_tid = 0; if (tid != 0) { - bytes = read_memory( - handle, - interpreter_state_addr + debug_offsets.interpreter_state.threads_head, - sizeof(void*), - &thread_state_addr); - if (bytes == -1) { + if (0 != read_memory( + handle, + interpreter_state_addr + debug_offsets.interpreter_state.threads_head, + sizeof(void*), + &thread_state_addr)) + { return -1; } while (thread_state_addr != 0) { - bytes = read_memory( - handle, - thread_state_addr + debug_offsets.thread_state.native_thread_id, - sizeof(pid_t), - &this_tid); - if (bytes == -1) { + if (0 != read_memory( + handle, + thread_state_addr + debug_offsets.thread_state.native_thread_id, + sizeof(pid_t), + &this_tid)) + { return -1; } @@ -828,22 +826,22 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc break; } - bytes = read_memory( - handle, - thread_state_addr + debug_offsets.thread_state.next, - sizeof(void*), - &thread_state_addr); - if (bytes == -1) { + if (0 != read_memory( + handle, + thread_state_addr + debug_offsets.thread_state.next, + sizeof(void*), + &thread_state_addr)) + { return -1; } } } else { - bytes = read_memory( - handle, - interpreter_state_addr + debug_offsets.interpreter_state.threads_main, - sizeof(void*), - &thread_state_addr); - if (bytes == -1) { + if (0 != read_memory( + handle, + interpreter_state_addr + debug_offsets.interpreter_state.threads_main, + sizeof(void*), + &thread_state_addr)) + { return -1; } } @@ -854,12 +852,12 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc } uintptr_t eval_breaker; - bytes = read_memory( - handle, - thread_state_addr + debug_offsets.debugger_support.eval_breaker, - sizeof(uintptr_t), - &eval_breaker); - if (bytes == -1) { + if (0 != read_memory( + handle, + thread_state_addr + debug_offsets.debugger_support.eval_breaker, + sizeof(uintptr_t), + &eval_breaker)) + { return -1; } @@ -876,12 +874,12 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc thread_state_addr + debug_offsets.debugger_support.remote_debugger_support + debug_offsets.debugger_support.debugger_script_path); - bytes = write_memory( - handle, - debugger_script_path_addr, - strlen(debugger_script_path) + 1, - debugger_script_path); - if (bytes == -1) { + if (0 != write_memory( + handle, + debugger_script_path_addr, + strlen(debugger_script_path) + 1, + debugger_script_path)) + { return -1; } } @@ -891,33 +889,34 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc thread_state_addr + debug_offsets.debugger_support.remote_debugger_support + debug_offsets.debugger_support.debugger_pending_call); - bytes = write_memory( - handle, - debugger_pending_call_addr, - sizeof(int), - &pending_call); + if (0 != write_memory( + handle, + debugger_pending_call_addr, + sizeof(int), + &pending_call)) - if (bytes == -1) { + { return -1; } - bytes = write_memory( - handle, - thread_state_addr + debug_offsets.debugger_support.eval_breaker, - sizeof(uintptr_t), - &eval_breaker); + if (0 != write_memory( + handle, + thread_state_addr + debug_offsets.debugger_support.eval_breaker, + sizeof(uintptr_t), + &eval_breaker)) - if (bytes == -1) { + { return -1; } - bytes = read_memory( - handle, - thread_state_addr + debug_offsets.debugger_support.eval_breaker, - sizeof(uintptr_t), - &eval_breaker); - - printf("Eval breaker: %p\n", (void*)eval_breaker); + if (0 == read_memory( + handle, + thread_state_addr + debug_offsets.debugger_support.eval_breaker, + sizeof(uintptr_t), + &eval_breaker)) + { + printf("Eval breaker: %p\n", (void*)eval_breaker); + } return 0; } From fa98f64a702a8482c52b6196c1a86d912758769f Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Wed, 26 Mar 2025 20:33:59 -0400 Subject: [PATCH 27/36] Improve an error message --- Python/remote_debugging.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 34980fb6ec6faa..f8251c5f33cac1 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -781,7 +781,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc } if (interpreter_state_addr == 0) { - PyErr_SetString(PyExc_RuntimeError, "No interpreter state found"); + PyErr_SetString(PyExc_RuntimeError, "Can't find a running interpreter in the remote process"); return -1; } From 5b4cb00877b05b82b4cedf4615b3af3e5b71464a Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 21:23:44 -0400 Subject: [PATCH 28/36] Remove a debugging printf --- Python/remote_debugging.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index f8251c5f33cac1..d50863d3961eca 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -909,15 +909,6 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc return -1; } - if (0 == read_memory( - handle, - thread_state_addr + debug_offsets.debugger_support.eval_breaker, - sizeof(uintptr_t), - &eval_breaker)) - { - printf("Eval breaker: %p\n", (void*)eval_breaker); - } - return 0; } From 0dd7797ed94075e2908348c5873f322f315010d8 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 21:34:37 -0400 Subject: [PATCH 29/36] Fix a remote read to use the correct type This field in the structure is defined as an `unsigned long`, not as a `pid_t`. --- Python/remote_debugging.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index d50863d3961eca..3683c6e3295377 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -801,7 +801,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc } uintptr_t thread_state_addr; - pid_t this_tid = 0; + unsigned long this_tid = 0; if (tid != 0) { if (0 != read_memory( @@ -822,7 +822,7 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc return -1; } - if (this_tid == tid) { + if (this_tid == (unsigned long)tid) { break; } From b8a05031f59f95f6cc41e12e8c99e03974eeab22 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 21:35:22 -0400 Subject: [PATCH 30/36] Improve the error message when we can't find the requested thread --- Python/remote_debugging.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index 3683c6e3295377..bd0b60b6111e3c 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -835,6 +835,13 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc return -1; } } + + if (thread_state_addr == 0) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't find the specified thread in the remote process"); + return -1; + } } else { if (0 != read_memory( handle, @@ -844,11 +851,13 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc { return -1; } - } - if (thread_state_addr == 0) { - PyErr_SetString(PyExc_RuntimeError, "No thread state found"); - return -1; + if (thread_state_addr == 0) { + PyErr_SetString( + PyExc_RuntimeError, + "Can't find the main thread in the remote process"); + return -1; + } } uintptr_t eval_breaker; From 9344d1dd815b4c00a14633cd02bc500f90b0ce59 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 21:35:41 -0400 Subject: [PATCH 31/36] Remove a useless check We already did a `strlen` on the `debugger_script_path`, so it's too late to check if it's NULL here! --- Python/remote_debugging.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index bd0b60b6111e3c..c7f3cc5f174c97 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -878,19 +878,17 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc return -1; } - if (debugger_script_path != NULL) { - uintptr_t debugger_script_path_addr = ( - thread_state_addr + - debug_offsets.debugger_support.remote_debugger_support + - debug_offsets.debugger_support.debugger_script_path); - if (0 != write_memory( - handle, - debugger_script_path_addr, - strlen(debugger_script_path) + 1, - debugger_script_path)) - { - return -1; - } + uintptr_t debugger_script_path_addr = ( + thread_state_addr + + debug_offsets.debugger_support.remote_debugger_support + + debug_offsets.debugger_support.debugger_script_path); + if (0 != write_memory( + handle, + debugger_script_path_addr, + strlen(debugger_script_path) + 1, + debugger_script_path)) + { + return -1; } int pending_call = 1; From 9368d38a1ef7b82b6a1531b7b16523e6bf1be627 Mon Sep 17 00:00:00 2001 From: Matt Wozniski Date: Fri, 28 Mar 2025 21:41:59 -0400 Subject: [PATCH 32/36] Only accept a flag of 1 to mean remote debugging is enabled This helps to harden a bit against heap corruption. --- Python/remote_debugging.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Python/remote_debugging.c b/Python/remote_debugging.c index c7f3cc5f174c97..13e23f83fb2eb5 100644 --- a/Python/remote_debugging.c +++ b/Python/remote_debugging.c @@ -795,8 +795,10 @@ send_exec_to_proc_handle(proc_handle_t *handle, int tid, const char *debugger_sc return -1; } - if (is_remote_debugging_enabled == 0) { - PyErr_SetString(PyExc_RuntimeError, "Remote debugging is not enabled in the remote process"); + if (is_remote_debugging_enabled != 1) { + PyErr_SetString( + PyExc_RuntimeError, + "Remote debugging is not enabled in the remote process"); return -1; } From d253966d671c40be3da0f1890127d5b7a1792412 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 31 Mar 2025 13:15:48 +0100 Subject: [PATCH 33/36] Lint --- Lib/test/test_sys.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 65f9ae13283377..364ca3032fbf26 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -2032,7 +2032,7 @@ def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologu try: # Accept connection from target process client_socket, _ = server_socket.accept() - + # Wait for process to be ready response = client_socket.recv(1024) self.assertEqual(response, b"ready") From 8e04fdd450898b178f5ad71b1c51f6eb620e02d1 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 31 Mar 2025 13:22:59 +0100 Subject: [PATCH 34/36] simplify socket handling --- Lib/test/test_sys.py | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) diff --git a/Lib/test/test_sys.py b/Lib/test/test_sys.py index 364ca3032fbf26..c7bba978127651 100644 --- a/Lib/test/test_sys.py +++ b/Lib/test/test_sys.py @@ -18,6 +18,7 @@ from test import support from test.support import os_helper from test.support.script_helper import assert_python_ok, assert_python_failure +from test.support.socket_helper import find_unused_port from test.support import threading_helper from test.support import import_helper from test.support import force_not_colorized @@ -1980,20 +1981,17 @@ def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologu target = os_helper.TESTFN + '_target.py' self.addCleanup(os_helper.unlink, target) - # Find an available port for the socket - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - s.bind(('localhost', 0)) - port = s.getsockname()[1] + parent_sock, child_sock = socket.socketpair() with open(target, 'w') as f: f.write(f''' import sys import time import socket +import os -# Connect to the test process -sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) -sock.connect(('localhost', {port})) +# Get the socket from the passed file descriptor +sock = socket.socket(fileno={child_sock.fileno()}) # Signal that the process is ready sock.sendall(b"ready") @@ -2019,32 +2017,28 @@ def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologu cmd.extend(python_args) cmd.append(target) - # Create a socket server to communicate with the target process - server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_socket.bind(('localhost', port)) - server_socket.settimeout(10.0) # Set a timeout to prevent hanging - server_socket.listen(1) - with subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, - env=env) as proc: + env=env, + pass_fds=[child_sock.fileno()], + ) as proc: try: - # Accept connection from target process - client_socket, _ = server_socket.accept() + # Close the child socket in the parent process as it's now owned by the child + child_sock.close() - # Wait for process to be ready - response = client_socket.recv(1024) + # Wait for process to be ready + response = parent_sock.recv(1024) self.assertEqual(response, b"ready") # Try remote exec on the target process sys.remote_exec(proc.pid, script) # Signal script to continue - client_socket.sendall(b"continue") + parent_sock.sendall(b"continue") # Wait for execution confirmation - response = client_socket.recv(1024) + response = parent_sock.recv(1024) self.assertEqual(response, b"executed") # Return output for test verification @@ -2053,9 +2047,8 @@ def _run_remote_exec_test(self, script_code, python_args=None, env=None, prologu except PermissionError: self.skipTest("Insufficient permissions to execute code in remote process") finally: - if 'client_socket' in locals(): - client_socket.close() - server_socket.close() + # Wait for execution confirmation + parent_sock.close() proc.kill() proc.terminate() proc.wait(timeout=SHORT_TIMEOUT) From 0c2b275ee794aee29942ae540f4ed1ee07c87e3b Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 31 Mar 2025 13:25:18 +0100 Subject: [PATCH 35/36] Add NEWS entry --- .../2025-03-31-13-25-14.gh-issue-131591.DsPKZt.rst | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-13-25-14.gh-issue-131591.DsPKZt.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-13-25-14.gh-issue-131591.DsPKZt.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-13-25-14.gh-issue-131591.DsPKZt.rst new file mode 100644 index 00000000000000..5c75c9175f0038 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-03-31-13-25-14.gh-issue-131591.DsPKZt.rst @@ -0,0 +1,4 @@ +Implement :pep:`768` (Safe external debugger interface for CPython). Add a +new :func:`sys.remote_exec` function to the :mod:`sys` module. This function +shcedules the execution of a Python file in a separate process. Patch by +Pablo Galindo, Matt Matt Wozniski and Ivona Stojanovic. From 80856d3fe6fe95340e0f367f66bf155104e8a141 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Mon, 31 Mar 2025 13:45:16 +0100 Subject: [PATCH 36/36] Add more docs --- Doc/library/sys.rst | 22 +++++++++++++ Doc/using/cmdline.rst | 14 ++++++++ Doc/using/configure.rst | 9 +++++ Doc/whatsnew/3.14.rst | 73 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+) diff --git a/Doc/library/sys.rst b/Doc/library/sys.rst index bb88cf73ec1e89..81ff9bc809f10c 100644 --- a/Doc/library/sys.rst +++ b/Doc/library/sys.rst @@ -1835,6 +1835,28 @@ always available. Unless explicitly noted otherwise, all variables are read-only .. versionadded:: 3.12 + +.. function:: remote_exec(pid, script) + + Executes a file containing Python code given by *script* in the remote + process with the given *pid*. + + This function returns immediately, and the code will be executed by the + target process's main thread at the next available opportunity, similarly + to how signals are handled. There is no interface to determine when the + code has been executed. The caller is responsible for making sure that + the file still exists whenever the remote process tries to read it and that + it hasn't been overwritten. + + The remote process must be running a CPython interpreter of the same major + and minor version as the local process. If either the local or remote + interpreter is pre-release (alpha, beta, or release candidate) then the + local and remote interpreters must be the same exact version. + + .. availability:: Unix, Windows. + .. versionadded:: 3.14 + + .. function:: _enablelegacywindowsfsencoding() Changes the :term:`filesystem encoding and error handler` to 'mbcs' and diff --git a/Doc/using/cmdline.rst b/Doc/using/cmdline.rst index 2a59cf3f62d4c5..38782da8109337 100644 --- a/Doc/using/cmdline.rst +++ b/Doc/using/cmdline.rst @@ -603,6 +603,13 @@ Miscellaneous options .. versionadded:: 3.13 + * ``-X disable_remote_debug`` disables the remote debugging support as described + in :pep:`768`. This option is only available on some platforms and will do nothing + if is not supported on the current system. See also + :envvar:`PYTHON_DISABLE_REMOTE_DEBUG` and :pep:`768`. + + .. versionadded:: 3.14 + * :samp:`-X cpu_count={n}` overrides :func:`os.cpu_count`, :func:`os.process_cpu_count`, and :func:`multiprocessing.cpu_count`. *n* must be greater than or equal to 1. @@ -1160,7 +1167,14 @@ conflict. .. versionadded:: 3.13 +.. envvar:: PYTHON_DISABLE_REMOTE_DEBUG + + If this variable is set to a non-empty string, it disables the remote + debugging feature described in :pep:`768`. + + See also the :option:`-X disable_remote_debug` command-line option. + .. versionadded:: 3.14 .. envvar:: PYTHON_CPU_COUNT diff --git a/Doc/using/configure.rst b/Doc/using/configure.rst index 4d47cf945219dd..e2e90394eb8e6c 100644 --- a/Doc/using/configure.rst +++ b/Doc/using/configure.rst @@ -660,6 +660,15 @@ also be used to improve performance. Add ``-fstrict-overflow`` to the C compiler flags (by default we add ``-fno-strict-overflow`` instead). +.. option:: --without-remote-debug + + Deactivate remote debugging support described in :pep:`768` (enabled by default). + When this flag is provided the code that allows the interpreter to schedule the + execution of a Python file in a separate process as described in :pep:`768` is + not compiled. + + ..versionadded:: 3.14 + .. _debug-build: diff --git a/Doc/whatsnew/3.14.rst b/Doc/whatsnew/3.14.rst index ac5b53ef94bfb1..48c0680d20a6cd 100644 --- a/Doc/whatsnew/3.14.rst +++ b/Doc/whatsnew/3.14.rst @@ -90,6 +90,79 @@ If you encounter :exc:`NameError`\s or pickling errors coming out of New features ============ +.. _whatsnew314-pep678: + +PEP 768: Safe external debugger interface for CPython +---------------------------------------------------- + +:pep:`768` introduces a zero-overhead debugging interface that allows debuggers and profilers +to safely attach to running Python processes. This is a significant enhancement to Python's +debugging capabilities, bringing them in line with other major programming languages. + +The new interface provides safe execution points for attaching debugger code without modifying +the interpreter's normal execution path or adding runtime overhead. This enables tools to +inspect and interact with Python applications in real-time without stopping or restarting +them — a crucial capability for high-availability systems and production environments. + +For convenience, CPython implements this interface through the :mod:`sys` module with a +:func:`sys.remote_exec` function:: + + sys.remote_exec(pid, script_path) + +This function allows sending Python code to be executed in a target process at the next safe +execution point. However, tool authors can also implement the protocol directly as described +in the PEP, which details the underlying mechanisms used to safely attach to running processes. + +Here's a simple example that inspects object types in a running Python process: + + .. code-block:: python + + import sys + import tempfile + import os + + # Create a temporary script + with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False) as f: + script_path = f.name + f.write(""" + import gc + import collections + + # Collect all objects managed by the garbage collector + gc.collect() + + # Count objects by type + type_counts = collections.Counter(type(obj).__name__ + for obj in gc.get_objects()) + + # Print the most common types + print("Most common object types in process:") + for type_name, count in type_counts.most_common(10): + print(f" {type_name}: {count}") + """) + + try: + # Execute in process with PID 1234 + print("Behold! An offering:") + sys.remote_exec(1234, script_path) + finally: + os.unlink(script_path) + +The debugging interface has been carefully designed with security in mind and includes several +mechanisms to control access: + +* A :envvar:`PYTHON_DISABLE_REMOTE_DEBUG` environment variable. +* A :option:`-X disable-remote-debug` command-line option. +* A ``--without-remote-debug`` configure flag to completely disable the feature at build time. + +A key implementation detail is that the interface piggybacks on the interpreter's existing evaluation +loop and safe points, ensuring zero overhead during normal execution while providing a reliable way +for external processes to coordinate debugging operations. + +See :pep:`768` for more details. + +(Contributed by Pablo Galindo Salgado, Matt Wozniski, and Ivona Stojanovic in :gh:`131591`.) + .. _whatsnew314-pep649: PEP 649: deferred evaluation of annotations