From 6f8308b0f9d47956549fecefab04ad7a230c0b2f Mon Sep 17 00:00:00 2001 From: b-pass Date: Mon, 12 Jan 2026 20:27:59 -0500 Subject: [PATCH 01/10] Add a shutdown method to internals. shutdown can safely DECREF Python objects owned by the internals. --- include/pybind11/detail/internals.h | 47 ++++++++++++++++++--------- include/pybind11/gil_safe_call_once.h | 4 ++- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index a92f196b1f..40eb86185a 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -308,7 +308,16 @@ struct internals { internals(internals &&other) = delete; internals &operator=(const internals &other) = delete; internals &operator=(internals &&other) = delete; - ~internals() = default; + ~internals() = default; // NOTE: destruct/decref python objects in shutdown() + + /// shutdown is run during interpreter finalization and can (carefully) interact with Python. + void shutdown() { + Py_XDECREF(static_property_type); + static_property_type = nullptr; + + Py_XDECREF(default_metaclass); + default_metaclass = nullptr; + } }; // the internals struct (above) is shared between all the modules. local_internals are only @@ -325,6 +334,12 @@ struct local_internals { std::forward_list registered_exception_translators; PyTypeObject *function_record_py_type = nullptr; + + /// shutdown is run during interpreter finalization and can (carefully) interact with Python. + void shutdown() { + Py_XDECREF(function_record_py_type); + function_record_py_type = nullptr; + } }; enum class holder_enum_t : uint8_t { @@ -569,7 +584,7 @@ inline object get_python_state_dict() { // The bool follows std::map::insert convention: true = created, false = existed. template std::pair atomic_get_or_create_in_state_dict(const char *key, - bool clear_destructor = false) { + void (*dtor)(void *) = nullptr) { error_scope err_scope; // preserve any existing Python error states auto state_dict = reinterpret_borrow(get_python_state_dict()); @@ -595,7 +610,7 @@ std::pair atomic_get_or_create_in_state_dict(const char *key, // - If our capsule is NOT inserted (another thread inserted first), it will be // destructed when going out of scope here, so the destructor will be called // immediately, which will also free the storage. - /*destructor=*/[](void *ptr) -> void { delete static_cast(ptr); }); + /*destructor=*/dtor); // At this point, the capsule object is created successfully. // Release the unique_ptr and let the capsule object own the storage to avoid double-free. (void) storage_ptr.release(); @@ -613,13 +628,6 @@ std::pair atomic_get_or_create_in_state_dict(const char *key, throw error_already_set(); } created = (capsule_obj == new_capsule.ptr()); - if (clear_destructor && created) { - // Our capsule was inserted. - // Remove the destructor to leak the storage on interpreter shutdown. - if (PyCapsule_SetDestructor(capsule_obj, nullptr) < 0) { - throw error_already_set(); - } - } // - If key already existed, our `new_capsule` is not inserted, it will be destructed when // going out of scope here, which will also free the storage. // - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state @@ -707,14 +715,21 @@ class internals_pp_manager { internals_pp_manager(char const *id, on_fetch_function *on_fetch) : holder_id_(id), on_fetch_(on_fetch) {} - std::unique_ptr *get_or_create_pp_in_state_dict() { - // The `unique_ptr` output is leaked on interpreter shutdown. Once an - // instance is created, it will never be deleted until the process exits (compare to - // interpreter shutdown in multiple-interpreter scenarios). + static void internals_shutdown(void *vpp) { + auto *pp = static_cast *>(vpp); + if (pp && *pp) { + (*pp)->shutdown(); + } // Because we cannot guarantee the order of destruction of capsules in the interpreter - // state dict, leaking avoids potential use-after-free issues during interpreter shutdown. + // state dict, the internals unique_ptr is not deleted in this capsule destructor. + // The internals (and their unique_ptr owner) cannot be deleted until after the interpreter + // has completely shut down. Final cleanup will be done pybind11::finalize_interpreter if + // pybind11 was embedded, or it will be leaked if this is an extension module. + } + + std::unique_ptr *get_or_create_pp_in_state_dict() { auto result = atomic_get_or_create_in_state_dict>( - holder_id_, /*clear_destructor=*/true); + holder_id_, &internals_shutdown); auto *pp = result.first; bool created = result.second; // Only call on_fetch_ when fetching existing internals, not when creating new ones. diff --git a/include/pybind11/gil_safe_call_once.h b/include/pybind11/gil_safe_call_once.h index 770ed49998..2a5113225d 100644 --- a/include/pybind11/gil_safe_call_once.h +++ b/include/pybind11/gil_safe_call_once.h @@ -250,7 +250,9 @@ class gil_safe_call_once_and_store { // Get or create per-storage capsule in the current interpreter's state dict. // The storage is interpreter-dependent and will not be shared across interpreters. storage_type *get_or_create_storage_in_state_dict() { - return detail::atomic_get_or_create_in_state_dict(get_storage_key().c_str()) + return detail::atomic_get_or_create_in_state_dict( + get_storage_key().c_str(), + [](void *ptr) -> void { delete static_cast(ptr); }) .first; } From 50c319d6c79081b47655a583267b64ec19876c9c Mon Sep 17 00:00:00 2001 From: b-pass Date: Tue, 13 Jan 2026 20:38:46 -0500 Subject: [PATCH 02/10] Actually free internals during interpreter shutdown (instead of after) --- include/pybind11/detail/internals.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 40eb86185a..66a23654ce 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -308,10 +308,7 @@ struct internals { internals(internals &&other) = delete; internals &operator=(const internals &other) = delete; internals &operator=(internals &&other) = delete; - ~internals() = default; // NOTE: destruct/decref python objects in shutdown() - - /// shutdown is run during interpreter finalization and can (carefully) interact with Python. - void shutdown() { + ~internals() { Py_XDECREF(static_property_type); static_property_type = nullptr; @@ -335,8 +332,7 @@ struct local_internals { std::forward_list registered_exception_translators; PyTypeObject *function_record_py_type = nullptr; - /// shutdown is run during interpreter finalization and can (carefully) interact with Python. - void shutdown() { + ~local_internals() { Py_XDECREF(function_record_py_type); function_record_py_type = nullptr; } @@ -717,14 +713,12 @@ class internals_pp_manager { static void internals_shutdown(void *vpp) { auto *pp = static_cast *>(vpp); - if (pp && *pp) { - (*pp)->shutdown(); + if (pp) { + pp->reset(); } - // Because we cannot guarantee the order of destruction of capsules in the interpreter - // state dict, the internals unique_ptr is not deleted in this capsule destructor. - // The internals (and their unique_ptr owner) cannot be deleted until after the interpreter - // has completely shut down. Final cleanup will be done pybind11::finalize_interpreter if - // pybind11 was embedded, or it will be leaked if this is an extension module. + // Because the unique_ptr is still pointed to by the pp_manager in this and possibly other + // modules, we cannot free the unique_ptr itself until after the interpreter has shut down. + // If this interpreter was not created/owned by pybind11 then this unique_ptr is leaked. } std::unique_ptr *get_or_create_pp_in_state_dict() { From cb5a5a58040cd179500a89694080f234d85853a2 Mon Sep 17 00:00:00 2001 From: b-pass Date: Tue, 13 Jan 2026 21:01:21 -0500 Subject: [PATCH 03/10] Make sure python is alive before DECREFing If something triggers internals to be created during finalization, it might end up being destroyed after finalization and we don't want to do the DECREF at that point, we need the leaky behavior. --- include/pybind11/detail/internals.h | 31 ++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 66a23654ce..9d9e139ede 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -309,11 +309,17 @@ struct internals { internals &operator=(const internals &other) = delete; internals &operator=(internals &&other) = delete; ~internals() { - Py_XDECREF(static_property_type); - static_property_type = nullptr; - - Py_XDECREF(default_metaclass); - default_metaclass = nullptr; + // Normally this destructor runs during interpreter finalization and it may DECREF things. + // In odd finalization scenarios it might end up running after the interpreter has + // completely shut down, In that case, we should not decref these objects because pymalloc + // is gone. + if (Py_IsInitialized()) { + Py_XDECREF(static_property_type); + static_property_type = nullptr; + + Py_XDECREF(default_metaclass); + default_metaclass = nullptr; + } } }; @@ -333,8 +339,14 @@ struct local_internals { PyTypeObject *function_record_py_type = nullptr; ~local_internals() { - Py_XDECREF(function_record_py_type); - function_record_py_type = nullptr; + // Normally this destructor runs during interpreter finalization and it may DECREF things. + // In odd finalization scenarios it might end up running after the interpreter has + // completely shut down, In that case, we should not decref these objects because pymalloc + // is gone. + if (Py_IsInitialized()) { + Py_XDECREF(function_record_py_type); + function_record_py_type = nullptr; + } } }; @@ -717,8 +729,9 @@ class internals_pp_manager { pp->reset(); } // Because the unique_ptr is still pointed to by the pp_manager in this and possibly other - // modules, we cannot free the unique_ptr itself until after the interpreter has shut down. - // If this interpreter was not created/owned by pybind11 then this unique_ptr is leaked. + // modules, we cannot delete the unique_ptr itself until after the interpreter has shut + // down. If this interpreter was not created/owned by pybind11 then the unique_ptr itself + // (but not its contents) is leaked. } std::unique_ptr *get_or_create_pp_in_state_dict() { From f5676ea22796d7245ac440ffafac08a43fbd6446 Mon Sep 17 00:00:00 2001 From: b-pass Date: Tue, 13 Jan 2026 21:16:32 -0500 Subject: [PATCH 04/10] make clang-tidy happy --- include/pybind11/detail/internals.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 9d9e139ede..cc96ed732d 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -313,7 +313,7 @@ struct internals { // In odd finalization scenarios it might end up running after the interpreter has // completely shut down, In that case, we should not decref these objects because pymalloc // is gone. - if (Py_IsInitialized()) { + if (Py_IsInitialized() != 0) { Py_XDECREF(static_property_type); static_property_type = nullptr; @@ -343,7 +343,7 @@ struct local_internals { // In odd finalization scenarios it might end up running after the interpreter has // completely shut down, In that case, we should not decref these objects because pymalloc // is gone. - if (Py_IsInitialized()) { + if (Py_IsInitialized() != 0) { Py_XDECREF(function_record_py_type); function_record_py_type = nullptr; } From d12be34206d394f18b1f50a263750a49a8e66fd1 Mon Sep 17 00:00:00 2001 From: b-pass Date: Wed, 14 Jan 2026 18:18:38 -0500 Subject: [PATCH 05/10] Check IsFinalizing and use Py_CLEAR, make capsule creation safe if the capsule already exists. --- include/pybind11/detail/internals.h | 58 +++++++++++++++------------ include/pybind11/gil_safe_call_once.h | 4 +- 2 files changed, 34 insertions(+), 28 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index cc96ed732d..1c29cc74f3 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -103,7 +103,7 @@ class thread_specific_storage { // However, in GraalPy (as of v24.2 or older), TSS is implemented by Java and this call // requires a living Python interpreter. #ifdef GRAALVM_PYTHON - if (!Py_IsInitialized() || _Py_IsFinalizing()) { + if (!is_interpreter_alive()) { return; } #endif @@ -195,6 +195,14 @@ struct override_hash { using instance_map = std::unordered_multimap; +inline bool is_interpreter_alive() { +#if PY_VERSION_HEX < 0x030D0000 + return Py_IsInitialized() != 0 || _Py_IsFinalizing() != 0; +#else + return Py_IsInitialized() != 0 || Py_IsFinalizing() != 0; +#endif +} + #ifdef Py_GIL_DISABLED // Wrapper around PyMutex to provide BasicLockable semantics class pymutex { @@ -313,12 +321,9 @@ struct internals { // In odd finalization scenarios it might end up running after the interpreter has // completely shut down, In that case, we should not decref these objects because pymalloc // is gone. - if (Py_IsInitialized() != 0) { - Py_XDECREF(static_property_type); - static_property_type = nullptr; - - Py_XDECREF(default_metaclass); - default_metaclass = nullptr; + if (is_interpreter_alive()) { + Py_CLEAR(static_property_type); + Py_CLEAR(default_metaclass); } } }; @@ -343,9 +348,8 @@ struct local_internals { // In odd finalization scenarios it might end up running after the interpreter has // completely shut down, In that case, we should not decref these objects because pymalloc // is gone. - if (Py_IsInitialized() != 0) { - Py_XDECREF(function_record_py_type); - function_record_py_type = nullptr; + if (is_interpreter_alive()) { + Py_CLEAR(function_record_py_type); } } }; @@ -592,7 +596,7 @@ inline object get_python_state_dict() { // The bool follows std::map::insert convention: true = created, false = existed. template std::pair atomic_get_or_create_in_state_dict(const char *key, - void (*dtor)(void *) = nullptr) { + void (*dtor)(PyObject *) = nullptr) { error_scope err_scope; // preserve any existing Python error states auto state_dict = reinterpret_borrow(get_python_state_dict()); @@ -609,16 +613,13 @@ std::pair atomic_get_or_create_in_state_dict(const char *key, // Use unique_ptr for exception safety: if capsule creation throws, the storage is // automatically deleted. auto storage_ptr = std::unique_ptr(new Payload{}); - // Create capsule with destructor to clean up when the interpreter shuts down. - auto new_capsule = capsule( - storage_ptr.get(), - // The destructor will be called when the capsule is GC'ed. - // - If our capsule is inserted into the dict below, it will be kept alive until - // interpreter shutdown, so the destructor will be called at that time. - // - If our capsule is NOT inserted (another thread inserted first), it will be - // destructed when going out of scope here, so the destructor will be called - // immediately, which will also free the storage. - /*destructor=*/dtor); + auto new_capsule + = capsule(storage_ptr.get(), + // The destructor will be called when the capsule is GC'ed. + // If the insert below fails (entry already in the dict), then this + // destructor will be called on the newly created capsule at the end of this + // function, and we want to just release this memory. + /*destructor=*/[](void *v) { delete static_cast(v); }); // At this point, the capsule object is created successfully. // Release the unique_ptr and let the capsule object own the storage to avoid double-free. (void) storage_ptr.release(); @@ -637,9 +638,15 @@ std::pair atomic_get_or_create_in_state_dict(const char *key, } created = (capsule_obj == new_capsule.ptr()); // - If key already existed, our `new_capsule` is not inserted, it will be destructed when - // going out of scope here, which will also free the storage. + // going out of scope here, and will call the destructor set above. // - Otherwise, our `new_capsule` is now in the dict, and it owns the storage and the state - // dict will incref it. + // dict will incref it. We need to set the caller's destructor on it, which will be + // called when the interpreter shuts down. + if (created && dtor) { + if (PyCapsule_SetDestructor(capsule_obj, dtor) < 0) { + throw error_already_set(); + } + } } // Get the storage pointer from the capsule. @@ -723,8 +730,9 @@ class internals_pp_manager { internals_pp_manager(char const *id, on_fetch_function *on_fetch) : holder_id_(id), on_fetch_(on_fetch) {} - static void internals_shutdown(void *vpp) { - auto *pp = static_cast *>(vpp); + static void internals_shutdown(PyObject *capsule) { + auto *pp = static_cast *>( + PyCapsule_GetPointer(capsule, nullptr)); if (pp) { pp->reset(); } diff --git a/include/pybind11/gil_safe_call_once.h b/include/pybind11/gil_safe_call_once.h index 2a5113225d..770ed49998 100644 --- a/include/pybind11/gil_safe_call_once.h +++ b/include/pybind11/gil_safe_call_once.h @@ -250,9 +250,7 @@ class gil_safe_call_once_and_store { // Get or create per-storage capsule in the current interpreter's state dict. // The storage is interpreter-dependent and will not be shared across interpreters. storage_type *get_or_create_storage_in_state_dict() { - return detail::atomic_get_or_create_in_state_dict( - get_storage_key().c_str(), - [](void *ptr) -> void { delete static_cast(ptr); }) + return detail::atomic_get_or_create_in_state_dict(get_storage_key().c_str()) .first; } From 959b1e12660ff4082f1362ae3665a71a9027d522 Mon Sep 17 00:00:00 2001 From: b-pass Date: Wed, 14 Jan 2026 19:22:40 -0500 Subject: [PATCH 06/10] oops, put TLS destructor back how it was. --- include/pybind11/detail/internals.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 1c29cc74f3..31721b2ab4 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -103,7 +103,7 @@ class thread_specific_storage { // However, in GraalPy (as of v24.2 or older), TSS is implemented by Java and this call // requires a living Python interpreter. #ifdef GRAALVM_PYTHON - if (!is_interpreter_alive()) { + if (Py_IsInitialized() == 0 || Py_IsFinalizing() != 0) { return; } #endif From 717e99f8643d7f923d146e61b3f24623eb92bb5d Mon Sep 17 00:00:00 2001 From: b-pass Date: Wed, 14 Jan 2026 20:02:19 -0500 Subject: [PATCH 07/10] Oops, proper spelling of unstable _Py_IsFinalizing --- include/pybind11/detail/internals.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 31721b2ab4..4ef0eac6f0 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -103,7 +103,7 @@ class thread_specific_storage { // However, in GraalPy (as of v24.2 or older), TSS is implemented by Java and this call // requires a living Python interpreter. #ifdef GRAALVM_PYTHON - if (Py_IsInitialized() == 0 || Py_IsFinalizing() != 0) { + if (Py_IsInitialized() == 0 || _Py_IsFinalizing() != 0) { return; } #endif From 5f9c30b612dbf40935c50a83adc42c457ddd9648 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Thu, 15 Jan 2026 14:59:54 -0500 Subject: [PATCH 08/10] Add cleanup step to CI workflow Added a step to clean out unused files to save space during CI. --- .github/workflows/ci.yml | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8dcf48928e..45f8675889 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -485,6 +485,11 @@ jobs: steps: - uses: actions/checkout@v6 + - name: Clean out unused stuff to save space + run: | + sudo rm -rf /usr/local/lib/android /usr/share/dotnet /opt/ghc /opt/hostedtoolcache/CodeQL + sudo apt-get clean + - name: Add NVHPC Repo run: | echo 'deb [trusted=yes] https://developer.download.nvidia.com/hpc-sdk/ubuntu/amd64 /' | \ @@ -492,10 +497,11 @@ jobs: - name: Install 🐍 3 & NVHPC run: | - sudo apt-get update -y && \ - sudo apt-get install -y cmake environment-modules git python3-dev python3-pip python3-numpy && \ - sudo apt-get install -y --no-install-recommends nvhpc-25-11 && \ + sudo apt-get update -y + sudo apt-get install -y cmake environment-modules git python3-dev python3-pip python3-numpy + sudo apt-get install -y --no-install-recommends nvhpc-25-11 sudo rm -rf /var/lib/apt/lists/* + apt-cache depends nvhpc-25-11 python3 -m pip install --upgrade pip python3 -m pip install --upgrade pytest From 2848fd61d7d0c6ca83c65a14084fe8fc4a1465bb Mon Sep 17 00:00:00 2001 From: b-pass Date: Sat, 17 Jan 2026 23:01:20 -0500 Subject: [PATCH 09/10] Accept suggested comment --- include/pybind11/detail/internals.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 4ef0eac6f0..dfbe6fe509 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -736,10 +736,16 @@ class internals_pp_manager { if (pp) { pp->reset(); } - // Because the unique_ptr is still pointed to by the pp_manager in this and possibly other - // modules, we cannot delete the unique_ptr itself until after the interpreter has shut - // down. If this interpreter was not created/owned by pybind11 then the unique_ptr itself - // (but not its contents) is leaked. + // We reset the unique_ptr's contents but cannot delete the unique_ptr itself here. + // The pp_manager in this module (and possibly other modules sharing internals) holds + // a raw pointer to this unique_ptr, and that pointer would dangle if we deleted it now. + // + // For pybind11-owned interpreters (via embed.h or subinterpreter.h), destroy() is + // called after Py_Finalize/Py_EndInterpreter completes, which safely deletes the + // unique_ptr. For interpreters not owned by pybind11 (e.g., a pybind11 extension + // loaded into an external interpreter), destroy() is never called and the unique_ptr + // shell (8 bytes, not its contents) is leaked. + // (See PR #5958 for ideas to eliminate this leak.) } std::unique_ptr *get_or_create_pp_in_state_dict() { From 75bd2925a7c877e3c42e8049b52fbafdb1034164 Mon Sep 17 00:00:00 2001 From: b-pass Date: Sun, 18 Jan 2026 00:41:38 -0500 Subject: [PATCH 10/10] Avoid recreating internals during type deallocation at shutdown. --- include/pybind11/detail/class.h | 2 +- include/pybind11/detail/internals.h | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/pybind11/detail/class.h b/include/pybind11/detail/class.h index 21e966cfea..480c369aa6 100644 --- a/include/pybind11/detail/class.h +++ b/include/pybind11/detail/class.h @@ -207,7 +207,7 @@ extern "C" inline PyObject *pybind11_meta_call(PyObject *type, PyObject *args, P /// Cleanup the type-info for a pybind11-registered type. extern "C" inline void pybind11_meta_dealloc(PyObject *obj) { - with_internals([obj](internals &internals) { + with_internals_if_internals([obj](internals &internals) { auto *type = (PyTypeObject *) obj; // A pybind11-registered type will: diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index dfbe6fe509..d66cf72cc7 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -322,8 +322,9 @@ struct internals { // completely shut down, In that case, we should not decref these objects because pymalloc // is gone. if (is_interpreter_alive()) { - Py_CLEAR(static_property_type); + Py_CLEAR(instance_base); Py_CLEAR(default_metaclass); + Py_CLEAR(static_property_type); } } }; @@ -870,6 +871,17 @@ inline auto with_internals(const F &cb) -> decltype(cb(get_internals())) { return cb(internals); } +template +inline void with_internals_if_internals(const F &cb) { + auto &ppmgr = get_internals_pp_manager(); + auto &internals_ptr = *ppmgr.get_pp(); + if (internals_ptr) { + auto &internals = *internals_ptr; + PYBIND11_LOCK_INTERNALS(internals); + cb(internals); + } +} + template inline auto with_exception_translators(const F &cb) -> decltype(cb(get_internals().registered_exception_translators,