From bfe55ed32fa5232a71aaf837c043db7417e98062 Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Wed, 21 Jan 2026 16:42:49 -0500 Subject: [PATCH 1/8] Fix race condition with py::make_key_iterator in free threading The creation of the iterator class needs to be synchronized. --- include/pybind11/detail/internals.h | 30 +++++++++++++++++++++++++++-- include/pybind11/pybind11.h | 1 + 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 9b3e69f4db..c9ffcb712e 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -238,6 +238,32 @@ class pymutex { void unlock() { PyMutex_Unlock(&mutex); } }; +// A recursive mutex implementation using PyMutex +class pyrecursive_mutex { + PyMutex mutex; + std::atomic owner; + size_t lock_count; + +public: + pyrecursive_mutex() : mutex({}), owner(0), lock_count(0) {} + void lock() { + if (owner.load(std::memory_order_relaxed) == _Py_ThreadId()) { + ++lock_count; + return; + } + PyMutex_Lock(&mutex); + owner.store(_Py_ThreadId(), std::memory_order_relaxed); + } + void unlock() { + if (lock_count > 0) { + --lock_count; + return; + } + owner.store(0, std::memory_order_relaxed); + PyMutex_Unlock(&mutex); + } +}; + // Instance map shards are used to reduce mutex contention in free-threaded Python. struct instance_map_shard { instance_map registered_instances; @@ -271,7 +297,7 @@ class loader_life_support; /// `PYBIND11_INTERNALS_VERSION` must be incremented. 
struct internals { #ifdef Py_GIL_DISABLED - pymutex mutex; + pyrecursive_mutex mutex; pymutex exception_translator_mutex; #endif #if PYBIND11_INTERNALS_VERSION >= 12 @@ -856,7 +882,7 @@ inline local_internals &get_local_internals() { } #ifdef Py_GIL_DISABLED -# define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock lock((internals).mutex) +# define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock lock((internals).mutex) #else # define PYBIND11_LOCK_INTERNALS(internals) #endif diff --git a/include/pybind11/pybind11.h b/include/pybind11/pybind11.h index 02d2e72c2c..f88fc20272 100644 --- a/include/pybind11/pybind11.h +++ b/include/pybind11/pybind11.h @@ -3173,6 +3173,7 @@ iterator make_iterator_impl(Iterator first, Sentinel last, Extra &&...extra) { using state = detail::iterator_state; // TODO: state captures only the types of Extra, not the values + PYBIND11_LOCK_INTERNALS(get_internals()); if (!detail::get_type_info(typeid(state), false)) { class_(handle(), "iterator", pybind11::module_local()) .def( From fa039538d390b6850dce5a65c3cdbb238455a707 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 21 Jan 2026 21:47:11 +0000 Subject: [PATCH 2/8] style: pre-commit fixes --- include/pybind11/detail/internals.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index c9ffcb712e..b6c2897ac2 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -882,7 +882,8 @@ inline local_internals &get_local_internals() { } #ifdef Py_GIL_DISABLED -# define PYBIND11_LOCK_INTERNALS(internals) std::unique_lock lock((internals).mutex) +# define PYBIND11_LOCK_INTERNALS(internals) \ + std::unique_lock lock((internals).mutex) #else # define PYBIND11_LOCK_INTERNALS(internals) #endif From 70c2ffcb67f33f1af554d1185f14bf1fb6f9bedb Mon Sep 17 00:00:00 2001 From: Sam Gross Date: Thu, 22 Jan 2026 
12:25:57 -0500 Subject: [PATCH 3/8] Use PyCriticalSection_BeginMutex instead of recursive mutex --- include/pybind11/detail/internals.h | 35 +++++++++-------------------- 1 file changed, 11 insertions(+), 24 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index b6c2897ac2..74d96d27a7 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -230,6 +230,7 @@ using instance_map = std::unordered_multimap; #ifdef Py_GIL_DISABLED // Wrapper around PyMutex to provide BasicLockable semantics class pymutex { + friend class pycritical_section; PyMutex mutex; public: @@ -238,29 +239,16 @@ class pymutex { void unlock() { PyMutex_Unlock(&mutex); } }; -// A recursive mutex implementation using PyMutex -class pyrecursive_mutex { - PyMutex mutex; - std::atomic owner; - size_t lock_count; +class pycritical_section { + pymutex& mutex; + PyCriticalSection cs; public: - pyrecursive_mutex() : mutex({}), owner(0), lock_count(0) {} - void lock() { - if (owner.load(std::memory_order_relaxed) == _Py_ThreadId()) { - ++lock_count; - return; - } - PyMutex_Lock(&mutex); - owner.store(_Py_ThreadId(), std::memory_order_relaxed); + explicit pycritical_section(pymutex& m) : mutex(m) { + PyCriticalSection_BeginMutex(&cs, &mutex.mutex); } - void unlock() { - if (lock_count > 0) { - --lock_count; - return; - } - owner.store(0, std::memory_order_relaxed); - PyMutex_Unlock(&mutex); + ~pycritical_section() { + PyCriticalSection_End(&cs); } }; @@ -297,7 +285,7 @@ class loader_life_support; /// `PYBIND11_INTERNALS_VERSION` must be incremented. 
struct internals { #ifdef Py_GIL_DISABLED - pyrecursive_mutex mutex; + pymutex mutex; pymutex exception_translator_mutex; #endif #if PYBIND11_INTERNALS_VERSION >= 12 @@ -882,8 +870,7 @@ inline local_internals &get_local_internals() { } #ifdef Py_GIL_DISABLED -# define PYBIND11_LOCK_INTERNALS(internals) \ - std::unique_lock lock((internals).mutex) +# define PYBIND11_LOCK_INTERNALS(internals) pycritical_section lock((internals).mutex) #else # define PYBIND11_LOCK_INTERNALS(internals) #endif @@ -912,7 +899,7 @@ inline auto with_exception_translators(const F &cb) get_local_internals().registered_exception_translators)) { auto &internals = get_internals(); #ifdef Py_GIL_DISABLED - std::unique_lock lock((internals).exception_translator_mutex); + pycritical_section lock((internals).exception_translator_mutex); #endif auto &local_internals = get_local_internals(); return cb(internals.registered_exception_translators, From 8690dd71be5d50ae611639a93efe090d60c963b0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 22 Jan 2026 17:43:18 +0000 Subject: [PATCH 4/8] style: pre-commit fixes --- include/pybind11/detail/internals.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index 74d96d27a7..c489dac6d3 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -240,16 +240,14 @@ class pymutex { }; class pycritical_section { - pymutex& mutex; + pymutex &mutex; PyCriticalSection cs; public: - explicit pycritical_section(pymutex& m) : mutex(m) { + explicit pycritical_section(pymutex &m) : mutex(m) { PyCriticalSection_BeginMutex(&cs, &mutex.mutex); } - ~pycritical_section() { - PyCriticalSection_End(&cs); - } + ~pycritical_section() { PyCriticalSection_End(&cs); } }; // Instance map shards are used to reduce mutex contention in free-threaded Python. 
From be01d1ebe8ed966ef68eb05a11910f508478cf4c Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 24 Jan 2026 10:23:24 -0800 Subject: [PATCH 5/8] Make pycritical_section non-copyable and non-movable The pycritical_section class is an RAII wrapper that manages the lifecycle of a Python critical section: - Acquires the critical section in the constructor via PyCriticalSection_BeginMutex - Releases it in the destructor via PyCriticalSection_End - Holds a reference to a pymutex Allowing copy or move operations would be dangerous: 1. Copy: Both the original and the copied object would call PyCriticalSection_End for the same critical section in their destructors (each on its own copy of the PyCriticalSection state), leading to a double unlock and undefined behavior. 2. Move: The moved-from object's destructor would still run and attempt to end the critical section, while the moved-to object would also try to end it, again causing a double unlock. This follows the same pattern used by other RAII lock guards in the codebase, such as gil_scoped_acquire and gil_scoped_release, which also explicitly delete copy/move operations to prevent similar issues. By explicitly deleting these operations, we prevent accidental misuse and ensure the critical section is properly managed by a single RAII object throughout its lifetime. 
--- include/pybind11/detail/internals.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/pybind11/detail/internals.h b/include/pybind11/detail/internals.h index c489dac6d3..37cea1a311 100644 --- a/include/pybind11/detail/internals.h +++ b/include/pybind11/detail/internals.h @@ -248,6 +248,12 @@ class pycritical_section { PyCriticalSection_BeginMutex(&cs, &mutex.mutex); } ~pycritical_section() { PyCriticalSection_End(&cs); } + + // Non-copyable and non-movable to prevent double-unlock + pycritical_section(const pycritical_section &) = delete; + pycritical_section &operator=(const pycritical_section &) = delete; + pycritical_section(pycritical_section &&) = delete; + pycritical_section &operator=(pycritical_section &&) = delete; }; // Instance map shards are used to reduce mutex contention in free-threaded Python. From 61e032eb9f8404757b698fc81feae0c4d39a8f6a Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 24 Jan 2026 10:26:35 -0800 Subject: [PATCH 6/8] Drop Python 3.13t support from CI Python 3.13t was experimental, while Python 3.14t is not. This PR uses PyCriticalSection_BeginMutex which is only available in Python 3.14+, making Python 3.13t incompatible with the changes. Removed all Python 3.13t CI jobs: - ubuntu-latest, 3.13t (standard-large matrix) - macos-15-intel, 3.13t (standard-large matrix) - windows-latest, 3.13t (standard-large matrix) - manylinux job testing 3.13t This aligns with the decision to drop Python 3.13t support as discussed in PR #5971. 
--- .github/workflows/ci.yml | 35 ----------------------------------- 1 file changed, 35 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 45f8675889..07535ea750 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,9 +83,6 @@ jobs: - runs-on: ubuntu-latest python-version: '3.12' cmake-args: -DPYBIND11_TEST_SMART_HOLDER=ON -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON - - runs-on: ubuntu-latest - python-version: '3.13t' - cmake-args: -DCMAKE_CXX_STANDARD=20 -DPYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION=ON - runs-on: ubuntu-latest python-version: '3.14' cmake-args: -DCMAKE_CXX_STANDARD=14 @@ -105,9 +102,6 @@ jobs: - runs-on: macos-latest python-version: '3.12' cmake-args: -DCMAKE_CXX_STANDARD=17 -DPYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION=ON - - runs-on: macos-15-intel - python-version: '3.13t' - cmake-args: -DCMAKE_CXX_STANDARD=11 - runs-on: macos-latest python-version: '3.14t' cmake-args: -DCMAKE_CXX_STANDARD=20 @@ -138,9 +132,6 @@ jobs: - runs-on: windows-2022 python-version: '3.13' cmake-args: -DCMAKE_MSVC_RUNTIME_LIBRARY=MultiThreadedDebugDLL - - runs-on: windows-latest - python-version: '3.13t' - cmake-args: -DCMAKE_CXX_STANDARD=17 - runs-on: windows-latest python-version: '3.14' cmake-args: -DCMAKE_CXX_STANDARD=20 @@ -239,32 +230,6 @@ jobs: run: cmake --build . 
--target test_cross_module_rtti - manylinux: - name: Manylinux on 🐍 3.13t • GIL - if: github.event.pull_request.draft == false - runs-on: ubuntu-latest - timeout-minutes: 40 - container: quay.io/pypa/musllinux_1_2_x86_64:latest - steps: - - uses: actions/checkout@v6 - with: - fetch-depth: 0 - - - name: Prepare uv's path - run: echo "$HOME/.local/bin" >> $GITHUB_PATH - - - name: Install ninja - run: uv tool install ninja - - - name: Configure via preset - run: cmake --preset venv -DPYBIND11_CREATE_WITH_UV=python3.13t - - - name: Build C++11 - run: cmake --build --preset venv - - - name: Python tests C++11 - run: cmake --build --preset testsvenv -t pytest - deadsnakes: if: github.event.pull_request.draft == false strategy: From 29840cec684546f06b5d309e7716b3a3b90962f2 Mon Sep 17 00:00:00 2001 From: "Ralf W. Grosse-Kunstleve" Date: Sat, 24 Jan 2026 10:35:30 -0800 Subject: [PATCH 7/8] Add Python 3.13 (default) replacement jobs for removed 3.13t jobs After removing Python 3.13t support (incompatible with PyCriticalSection_BeginMutex which requires Python 3.14+), we're adding replacement jobs using Python 3.13 (default) to maintain test coverage in key dimensions: 1. ubuntu-latest, Python 3.13: C++20 + DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION - Replaces: ubuntu-latest, 3.13t with same config - Maintains coverage for this specific configuration combination 2. macos-15-intel, Python 3.13: C++11 - Replaces: macos-15-intel, 3.13t with same config - Maintains macOS coverage for Python 3.13 3. manylinux (musllinux), Python 3.13: GIL testing - Replaces: manylinux, 3.13t job - Maintains manylinux/musllinux container testing coverage These additions are proposed to get feedback on which jobs should be kept to maintain appropriate test coverage without the experimental 3.13t builds. 
--- .github/workflows/ci.yml | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 07535ea750..4fb13176e7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,6 +83,9 @@ jobs: - runs-on: ubuntu-latest python-version: '3.12' cmake-args: -DPYBIND11_TEST_SMART_HOLDER=ON -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON + - runs-on: ubuntu-latest + python-version: '3.13' + cmake-args: -DCMAKE_CXX_STANDARD=20 -DPYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION=ON - runs-on: ubuntu-latest python-version: '3.14' cmake-args: -DCMAKE_CXX_STANDARD=14 @@ -99,6 +102,9 @@ jobs: - runs-on: macos-15-intel python-version: '3.11' cmake-args: -DPYBIND11_TEST_SMART_HOLDER=ON + - runs-on: macos-15-intel + python-version: '3.13' + cmake-args: -DCMAKE_CXX_STANDARD=11 - runs-on: macos-latest python-version: '3.12' cmake-args: -DCMAKE_CXX_STANDARD=17 -DPYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION=ON @@ -230,6 +236,32 @@ jobs: run: cmake --build . 
--target test_cross_module_rtti + manylinux: + name: Manylinux on 🐍 3.13 • GIL + if: github.event.pull_request.draft == false + runs-on: ubuntu-latest + timeout-minutes: 40 + container: quay.io/pypa/musllinux_1_2_x86_64:latest + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Prepare uv's path + run: echo "$HOME/.local/bin" >> $GITHUB_PATH + + - name: Install ninja + run: uv tool install ninja + + - name: Configure via preset + run: cmake --preset venv -DPYBIND11_CREATE_WITH_UV=python3.13 + + - name: Build C++11 + run: cmake --build --preset venv + + - name: Python tests C++11 + run: cmake --build --preset testsvenv -t pytest + deadsnakes: if: github.event.pull_request.draft == false strategy: From 91189c9242e787922d26ca467710dcc494871b82 Mon Sep 17 00:00:00 2001 From: Henry Schreiner Date: Mon, 26 Jan 2026 12:37:57 -0500 Subject: [PATCH 8/8] ci: run in free-threading mode a bit more on 3.14 --- .github/workflows/ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4fb13176e7..2be71f404f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,7 +84,7 @@ jobs: python-version: '3.12' cmake-args: -DPYBIND11_TEST_SMART_HOLDER=ON -DPYBIND11_SIMPLE_GIL_MANAGEMENT=ON - runs-on: ubuntu-latest - python-version: '3.13' + python-version: '3.14t' cmake-args: -DCMAKE_CXX_STANDARD=20 -DPYBIND11_DISABLE_HANDLE_TYPE_NAME_DEFAULT_IMPLEMENTATION=ON - runs-on: ubuntu-latest python-version: '3.14' @@ -237,7 +237,7 @@ jobs: manylinux: - name: Manylinux on 🐍 3.13 • GIL + name: Manylinux on 🐍 3.14t if: github.event.pull_request.draft == false runs-on: ubuntu-latest timeout-minutes: 40 @@ -254,7 +254,7 @@ jobs: run: uv tool install ninja - name: Configure via preset - run: cmake --preset venv -DPYBIND11_CREATE_WITH_UV=python3.13 + run: cmake --preset venv -DPYBIND11_CREATE_WITH_UV=python3.14t - name: Build C++11 run: cmake --build --preset venv