From d156db5f3446a7c4c98095d535197451029ed079 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Tue, 10 Feb 2026 12:00:39 +1300
Subject: [PATCH 1/4] New version.

---
 VERSION.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/VERSION.txt b/VERSION.txt
index 524ef5293..3e63e3598 100644
--- a/VERSION.txt
+++ b/VERSION.txt
@@ -1 +1 @@
-0.20251204.0
+0.20260210.0

From 617a18f643c82be371cbe3bca6889f1f2f6d4e11 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Tue, 10 Feb 2026 12:00:06 +1300
Subject: [PATCH 2/4] nanobind: upgraded to version 2.11.0.

---
 extern/README.md                              |   2 +-
 extern/nanobind/.github/workflows/ci.yml      | 190 +++--
 extern/nanobind/CMakeLists.txt                |   2 +-
 extern/nanobind/README.md                     |  58 ++
 extern/nanobind/cmake/collect-symbols-pypy.py |   5 +-
 extern/nanobind/cmake/collect-symbols.py      |   4 +-
 extern/nanobind/cmake/darwin-ld-cpython.sym   |  59 ++
 extern/nanobind/cmake/darwin-ld-pypy.sym      | 172 +++-
 extern/nanobind/cmake/nanobind-config.cmake   |  50 +-
 extern/nanobind/docs/api_bazel.rst            |  26 +-
 extern/nanobind/docs/api_core.rst             |   6 +-
 extern/nanobind/docs/api_extra.rst            |  15 +
 extern/nanobind/docs/bazel.rst                |  24 +-
 extern/nanobind/docs/building.rst             |   4 +-
 extern/nanobind/docs/changelog.rst            | 183 +++++
 extern/nanobind/docs/classes.rst              |  40 +
 extern/nanobind/docs/exceptions.rst           |   6 +-
 extern/nanobind/docs/index.rst                |   9 +-
 extern/nanobind/docs/ndarray.rst              | 114 ++-
 extern/nanobind/docs/packaging.rst            |   5 +-
 extern/nanobind/docs/porting.rst              |   5 +
 extern/nanobind/docs/refleaks.rst             |  30 +-
 extern/nanobind/docs/typing.rst               |   9 +-
 extern/nanobind/docs/why.rst                  |  95 +--
 .../nanobind/include/nanobind/eigen/dense.h   |   9 +-
 .../nanobind/include/nanobind/intrusive/ref.h |   2 +-
 extern/nanobind/include/nanobind/nanobind.h   |   5 +-
 extern/nanobind/include/nanobind/nb_attr.h    |  54 +-
 extern/nanobind/include/nanobind/nb_call.h    |   2 +-
 extern/nanobind/include/nanobind/nb_cast.h    |  20 +-
 extern/nanobind/include/nanobind/nb_class.h   |  10 +-
 extern/nanobind/include/nanobind/nb_defs.h    |  68 +-
 extern/nanobind/include/nanobind/nb_func.h    |  25 +-
 extern/nanobind/include/nanobind/nb_lib.h     |  26 +-
 extern/nanobind/include/nanobind/nb_python.h  |   4 +-
 extern/nanobind/include/nanobind/nb_traits.h  |   6 +
 extern/nanobind/include/nanobind/nb_types.h   |  44 +-
 extern/nanobind/include/nanobind/ndarray.h    |  10 +-
 extern/nanobind/include/nanobind/stl/chrono.h |  18 +-
 .../include/nanobind/stl/detail/nb_array.h    |   6 +-
 .../include/nanobind/stl/detail/nb_dict.h     |   2 +-
 .../include/nanobind/stl/detail/nb_list.h     |   2 +-
 .../include/nanobind/stl/detail/nb_set.h      |   2 +-
 .../include/nanobind/stl/filesystem.h         |   4 -
 .../nanobind/include/nanobind/stl/function.h  |   2 +-
 .../nanobind/include/nanobind/stl/optional.h  |   3 +
 extern/nanobind/include/nanobind/stl/pair.h   |   2 +-
 extern/nanobind/include/nanobind/stl/tuple.h  |   2 +-
 extern/nanobind/include/nanobind/trampoline.h |   2 +-
 extern/nanobind/include/nanobind/typing.h     |   5 +
 extern/nanobind/pyproject.toml                |   2 +-
 extern/nanobind/src/__init__.py               |   6 +-
 extern/nanobind/src/common.cpp                |  53 +-
 extern/nanobind/src/error.cpp                 |  11 -
 extern/nanobind/src/implicit.cpp              |  12 +-
 extern/nanobind/src/nb_abi.h                  |   2 +-
 extern/nanobind/src/nb_combined.cpp           |   4 +-
 extern/nanobind/src/nb_enum.cpp               |   5 +-
 extern/nanobind/src/nb_func.cpp               |  50 +-
 extern/nanobind/src/nb_internals.cpp          | 133 ++-
 extern/nanobind/src/nb_internals.h            |  72 +-
 extern/nanobind/src/nb_ndarray.cpp            | 755 ++++++++++++------
 extern/nanobind/src/nb_type.cpp               | 361 ++++-----
 extern/nanobind/src/stubgen.py                | 153 +++-
 extern/nanobind/src/version.py                |  25 +-
 extern/nanobind/tests/CMakeLists.txt          |  12 +-
 extern/nanobind/tests/conftest.py             |   2 +-
 extern/nanobind/tests/inter_module.cpp        |   8 +-
 extern/nanobind/tests/inter_module.h          |   3 +-
 extern/nanobind/tests/pattern_file.nb         |   6 +
 extern/nanobind/tests/py_stub_test.py         |   1 +
 extern/nanobind/tests/py_stub_test.pyi.ref    |   2 +
 extern/nanobind/tests/test_classes.cpp        |  78 +-
 extern/nanobind/tests/test_classes.h          |  21 +
 extern/nanobind/tests/test_classes.py         |  63 +-
 .../nanobind/tests/test_classes_ext.pyi.ref   |  32 +
 extern/nanobind/tests/test_classes_extra.cpp  |  22 +
 extern/nanobind/tests/test_eigen.py           |  17 +
 extern/nanobind/tests/test_enum.cpp           |  11 +
 extern/nanobind/tests/test_enum.py            |  15 +
 extern/nanobind/tests/test_enum_ext.pyi.ref   |   9 +
 extern/nanobind/tests/test_functions.cpp      |   6 +
 extern/nanobind/tests/test_functions.py       |  36 +-
 .../nanobind/tests/test_functions_ext.pyi.ref |   4 +
 extern/nanobind/tests/test_inter_module.py    |  78 +-
 extern/nanobind/tests/test_inter_module_2.cpp |   1 +
 extern/nanobind/tests/test_jax.cpp            |  11 +-
 extern/nanobind/tests/test_jax.py             |  97 +++
 extern/nanobind/tests/test_ndarray.cpp        |  90 ++-
 extern/nanobind/tests/test_ndarray.py         | 453 ++++++-----
 .../nanobind/tests/test_ndarray_ext.pyi.ref   |  22 +-
 extern/nanobind/tests/test_specialization.py  | 103 +++
 extern/nanobind/tests/test_stl.cpp            |   6 +-
 extern/nanobind/tests/test_stl.py             |  38 +-
 extern/nanobind/tests/test_stl_bind_map.py    |   5 +-
 extern/nanobind/tests/test_stl_ext.pyi.ref    |   2 +-
 extern/nanobind/tests/test_stubs.py           |   4 +-
 extern/nanobind/tests/test_tensorflow.cpp     |  10 +-
 extern/nanobind/tests/test_tensorflow.py      |  97 +++
 extern/nanobind/tests/test_thread.cpp         |  22 +
 extern/nanobind/tests/test_thread.py          |  16 +
 extern/nanobind/tests/test_typing.cpp         |  12 +-
 extern/nanobind/tests/test_typing.py          |   1 -
 extern/nanobind/tests/test_typing_ext.pyi.ref |  14 +-
 104 files changed, 3219 insertions(+), 1303 deletions(-)
 create mode 100644 extern/nanobind/tests/test_classes.h
 create mode 100644 extern/nanobind/tests/test_classes_extra.cpp
 create mode 100644 extern/nanobind/tests/test_specialization.py

diff --git a/extern/README.md b/extern/README.md
index 06df16d38..4fe5a6b81 100644
--- a/extern/README.md
+++ b/extern/README.md
@@ -2,4 +2,4 @@
 
 - [GoogleTest](https://github.com/google/googletest) [1.17.0](https://github.com/google/googletest/releases/tag/v1.17.0)
 - [modp_b64](https://chromium.googlesource.com/chromium/src/third_party/modp_b64/) at commit [5068510](https://chromium.googlesource.com/chromium/src/third_party/modp_b64/+/50685101d51ef9aabbd60c94f52d9e026d39c509)
-- [nanobind](https://github.com/wjakob/nanobind) [2.9.2](https://github.com/wjakob/nanobind/releases/tag/v2.9.2)
+- [nanobind](https://github.com/wjakob/nanobind) [2.11.0](https://github.com/wjakob/nanobind/releases/tag/v2.11.0)
diff --git a/extern/nanobind/.github/workflows/ci.yml b/extern/nanobind/.github/workflows/ci.yml
index ee4589d95..9568eca5d 100644
--- a/extern/nanobind/.github/workflows/ci.yml
+++ b/extern/nanobind/.github/workflows/ci.yml
@@ -20,8 +20,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: ['ubuntu-latest', 'windows-2022', 'macos-13']
-        python: ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13', '3.14.0-rc.2', 'pypy3.9-v7.3.16', 'pypy3.10-v7.3.17']
+        os: ['ubuntu-latest', 'windows-2022', 'macos-15']
+        python: ['3.9', '3.10', '3.11', '3.12', '3.13', '3.14', 'pypy3.10-v7.3.19', 'pypy3.11-v7.3.20']
 
     name: "Python ${{ matrix.python }} / ${{ matrix.os }}"
     runs-on: ${{ matrix.os }}
@@ -107,98 +107,180 @@ jobs:
         cd build;
         python3 -m pytest
 
-  old-compilers:
-    if: false # Disable for now, the CI is glitchy
-    strategy:
-      fail-fast: false
-      matrix:
-        include:
-          - cc: gcc-8
-            cxx: g++-8
-            apt: gcc-8 g++-8
-          - cc: gcc-9
-            cxx: g++-9
-            apt: gcc-9
-          - cc: clang-8
-            cxx: clang++-8
-            apt: clang-8
-          - cc: clang-9
-            cxx: clang++-9
-            apt: clang-9
-          - cc: clang-10
-            cxx: clang++-10
-            apt: clang-10
-
+  free-threaded:
+    name: "Python 3.14-dev / ubuntu.latest [free-threaded]"
     runs-on: ubuntu-latest
-    container: ubuntu:20.04
-    name: "${{matrix.cc}} on Ubuntu 20.04"
-    env:
-      CC: ${{matrix.cc}}
-      CXX: ${{matrix.cxx}}
-      DEBIAN_FRONTEND: noninteractive
 
     steps:
-    - name: Install dependencies
-      run: |
-        apt-get update
-        apt-get install -y python3-numpy python3-pip python3-pytest libeigen3-dev cmake git ${{matrix.apt}}
-        python3 -m pip install typing_extensions
-
     - uses: actions/checkout@v4
       with:
         submodules: true
 
+    - uses: deadsnakes/action@v3.1.0
+      with:
+        python-version: 3.14-dev
+        nogil: true
+
+    - name: Install the latest CMake
+      uses: lukka/get-cmake@latest
+
+    - name: Install PyTest
+      run: |
+        python -m pip install pytest pytest-github-actions-annotate-failures
+
     - name: Configure
-      run: cmake -S . -B build
+      run: >
+        cmake -S . -B build -DNB_TEST_FREE_THREADED=ON
 
     - name: Build C++
-      run: cmake --build build -j 2
+      run: >
+        cmake --build build -j 2
 
     - name: Check ABI tag
       run: >
         cd build/tests;
-        python3 -c 'import test_functions_ext as t; print(f"ABI tag is \"{ t.abi_tag() }\"")'
+        python -c 'import test_functions_ext as t; print(f"ABI tag is \"{ t.abi_tag() }\"")'
 
     - name: Run tests
       run: >
         cd build;
-        python3 -m pytest
+        python -m pytest
 
-  free-threaded:
-    name: "Python 3.14-dev / ubuntu.latest [free-threaded]"
-    runs-on: ubuntu-latest
+  mingw:
+    runs-on: windows-2022
+    name: "Python ${{ matrix.python }} / MinGW-w64"
+    strategy:
+      fail-fast: false
+      matrix:
+        python: ['3.12']
 
     steps:
     - uses: actions/checkout@v4
       with:
         submodules: true
 
-    - uses: deadsnakes/action@v3.1.0
+    - name: Setup Python ${{ matrix.python }}
+      uses: actions/setup-python@v5
       with:
-        python-version: 3.14-dev
-        nogil: true
+        python-version: ${{ matrix.python }}
+        cache: 'pip'
+
+    - name: Setup MSYS2 (MINGW64)
+      uses: msys2/setup-msys2@v2
+      with:
+        msystem: MINGW64
+        install: >-
+          mingw-w64-x86_64-gcc
+          mingw-w64-x86_64-cmake
+          mingw-w64-x86_64-ninja
+          mingw-w64-x86_64-python
+          mingw-w64-x86_64-python-pip
+          mingw-w64-x86_64-python-pytest
+
+    - name: Install Python packages
+      shell: msys2 {0}
+      run: |
+        python -m pip install pytest-github-actions-annotate-failures typing_extensions
+
+    - name: Configure
+      shell: msys2 {0}
+      run: |
+        export PATH=/mingw64/bin:$PATH
+        export CC=gcc
+        export CXX=g++
+        PYEXE=/mingw64/bin/python3.exe
+        cmake -S . -B build -G Ninja \
+          -DPython_EXECUTABLE="$(cygpath -w "$PYEXE")" \
+          -DNB_TEST_FREE_THREADED=OFF
+
+    - name: Build C++
+      shell: msys2 {0}
+      run: cmake --build build -j 2
+
+    - name: Check ABI tag
+      shell: msys2 {0}
+      run: |
+        cd build/tests
+        python -c 'import test_functions_ext as t; print(f"ABI tag is \"{t.abi_tag()}\"")'
+
+    - name: Run tests
+      shell: msys2 {0}
+      run: |
+        cd build
+        python -m pytest
+
+  intel:
+    runs-on: ubuntu-22.04
+    name: "Python ${{ matrix.python }} / Intel ICX"
+    strategy:
+      fail-fast: false
+      matrix:
+        python: ['3.12']
+
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        submodules: true
+
+    - name: Setup Python ${{ matrix.python }}
+      uses: actions/setup-python@v5
+      with:
+        python-version: ${{ matrix.python }}
+        cache: 'pip'
+
+    - name: Cache Intel oneAPI
+      id: cache-oneapi
+      uses: actions/cache@v4
+      with:
+        path: /opt/intel/oneapi
+        key: install-${{ runner.os }}-intel-oneapi-compiler-2025.2
+
+    - name: Add Intel repository
+      if: steps.cache-oneapi.outputs.cache-hit != 'true'
+      run: |
+        wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor | sudo tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null
+        echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list
+        sudo apt-get update
+
+    - name: Install Intel oneAPI compilers
+      if: steps.cache-oneapi.outputs.cache-hit != 'true'
+      run: |
+        sudo apt-get install -y intel-oneapi-compiler-dpcpp-cpp
+
+    - name: Cleanup Intel oneAPI cache
+      if: steps.cache-oneapi.outputs.cache-hit != 'true'
+      run: |
+        sudo rm -rf /opt/intel/oneapi/compiler/*/linux/lib/ia32
+        sudo rm -rf /opt/intel/oneapi/compiler/*/linux/lib/emu
+        sudo rm -rf /opt/intel/oneapi/compiler/*/linux/lib/oclfpga
 
     - name: Install the latest CMake
       uses: lukka/get-cmake@latest
 
     - name: Install PyTest
       run: |
-        python -m pip install pytest pytest-github-actions-annotate-failures
+        python -m pip install pytest pytest-github-actions-annotate-failures typing_extensions
 
     - name: Configure
-      run: >
-        cmake -S . -B build -DNB_TEST_FREE_THREADED=ON
+      run: |
+        source /opt/intel/oneapi/setvars.sh
+        export CC=icx
+        export CXX=icpx
+        cmake -S . -B build
 
     - name: Build C++
-      run: >
+      run: |
+        source /opt/intel/oneapi/setvars.sh
         cmake --build build -j 2
 
     - name: Check ABI tag
-      run: >
-        cd build/tests;
+      run: |
+        source /opt/intel/oneapi/setvars.sh
+        cd build/tests
         python -c 'import test_functions_ext as t; print(f"ABI tag is \"{ t.abi_tag() }\"")'
 
     - name: Run tests
-      run: >
-        cd build;
+      run: |
+        source /opt/intel/oneapi/setvars.sh
+        cd build
         python -m pytest
diff --git a/extern/nanobind/CMakeLists.txt b/extern/nanobind/CMakeLists.txt
index 1c1ebdf23..3d59ca634 100644
--- a/extern/nanobind/CMakeLists.txt
+++ b/extern/nanobind/CMakeLists.txt
@@ -145,7 +145,7 @@ if (NOT TARGET Python::Module OR NOT TARGET Python::Interpreter)
     set(NB_PYTHON_DEV_MODULE Development.Module)
   endif()
 
-  find_package(Python 3.8
+  find_package(Python 3.9
     REQUIRED COMPONENTS Interpreter ${NB_PYTHON_DEV_MODULE}
     OPTIONAL_COMPONENTS Development.SABIModule)
 endif()
diff --git a/extern/nanobind/README.md b/extern/nanobind/README.md
index 4d5b12ade..efff040f5 100644
--- a/extern/nanobind/README.md
+++ b/extern/nanobind/README.md
@@ -30,6 +30,64 @@ runtime overheads compared to pybind11. nanobind also outperforms Cython in
 important metrics (**3-12×** binary size reduction, **1.6-4×** compilation time
 reduction, similar runtime performance).
 
+## Testimonials
+
+A selection of testimonials from projects that migrated from pybind11 to nanobind.
+
+<table>
+<tr><td>
+
+**IREE** · [LLVM Discourse](https://discourse.llvm.org/t/nanobind-for-mlir-python-bindings/83511/5)
+
+> *"IREE and its derivatives switched 1.5 years ago. It has been one of the single best dep decisions I've made. Not only is it much-much faster to compile, it produces smaller binaries and has a much more lean interface to the underlying Python machinery that all adds up to significant performance improvements. Worked exactly like it said on the tin."*
+
+— **Stella Laurenzo**, Google
+
+</td></tr>
+<tr><td>
+
+**XLA/MLIR** · [GitHub PR](https://github.com/llvm/llvm-project/pull/118583)
+
+> *"For a complicated Google-internal LLM model in JAX, this change improves the MLIR lowering time by around 5s (out of around 30s), which is a significant speedup for simply switching binding frameworks."*
+
+— **Peter Hawkins**, Google
+
+</td></tr>
+<tr><td>
+
+**Apple MLX** · [X](https://x.com/awnihannun/status/1890495434021326974)
+
+> *"MLX uses nanobind to bind C++ to Python. It's a critical piece of MLX infra and is why running Python code is nearly the same speed as running C++ directly. Also makes it super easy to move arrays between frameworks."*
+
+— **Awni Hannun**, Apple
+
+</td></tr>
+<tr><td>
+
+**JAX** · [GitHub](https://github.com/jax-ml/jax/commit/70b7d501816c6e9f131a0a8b3e4a527e53eeebd7)
+
+> *"nanobind has a number of [advantages](https://nanobind.readthedocs.io/en/latest/why.html), notably speed of compilation and dispatch, but the main reason to do this for these bindings is because nanobind can target the Python Stable ABI starting with Python 3.12. This means that we will not need to ship per-Python version CUDA plugins starting with Python 3.12."*
+
+— **Peter Hawkins**, Google
+
+</td></tr>
+<tr><td>
+
+**FEniCS / DOLFINx** · [GitHub](https://github.com/FEniCS/dolfinx/pull/2820)
+
+> *"nanobind is smaller than pybind11, the wrappers build faster and it has significantly improved support for wrapping multi-dimensional arrays, which we use heavily. The nanobind docs are easier to follow on the low-level details, which makes understanding the memory management in the wrapper layer easier."*
+
+— **Garth N. Wells**
+</td></tr>
+<tr><td>
+
+**PennyLane** · [Release notes](https://docs.pennylane.ai/projects/catalyst/en/stable/dev/release_notes.html)
+
+> *"Nanobind has been developed as a natural successor to the pybind11 library and offers a number of advantages like its ability to target Python's Stable ABI."*
+
+</td></tr>
+</table>
+
 ## Documentation
 
 Please see the following links for tutorial and reference documentation in
diff --git a/extern/nanobind/cmake/collect-symbols-pypy.py b/extern/nanobind/cmake/collect-symbols-pypy.py
index 805490d0c..b76886d01 100644
--- a/extern/nanobind/cmake/collect-symbols-pypy.py
+++ b/extern/nanobind/cmake/collect-symbols-pypy.py
@@ -5,7 +5,8 @@
 funcs: "set[str]" = set()
 
 files = [
-    ('https://downloads.python.org/pypy/pypy3.9-v7.3.11-macos_arm64.tar.bz2', 'pypy3.9-v7.3.11-macos_arm64/bin/libpypy3.9-c.dylib')
+    ('https://downloads.python.org/pypy/pypy3.9-v7.3.11-macos_arm64.tar.bz2', 'pypy3.9-v7.3.11-macos_arm64/bin/libpypy3.9-c.dylib'),
+    ('https://downloads.python.org/pypy/pypy3.11-v7.3.20-macos_arm64.tar.bz2', 'pypy3.11-v7.3.20-macos_arm64/bin/libpypy3.11-c.dylib'),
 ]
 
 for f in files:
@@ -25,4 +26,4 @@
 
 with open("darwin-ld-pypy.sym", "w") as f:
     for func in sorted(list(funcs)):
-        f.write(f'-U _{func}\n')
+        f.write(f'-U {func}\n')
diff --git a/extern/nanobind/cmake/collect-symbols.py b/extern/nanobind/cmake/collect-symbols.py
index ad92da4c1..246397686 100644
--- a/extern/nanobind/cmake/collect-symbols.py
+++ b/extern/nanobind/cmake/collect-symbols.py
@@ -10,13 +10,13 @@
 
 funcs: "set[str]" = set()
 
-for ver in ['3.7', '3.8', '3.9']:
+for ver in ['3.9']:
   url = f'https://raw.githubusercontent.com/python/cpython/{ver}/PC/python3.def'
   output = urlopen(url).read().decode('utf-8')
   for match in re.findall(r"  (.*)=.*", output):
       funcs.add(match)
 
-for ver in ['3.10', '3.11', 'main']:
+for ver in ['3.10', '3.11', '3.12', '3.13', '3.14', 'main']:
     url = f'https://raw.githubusercontent.com/python/cpython/{ver}/PC/python3dll.c'
     output = urlopen(url).read().decode('utf-8')
     for match in re.findall(r"EXPORT_FUNC\((.*)\)", output):
diff --git a/extern/nanobind/cmake/darwin-ld-cpython.sym b/extern/nanobind/cmake/darwin-ld-cpython.sym
index 031e74e1f..819f2eca0 100644
--- a/extern/nanobind/cmake/darwin-ld-cpython.sym
+++ b/extern/nanobind/cmake/darwin-ld-cpython.sym
@@ -1,3 +1,4 @@
+-U _PyABIInfo_Check
 -U _PyAIter_Check
 -U _PyArg_Parse
 -U _PyArg_ParseTuple
@@ -128,6 +129,7 @@
 -U _PyErr_CheckSignals
 -U _PyErr_Clear
 -U _PyErr_Display
+-U _PyErr_DisplayException
 -U _PyErr_ExceptionMatches
 -U _PyErr_Fetch
 -U _PyErr_Format
@@ -184,6 +186,9 @@
 -U _PyEval_GetBuiltins
 -U _PyEval_GetCallStats
 -U _PyEval_GetFrame
+-U _PyEval_GetFrameBuiltins
+-U _PyEval_GetFrameGlobals
+-U _PyEval_GetFrameLocals
 -U _PyEval_GetFuncDesc
 -U _PyEval_GetFuncName
 -U _PyEval_GetGlobals
@@ -263,9 +268,11 @@
 -U _PyExc_WindowsError
 -U _PyExc_ZeroDivisionError
 -U _PyExceptionClass_Name
+-U _PyException_GetArgs
 -U _PyException_GetCause
 -U _PyException_GetContext
 -U _PyException_GetTraceback
+-U _PyException_SetArgs
 -U _PyException_SetCause
 -U _PyException_SetContext
 -U _PyException_SetTraceback
@@ -326,6 +333,7 @@
 -U _PyInterpreterState_New
 -U _PyIter_Check
 -U _PyIter_Next
+-U _PyIter_NextItem
 -U _PyIter_Send
 -U _PyListIter_Type
 -U _PyListRevIter_Type
@@ -343,33 +351,49 @@
 -U _PyList_Type
 -U _PyLongRangeIter_Type
 -U _PyLong_AsDouble
+-U _PyLong_AsInt
+-U _PyLong_AsInt32
+-U _PyLong_AsInt64
 -U _PyLong_AsLong
 -U _PyLong_AsLongAndOverflow
 -U _PyLong_AsLongLong
 -U _PyLong_AsLongLongAndOverflow
+-U _PyLong_AsNativeBytes
 -U _PyLong_AsSize_t
 -U _PyLong_AsSsize_t
+-U _PyLong_AsUInt32
+-U _PyLong_AsUInt64
 -U _PyLong_AsUnsignedLong
 -U _PyLong_AsUnsignedLongLong
 -U _PyLong_AsUnsignedLongLongMask
 -U _PyLong_AsUnsignedLongMask
 -U _PyLong_AsVoidPtr
 -U _PyLong_FromDouble
+-U _PyLong_FromInt32
+-U _PyLong_FromInt64
 -U _PyLong_FromLong
 -U _PyLong_FromLongLong
+-U _PyLong_FromNativeBytes
 -U _PyLong_FromSize_t
 -U _PyLong_FromSsize_t
+-U _PyLong_FromUInt32
+-U _PyLong_FromUInt64
 -U _PyLong_FromString
 -U _PyLong_FromUnsignedLong
 -U _PyLong_FromUnsignedLongLong
+-U _PyLong_FromUnsignedNativeBytes
 -U _PyLong_FromVoidPtr
 -U _PyLong_GetInfo
 -U _PyLong_Type
 -U _PyMap_Type
 -U _PyMapping_Check
 -U _PyMapping_GetItemString
+-U _PyMapping_GetOptionalItem
+-U _PyMapping_GetOptionalItemString
 -U _PyMapping_HasKey
 -U _PyMapping_HasKeyString
+-U _PyMapping_HasKeyStringWithError
+-U _PyMapping_HasKeyWithError
 -U _PyMapping_Items
 -U _PyMapping_Keys
 -U _PyMapping_Length
@@ -381,6 +405,10 @@
 -U _PyMem_Calloc
 -U _PyMem_Free
 -U _PyMem_Malloc
+-U _PyMem_RawCalloc
+-U _PyMem_RawFree
+-U _PyMem_RawMalloc
+-U _PyMem_RawRealloc
 -U _PyMem_Realloc
 -U _PyMemberDescr_Type
 -U _PyMember_GetOne
@@ -393,6 +421,7 @@
 -U _PyMethodDescr_Type
 -U _PyModuleDef_Init
 -U _PyModuleDef_Type
+-U _PyModule_Add
 -U _PyModule_AddFunctions
 -U _PyModule_AddIntConstant
 -U _PyModule_AddObject
@@ -401,7 +430,9 @@
 -U _PyModule_AddType
 -U _PyModule_Create2
 -U _PyModule_ExecDef
+-U _PyModule_Exec
 -U _PyModule_FromDefAndSpec2
+-U _PyModule_FromSlotsAndSpec
 -U _PyModule_GetDef
 -U _PyModule_GetDict
 -U _PyModule_GetFilename
@@ -409,6 +440,8 @@
 -U _PyModule_GetName
 -U _PyModule_GetNameObject
 -U _PyModule_GetState
+-U _PyModule_GetStateSize
+-U _PyModule_GetToken
 -U _PyModule_New
 -U _PyModule_NewObject
 -U _PyModule_SetDocString
@@ -521,6 +554,8 @@
 -U _PyObject_GetIter
 -U _PyObject_HasAttr
 -U _PyObject_HasAttrString
+-U _PyObject_HasAttrStringWithError
+-U _PyObject_HasAttrWithError
 -U _PyObject_Hash
 -U _PyObject_HashNotImplemented
 -U _PyObject_Init
@@ -601,9 +636,15 @@
 -U _PySys_AddWarnOption
 -U _PySys_AddWarnOptionUnicode
 -U _PySys_AddXOption
+-U _PySys_Audit
+-U _PySys_AuditTuple
 -U _PySys_FormatStderr
 -U _PySys_FormatStdout
+-U _PySys_GetAttr
+-U _PySys_GetAttrString
 -U _PySys_GetObject
+-U _PySys_GetOptionalAttr
+-U _PySys_GetOptionalAttrString
 -U _PySys_GetXOptions
 -U _PySys_HasWarnOptions
 -U _PySys_ResetWarnOptions
@@ -663,15 +704,21 @@
 -U _PyTuple_Size
 -U _PyTuple_Type
 -U _PyType_ClearCache
+-U _PyType_Freeze
 -U _PyType_FromMetaclass
 -U _PyType_FromModuleAndSpec
 -U _PyType_FromSpec
 -U _PyType_FromSpecWithBases
 -U _PyType_GenericAlloc
 -U _PyType_GenericNew
+-U _PyType_GetBaseByToken
 -U _PyType_GetFlags
+-U _PyType_GetFullyQualifiedName
 -U _PyType_GetModule
+-U _PyType_GetModuleByDef
+-U _PyType_GetModuleByToken
 -U _PyType_GetModuleState
+-U _PyType_GetModuleName
 -U _PyType_GetName
 -U _PyType_GetQualName
 -U _PyType_GetSlot
@@ -757,6 +804,9 @@
 -U _PyUnicode_EncodeCodePage
 -U _PyUnicode_EncodeFSDefault
 -U _PyUnicode_EncodeLocale
+-U _PyUnicode_Equal
+-U _PyUnicode_EqualToUTF8
+-U _PyUnicode_EqualToUTF8AndSize
 -U _PyUnicode_FSConverter
 -U _PyUnicode_FSDecoder
 -U _PyUnicode_Find
@@ -822,6 +872,8 @@
 -U _Py_GetArgcArgv
 -U _Py_GetBuildInfo
 -U _Py_GetCompiler
+-U _Py_GetConstant
+-U _Py_GetConstantBorrowed
 -U _Py_GetCopyright
 -U _Py_GetExecPrefix
 -U _Py_GetPath
@@ -833,6 +885,7 @@
 -U _Py_GetRecursionLimit
 -U _Py_GetVersion
 -U _Py_HasFileSystemDefaultEncoding
+-U _Py_IS_TYPE
 -U _Py_IncRef
 -U _Py_Initialize
 -U _Py_InitializeEx
@@ -847,13 +900,18 @@
 -U _Py_MakePendingCalls
 -U _Py_NewInterpreter
 -U _Py_NewRef
+-U _Py_PACK_FULL_VERSION
+-U _Py_PACK_VERSION
 -U _Py_ReprEnter
 -U _Py_ReprLeave
+-U _Py_SET_SIZE
+-U _Py_SIZE
 -U _Py_SetPath
 -U _Py_SetProgramName
 -U _Py_SetPythonHome
 -U _Py_SetRecursionLimit
 -U _Py_SymtableString
+-U _Py_TYPE
 -U _Py_UTF8Mode
 -U _Py_VaBuildValue
 -U _Py_XNewRef
@@ -901,6 +959,7 @@
 -U __Py_NoneStruct
 -U __Py_NotImplementedStruct
 -U __Py_RefTotal
+-U __Py_SetRefcnt
 -U __Py_SwappedOp
 -U __Py_TrueStruct
 -U __Py_VaBuildValue_SizeT
diff --git a/extern/nanobind/cmake/darwin-ld-pypy.sym b/extern/nanobind/cmake/darwin-ld-pypy.sym
index 73ecb7750..4e3e1c709 100644
--- a/extern/nanobind/cmake/darwin-ld-pypy.sym
+++ b/extern/nanobind/cmake/darwin-ld-pypy.sym
@@ -1,4 +1,73 @@
 -U _PyArg_ValidateKeywordArguments
+-U _PyExpat_XML_DefaultCurrent
+-U _PyExpat_XML_ErrorString
+-U _PyExpat_XML_ExpatVersion
+-U _PyExpat_XML_ExpatVersionInfo
+-U _PyExpat_XML_ExternalEntityParserCreate
+-U _PyExpat_XML_FreeContentModel
+-U _PyExpat_XML_GetBase
+-U _PyExpat_XML_GetBuffer
+-U _PyExpat_XML_GetCurrentByteCount
+-U _PyExpat_XML_GetCurrentByteIndex
+-U _PyExpat_XML_GetCurrentColumnNumber
+-U _PyExpat_XML_GetCurrentLineNumber
+-U _PyExpat_XML_GetErrorCode
+-U _PyExpat_XML_GetFeatureList
+-U _PyExpat_XML_GetIdAttributeIndex
+-U _PyExpat_XML_GetInputContext
+-U _PyExpat_XML_GetParsingStatus
+-U _PyExpat_XML_GetSpecifiedAttributeCount
+-U _PyExpat_XML_MemFree
+-U _PyExpat_XML_MemMalloc
+-U _PyExpat_XML_MemRealloc
+-U _PyExpat_XML_Parse
+-U _PyExpat_XML_ParseBuffer
+-U _PyExpat_XML_ParserCreate
+-U _PyExpat_XML_ParserCreateNS
+-U _PyExpat_XML_ParserCreate_MM
+-U _PyExpat_XML_ParserFree
+-U _PyExpat_XML_ParserReset
+-U _PyExpat_XML_ResumeParser
+-U _PyExpat_XML_SetAttlistDeclHandler
+-U _PyExpat_XML_SetBase
+-U _PyExpat_XML_SetBillionLaughsAttackProtectionActivationThreshold
+-U _PyExpat_XML_SetBillionLaughsAttackProtectionMaximumAmplification
+-U _PyExpat_XML_SetCdataSectionHandler
+-U _PyExpat_XML_SetCharacterDataHandler
+-U _PyExpat_XML_SetCommentHandler
+-U _PyExpat_XML_SetDefaultHandler
+-U _PyExpat_XML_SetDefaultHandlerExpand
+-U _PyExpat_XML_SetDoctypeDeclHandler
+-U _PyExpat_XML_SetElementDeclHandler
+-U _PyExpat_XML_SetElementHandler
+-U _PyExpat_XML_SetEncoding
+-U _PyExpat_XML_SetEndCdataSectionHandler
+-U _PyExpat_XML_SetEndDoctypeDeclHandler
+-U _PyExpat_XML_SetEndElementHandler
+-U _PyExpat_XML_SetEndNamespaceDeclHandler
+-U _PyExpat_XML_SetEntityDeclHandler
+-U _PyExpat_XML_SetExternalEntityRefHandler
+-U _PyExpat_XML_SetExternalEntityRefHandlerArg
+-U _PyExpat_XML_SetHashSalt
+-U _PyExpat_XML_SetNamespaceDeclHandler
+-U _PyExpat_XML_SetNotStandaloneHandler
+-U _PyExpat_XML_SetNotationDeclHandler
+-U _PyExpat_XML_SetParamEntityParsing
+-U _PyExpat_XML_SetProcessingInstructionHandler
+-U _PyExpat_XML_SetReparseDeferralEnabled
+-U _PyExpat_XML_SetReturnNSTriplet
+-U _PyExpat_XML_SetSkippedEntityHandler
+-U _PyExpat_XML_SetStartCdataSectionHandler
+-U _PyExpat_XML_SetStartDoctypeDeclHandler
+-U _PyExpat_XML_SetStartElementHandler
+-U _PyExpat_XML_SetStartNamespaceDeclHandler
+-U _PyExpat_XML_SetUnknownEncodingHandler
+-U _PyExpat_XML_SetUnparsedEntityDeclHandler
+-U _PyExpat_XML_SetUserData
+-U _PyExpat_XML_SetXmlDeclHandler
+-U _PyExpat_XML_StopParser
+-U _PyExpat_XML_UseForeignDTD
+-U _PyExpat_XML_UseParserAsHandlerArg
 -U _PyModule_AddType
 -U _PyPyAnySet_Check
 -U _PyPyAnySet_CheckExact
@@ -42,8 +111,8 @@
 -U _PyPyCFunction_Call
 -U _PyPyCFunction_Check
 -U _PyPyCFunction_GetFunction
--U _PyPyCFunction_Type
 -U _PyPyCFunction_NewEx
+-U _PyPyCFunction_Type
 -U _PyPyCMethod_New
 -U _PyPyCallIter_New
 -U _PyPyCallable_Check
@@ -66,9 +135,14 @@
 -U _PyPyCode_Addr2Line
 -U _PyPyCode_Check
 -U _PyPyCode_CheckExact
+-U _PyPyCode_GetCellvars
+-U _PyPyCode_GetCode
+-U _PyPyCode_GetFreevars
 -U _PyPyCode_GetNumFree
+-U _PyPyCode_GetVarnames
 -U _PyPyCode_New
 -U _PyPyCode_NewEmpty
+-U _PyPyCode_NewWithPosOnlyArgs
 -U _PyPyCodec_Decode
 -U _PyPyCodec_Decoder
 -U _PyPyCodec_Encode
@@ -85,6 +159,7 @@
 -U _PyPyComplex_Type
 -U _PyPyContextVar_Get
 -U _PyPyContextVar_New
+-U _PyPyContextVar_Reset
 -U _PyPyContextVar_Set
 -U _PyPyCoro_Check
 -U _PyPyCoro_CheckExact
@@ -95,6 +170,7 @@
 -U _PyPyDateTime_DATE_GET_MICROSECOND
 -U _PyPyDateTime_DATE_GET_MINUTE
 -U _PyPyDateTime_DATE_GET_SECOND
+-U _PyPyDateTime_DATE_GET_TZINFO
 -U _PyPyDateTime_DELTA_GET_DAYS
 -U _PyPyDateTime_DELTA_GET_MICROSECONDS
 -U _PyPyDateTime_DELTA_GET_SECONDS
@@ -108,6 +184,7 @@
 -U _PyPyDateTime_TIME_GET_MICROSECOND
 -U _PyPyDateTime_TIME_GET_MINUTE
 -U _PyPyDateTime_TIME_GET_SECOND
+-U _PyPyDateTime_TIME_GET_TZINFO
 -U _PyPyDate_Check
 -U _PyPyDate_CheckExact
 -U _PyPyDate_FromTimestamp
@@ -150,7 +227,9 @@
 -U _PyPyErr_ExceptionMatches
 -U _PyPyErr_Fetch
 -U _PyPyErr_Format
+-U _PyPyErr_FormatV
 -U _PyPyErr_GetExcInfo
+-U _PyPyErr_GetHandledException
 -U _PyPyErr_GivenExceptionMatches
 -U _PyPyErr_NewException
 -U _PyPyErr_NewExceptionWithDoc
@@ -165,6 +244,7 @@
 -U _PyPyErr_SetFromErrnoWithFilename
 -U _PyPyErr_SetFromErrnoWithFilenameObject
 -U _PyPyErr_SetFromErrnoWithFilenameObjects
+-U _PyPyErr_SetHandledException
 -U _PyPyErr_SetInterrupt
 -U _PyPyErr_SetNone
 -U _PyPyErr_SetObject
@@ -181,6 +261,8 @@
 -U _PyPyEval_EvalCode
 -U _PyPyEval_GetBuiltins
 -U _PyPyEval_GetFrame
+-U _PyPyEval_GetFuncDesc
+-U _PyPyEval_GetFuncName
 -U _PyPyEval_GetGlobals
 -U _PyPyEval_GetLocals
 -U _PyPyEval_InitThreads
@@ -193,6 +275,7 @@
 -U _PyPyExc_AssertionError
 -U _PyPyExc_AttributeError
 -U _PyPyExc_BaseException
+-U _PyPyExc_BaseExceptionGroup
 -U _PyPyExc_BlockingIOError
 -U _PyPyExc_BrokenPipeError
 -U _PyPyExc_BufferError
@@ -204,7 +287,9 @@
 -U _PyPyExc_ConnectionResetError
 -U _PyPyExc_DeprecationWarning
 -U _PyPyExc_EOFError
+-U _PyPyExc_EncodingWarning
 -U _PyPyExc_Exception
+-U _PyPyExc_ExceptionGroup
 -U _PyPyExc_FileExistsError
 -U _PyPyExc_FileNotFoundError
 -U _PyPyExc_FloatingPointError
@@ -271,8 +356,21 @@
 -U _PyPyFloat_CheckExact
 -U _PyPyFloat_FromDouble
 -U _PyPyFloat_FromString
+-U _PyPyFloat_Pack2
+-U _PyPyFloat_Pack4
+-U _PyPyFloat_Pack8
 -U _PyPyFloat_Type
+-U _PyPyFloat_Unpack2
+-U _PyPyFloat_Unpack4
+-U _PyPyFloat_Unpack8
+-U _PyPyFrame_GetBuiltins
+-U _PyPyFrame_GetGenerator
+-U _PyPyFrame_GetGlobals
+-U _PyPyFrame_GetLasti
+-U _PyPyFrame_GetLineNumber
+-U _PyPyFrame_GetLocals
 -U _PyPyFrame_New
+-U _PyPyFrame_Type
 -U _PyPyFrozenSet_Check
 -U _PyPyFrozenSet_CheckExact
 -U _PyPyFrozenSet_New
@@ -280,9 +378,16 @@
 -U _PyPyFunction_Check
 -U _PyPyFunction_CheckExact
 -U _PyPyFunction_GetCode
+-U _PyPyFunction_GetGlobals
+-U _PyPyFunction_GetModule
 -U _PyPyFunction_Type
+-U _PyPyGC_Collect
+-U _PyPyGC_Disable
+-U _PyPyGC_Enable
+-U _PyPyGC_IsEnabled
 -U _PyPyGILState_Check
 -U _PyPyGILState_Ensure
+-U _PyPyGILState_GetThisThreadState
 -U _PyPyGILState_Release
 -U _PyPyGen_Check
 -U _PyPyGen_CheckExact
@@ -294,6 +399,7 @@
 -U _PyPyImport_GetModuleDict
 -U _PyPyImport_Import
 -U _PyPyImport_ImportModule
+-U _PyPyImport_ImportModuleLevel
 -U _PyPyImport_ImportModuleLevelObject
 -U _PyPyImport_ImportModuleNoBlock
 -U _PyPyImport_ReloadModule
@@ -308,6 +414,7 @@
 -U _PyPyInterpreterState_Next
 -U _PyPyIter_Check
 -U _PyPyIter_Next
+-U _PyPyIter_Send
 -U _PyPyList_Append
 -U _PyPyList_AsTuple
 -U _PyPyList_GET_ITEM
@@ -390,14 +497,19 @@
 -U _PyPyModule_AddFunctions
 -U _PyPyModule_AddIntConstant
 -U _PyPyModule_AddObject
+-U _PyPyModule_AddObjectRef
 -U _PyPyModule_AddStringConstant
 -U _PyPyModule_Check
 -U _PyPyModule_CheckExact
 -U _PyPyModule_Create2
 -U _PyPyModule_ExecDef
+-U _PyPyModule_FromDefAndSpec
+-U _PyPyModule_FromDefAndSpec2
 -U _PyPyModule_GetDef
 -U _PyPyModule_GetDict
+-U _PyPyModule_GetFilenameObject
 -U _PyPyModule_GetName
+-U _PyPyModule_GetNameObject
 -U _PyPyModule_GetState
 -U _PyPyModule_New
 -U _PyPyModule_NewObject
@@ -480,6 +592,8 @@
 -U _PyPyObject_Format
 -U _PyPyObject_Free
 -U _PyPyObject_GC_Del
+-U _PyPyObject_GC_IsFinalized
+-U _PyPyObject_GC_IsTracked
 -U _PyPyObject_GenericGetAttr
 -U _PyPyObject_GenericGetDict
 -U _PyPyObject_GenericSetAttr
@@ -565,6 +679,7 @@
 -U _PyPySlice_Type
 -U _PyPySlice_Unpack
 -U _PyPyState_AddModule
+-U _PyPyState_FindModule
 -U _PyPyState_RemoveModule
 -U _PyPyStaticMethod_New
 -U _PyPyStaticMethod_Type
@@ -584,13 +699,18 @@
 -U _PyPyThreadState_Clear
 -U _PyPyThreadState_Delete
 -U _PyPyThreadState_DeleteCurrent
+-U _PyPyThreadState_EnterTracing
 -U _PyPyThreadState_Get
 -U _PyPyThreadState_GetDict
+-U _PyPyThreadState_GetFrame
+-U _PyPyThreadState_GetID
+-U _PyPyThreadState_LeaveTracing
 -U _PyPyThreadState_New
 -U _PyPyThreadState_SetAsyncExc
 -U _PyPyThreadState_Swap
 -U _PyPyThread_ReInitTLS
 -U _PyPyThread_acquire_lock
+-U _PyPyThread_acquire_lock_timed
 -U _PyPyThread_allocate_lock
 -U _PyPyThread_create_key
 -U _PyPyThread_delete_key
@@ -624,8 +744,10 @@
 -U _PyPyType_GenericAlloc
 -U _PyPyType_GenericNew
 -U _PyPyType_GetModule
+-U _PyPyType_GetModuleByDef
 -U _PyPyType_GetModuleState
 -U _PyPyType_GetName
+-U _PyPyType_GetQualName
 -U _PyPyType_GetSlot
 -U _PyPyType_IsSubtype
 -U _PyPyType_Modified
@@ -637,6 +759,7 @@
 -U _PyPyUnicode_AsEncodedObject
 -U _PyPyUnicode_AsEncodedString
 -U _PyPyUnicode_AsLatin1String
+-U _PyPyUnicode_AsRawUnicodeEscapeString
 -U _PyPyUnicode_AsUCS4
 -U _PyPyUnicode_AsUCS4Copy
 -U _PyPyUnicode_AsUTF16String
@@ -663,6 +786,7 @@
 -U _PyPyUnicode_DecodeLatin1
 -U _PyPyUnicode_DecodeLocale
 -U _PyPyUnicode_DecodeLocaleAndSize
+-U _PyPyUnicode_DecodeRawUnicodeEscape
 -U _PyPyUnicode_DecodeUTF16
 -U _PyPyUnicode_DecodeUTF32
 -U _PyPyUnicode_DecodeUTF8
@@ -729,6 +853,7 @@
 -U _PyPy_FindMethod
 -U _PyPy_FrozenFlag
 -U _PyPy_GenericAlias
+-U _PyPy_GenericAliasType
 -U _PyPy_GetProgramName
 -U _PyPy_GetRecursionLimit
 -U _PyPy_GetVersion
@@ -737,6 +862,7 @@
 -U _PyPy_IncRef
 -U _PyPy_InspectFlag
 -U _PyPy_InteractiveFlag
+-U _PyPy_Is
 -U _PyPy_IsInitialized
 -U _PyPy_IsolatedFlag
 -U _PyPy_LeaveRecursiveCall
@@ -768,8 +894,10 @@
 -U _PyPy_UnbufferedStdioFlag
 -U _PyPy_VaBuildValue
 -U _PyPy_VerboseFlag
+-U _PyPy_Version
 -U _PySlice_AdjustIndices
 -U _PyState_FindModule
+-U _PyThreadState_GetInterpreter
 -U _PyThread_tss_alloc
 -U _PyThread_tss_create
 -U _PyThread_tss_delete
@@ -794,10 +922,13 @@
 -U __PyArg_UnpackStack
 -U __PyArg_VaParseTupleAndKeywordsFast
 -U __PyArg_VaParseTupleAndKeywordsFast_SizeT
+-U __PyDeadline_Get
+-U __PyDeadline_Init
 -U __PyExc_ArithmeticError
 -U __PyExc_AssertionError
 -U __PyExc_AttributeError
 -U __PyExc_BaseException
+-U __PyExc_BaseExceptionGroup
 -U __PyExc_BlockingIOError
 -U __PyExc_BrokenPipeError
 -U __PyExc_BufferError
@@ -809,7 +940,9 @@
 -U __PyExc_ConnectionResetError
 -U __PyExc_DeprecationWarning
 -U __PyExc_EOFError
+-U __PyExc_EncodingWarning
 -U __PyExc_Exception
+-U __PyExc_ExceptionGroup
 -U __PyExc_FileExistsError
 -U __PyExc_FileNotFoundError
 -U __PyExc_FloatingPointError
@@ -881,13 +1014,17 @@
 -U __PyPyDict_HasOnlyStringKeys
 -U __PyPyErr_FormatFromCause
 -U __PyPyErr_WriteUnraisableMsg
+-U __PyPyEval_GetAsyncGenFinalizer
+-U __PyPyEval_GetAsyncGenFirstiter
 -U __PyPyEval_SliceIndex
+-U __PyPyFloat_InitState
 -U __PyPyFloat_Unpack4
 -U __PyPyFloat_Unpack8
 -U __PyPyImport_AcquireLock
 -U __PyPyImport_ReleaseLock
 -U __PyPyList_Extend
 -U __PyPyLong_AsByteArrayO
+-U __PyPyLong_AsInt
 -U __PyPyLong_FromByteArray
 -U __PyPyLong_NumBits
 -U __PyPyLong_Sign
@@ -909,19 +1046,35 @@
 -U __PyPyPy_Malloc
 -U __PyPySet_Next
 -U __PyPySet_NextEntry
+-U __PyPyThreadState_GetDict
 -U __PyPyThreadState_UncheckedGet
 -U __PyPyTimeZone_FromTimeZone
 -U __PyPyTime_FromTime
 -U __PyPyTime_FromTimeAndFold
 -U __PyPyTuple_Resize
 -U __PyPyType_Lookup
+-U __PyPyType_Name
 -U __PyPyUnicode_EQ
 -U __PyPyUnicode_EqualToASCIIString
+-U __PyPyUnicode_IsAlpha
+-U __PyPyUnicode_IsDecimalDigit
+-U __PyPyUnicode_IsDigit
+-U __PyPyUnicode_IsLowercase
+-U __PyPyUnicode_IsNumeric
+-U __PyPyUnicode_IsPrintable
+-U __PyPyUnicode_IsTitlecase
+-U __PyPyUnicode_IsUppercase
 -U __PyPyUnicode_Ready
+-U __PyPyUnicode_ToDecimalDigit
+-U __PyPyUnicode_ToDigit
+-U __PyPyUnicode_ToLowercase
+-U __PyPyUnicode_ToTitlecase
+-U __PyPyUnicode_ToUppercase
 -U __PyPy_BuildValue_SizeT
 -U __PyPy_Dealloc
 -U __PyPy_EllipsisObject
 -U __PyPy_FalseStruct
+-U __PyPy_FatalErrorFunc
 -U __PyPy_HashDouble
 -U __PyPy_HashPointer
 -U __PyPy_IsFinalizing
@@ -940,26 +1093,43 @@
 -U __PyPy_subtype_dealloc
 -U __PyPy_tuple_dealloc
 -U __PyPy_tuple_new
+-U __PyTime_Add
 -U __PyTime_AsMicroseconds
 -U __PyTime_AsMilliseconds
+-U __PyTime_AsNanoseconds
 -U __PyTime_AsNanosecondsObject
 -U __PyTime_AsSecondsDouble
+-U __PyTime_AsTimespec
+-U __PyTime_AsTimespec_clamp
 -U __PyTime_AsTimeval
 -U __PyTime_AsTimevalTime_t
+-U __PyTime_AsTimeval_clamp
 -U __PyTime_AsTimeval_noraise
 -U __PyTime_FromMillisecondsObject
 -U __PyTime_FromNanoseconds
 -U __PyTime_FromNanosecondsObject
 -U __PyTime_FromSeconds
 -U __PyTime_FromSecondsObject
+-U __PyTime_FromTimespec
+-U __PyTime_FromTimeval
 -U __PyTime_GetMonotonicClock
 -U __PyTime_GetMonotonicClockWithInfo
+-U __PyTime_GetPerfCounter
+-U __PyTime_GetPerfCounterWithInfo
 -U __PyTime_GetSystemClock
 -U __PyTime_GetSystemClockWithInfo
 -U __PyTime_Init
+-U __PyTime_MulDiv
 -U __PyTime_ObjectToTime_t
 -U __PyTime_ObjectToTimespec
 -U __PyTime_ObjectToTimeval
 -U __PyTime_gmtime
 -U __PyTime_localtime
 -U __PyType_Name
+-U __PyUnicode_IsLinebreak
+-U __PyUnicode_IsWhitespace
+-U __PyUnicode_ToNumeric
+-U __Py_NewReference
+-U __Py_VaBuildStack
+-U __Py_VaBuildStack_SizeT
+-U __Py_ascii_whitespace
diff --git a/extern/nanobind/cmake/nanobind-config.cmake b/extern/nanobind/cmake/nanobind-config.cmake
index bc680e577..4baa3b7f9 100644
--- a/extern/nanobind/cmake/nanobind-config.cmake
+++ b/extern/nanobind/cmake/nanobind-config.cmake
@@ -4,6 +4,10 @@ if (NOT TARGET Python::Module)
   message(FATAL_ERROR "You must invoke 'find_package(Python COMPONENTS Interpreter Development REQUIRED)' prior to including nanobind.")
 endif()
 
+if (Python_VERSION VERSION_LESS "3.9")
+  message(FATAL_ERROR "nanobind requires Python 3.9 or newer (found Python ${Python_VERSION}).")
+endif()
+
 # Determine the right suffix for ordinary and stable ABI extensions.
 
 # We always need to know the extension
@@ -49,6 +53,14 @@ endif()
 # Extract Python version and extensions (e.g. free-threaded build)
 string(REGEX REPLACE "[^-]*-([^-]*)-.*" "\\1" NB_ABI "${NB_SOABI}")
 
+# Determine whether the interpreter was built without the GIL using the ABI tag
+# (free-threaded builds encode this using a trailing 't').
+set(NB_FREE_THREADED 0)
+
+if(NB_ABI MATCHES "[0-9]t")
+  set(NB_FREE_THREADED 1)
+endif()
+
 # If either suffix is missing, call Python to compute it
 if(NOT DEFINED NB_SUFFIX OR NOT DEFINED NB_SUFFIX_S)
   # Query Python directly to get the right suffix.
@@ -79,9 +91,10 @@ if(NOT DEFINED NB_SUFFIX OR NOT DEFINED NB_SUFFIX_S)
 endif()
 
 # Stash these for later use
-set(NB_SUFFIX   ${NB_SUFFIX}   CACHE INTERNAL "")
-set(NB_SUFFIX_S ${NB_SUFFIX_S} CACHE INTERNAL "")
-set(NB_ABI      ${NB_ABI}      CACHE INTERNAL "")
+set(NB_SUFFIX         ${NB_SUFFIX}         CACHE INTERNAL "")
+set(NB_SUFFIX_S       ${NB_SUFFIX_S}       CACHE INTERNAL "")
+set(NB_ABI            ${NB_ABI}            CACHE INTERNAL "")
+set(NB_FREE_THREADED  ${NB_FREE_THREADED} CACHE INTERNAL "")
 
 get_filename_component(NB_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
 get_filename_component(NB_DIR "${NB_DIR}" PATH)
@@ -189,13 +202,16 @@ function (nanobind_build_library TARGET_NAME)
     ${NB_DIR}/src/nb_ndarray.cpp
     ${NB_DIR}/src/nb_static_property.cpp
     ${NB_DIR}/src/nb_ft.h
-    ${NB_DIR}/src/nb_ft.cpp
     ${NB_DIR}/src/common.cpp
     ${NB_DIR}/src/error.cpp
     ${NB_DIR}/src/trampoline.cpp
     ${NB_DIR}/src/implicit.cpp
   )
 
+  if (NB_FREE_THREADED)
+    target_sources(${TARGET_NAME} PRIVATE ${NB_DIR}/src/nb_ft.cpp)
+  endif()
+
   if (TARGET_TYPE STREQUAL "SHARED")
     nanobind_link_options(${TARGET_NAME})
     target_compile_definitions(${TARGET_NAME} PRIVATE -DNB_BUILD)
@@ -246,15 +262,19 @@ function (nanobind_build_library TARGET_NAME)
   # However, if the directory _does_ exist, then the user is free to choose
   # whether nanobind uses them (based on `NB_USE_SUBMODULE_DEPS`), with a
   # preference to choose them if `NB_USE_SUBMODULE_DEPS` is not defined
-  if (NOT IS_DIRECTORY ${NB_DIR}/ext/robin_map/include OR
-      (DEFINED NB_USE_SUBMODULE_DEPS AND NOT NB_USE_SUBMODULE_DEPS))
+  if(IS_DIRECTORY ${NB_DIR}/ext/robin_map/include
+       AND (NOT DEFINED NB_USE_SUBMODULE_DEPS OR NB_USE_SUBMODULE_DEPS)
+       AND NOT TARGET tsl::robin_map)
+    add_library(tsl::robin_map INTERFACE IMPORTED)
+    set_target_properties(tsl::robin_map PROPERTIES
+      INTERFACE_INCLUDE_DIRECTORIES ${NB_DIR}/ext/robin_map/include)
+  endif()
+
+  if(NOT TARGET tsl::robin_map)
     include(CMakeFindDependencyMacro)
-    find_dependency(tsl-robin-map)
-    target_link_libraries(${TARGET_NAME} PRIVATE tsl::robin_map)
-  else()
-    target_include_directories(${TARGET_NAME} PRIVATE
-      ${NB_DIR}/ext/robin_map/include)
+    find_dependency(tsl-robin-map CONFIG REQUIRED)
   endif()
+  target_link_libraries(${TARGET_NAME} PRIVATE tsl::robin_map)
 
   target_include_directories(${TARGET_NAME} ${AS_SYSINCLUDE} PUBLIC
     ${Python_INCLUDE_DIRS}
@@ -352,7 +372,7 @@ function(nanobind_add_module name)
     set(ARG_STABLE_ABI FALSE)
   endif()
 
-  if (NB_ABI MATCHES "t")
+  if (NB_ABI MATCHES "[0-9]t")
     # Free-threaded Python interpreters don't support building a nanobind
     # module that uses the stable ABI.
     set(ARG_STABLE_ABI FALSE)
@@ -590,7 +610,7 @@ endfunction()
 # ---------------------------------------------------------------------------
 
 function (nanobind_add_stub name)
-  cmake_parse_arguments(PARSE_ARGV 1 ARG "VERBOSE;INCLUDE_PRIVATE;EXCLUDE_DOCSTRINGS;INSTALL_TIME;RECURSIVE;EXCLUDE_FROM_ALL" "MODULE;COMPONENT;PATTERN_FILE;OUTPUT_PATH" "PYTHON_PATH;DEPENDS;MARKER_FILE;OUTPUT")
+  cmake_parse_arguments(PARSE_ARGV 1 ARG "VERBOSE;INCLUDE_PRIVATE;EXCLUDE_DOCSTRINGS;EXCLUDE_VALUES;INSTALL_TIME;RECURSIVE;EXCLUDE_FROM_ALL" "MODULE;COMPONENT;PATTERN_FILE;OUTPUT_PATH" "PYTHON_PATH;DEPENDS;MARKER_FILE;OUTPUT")
 
   if (EXISTS ${NB_DIR}/src/stubgen.py)
     set(NB_STUBGEN "${NB_DIR}/src/stubgen.py")
@@ -614,6 +634,10 @@ function (nanobind_add_stub name)
     list(APPEND NB_STUBGEN_ARGS -D)
   endif()
 
+  if (ARG_EXCLUDE_VALUES)
+    list(APPEND NB_STUBGEN_ARGS --exclude-values)
+  endif()
+
   if (ARG_RECURSIVE)
     list(APPEND NB_STUBGEN_ARGS -r)
   endif()
diff --git a/extern/nanobind/docs/api_bazel.rst b/extern/nanobind/docs/api_bazel.rst
index 6aca46e88..4cd1bba9c 100644
--- a/extern/nanobind/docs/api_bazel.rst
+++ b/extern/nanobind/docs/api_bazel.rst
@@ -30,6 +30,9 @@ The main tool to build nanobind extensions is the ``nanobind_extension`` rule.
             srcs = [],
             copts = [],
             deps = [],
+            dynamic_deps = [],
+            linkstatic = True,
+            nanobind_link_mode = "auto",
             local_defines = [],
             **kwargs):
 
@@ -42,6 +45,8 @@ The main tool to build nanobind extensions is the ``nanobind_extension`` rule.
     different ABI domain, as described in the :ref:`FAQ <type-visibility>`
     section.
 
+    *New in nanobind-bazel v2.10.2: Added the "nanobind_link_mode" attribute.*
+
 To generate typing stubs for an extension, you can use the ``nanobind_stubgen``
 rule.
 
@@ -64,9 +69,9 @@ rule.
             exclude_docstrings = False,
             recursive = False):
 
-    It generates a `py_binary <https://bazel.build/reference/be/python#py_binary>`__ 
-    rule with a corresponding runfiles distribution, which invokes nanobind's 
-    builtin stubgen script, outputs a stub file and, optionally, 
+    It generates a `py_binary <https://bazel.build/reference/be/python#py_binary>`__
+    rule with a corresponding runfiles distribution, which invokes nanobind's
+    builtin stubgen script, outputs a stub file and, optionally,
     a typing marker file into ``output_directory`` (defaults to
     the build output directory, commonly called "bindir" in Bazel terms).
 
@@ -92,6 +97,8 @@ To build a C++ library with nanobind as a dependency, use the
             name,
             copts = [],
             deps = [],
+            linkstatic = False,
+            nanobind_link_mode = "auto",
             **kwargs):
 
     It corresponds directly to the builtin
@@ -99,6 +106,8 @@ To build a C++ library with nanobind as a dependency, use the
     with all keyword arguments being directly forwarded to a ``cc_library``
     target.
 
+    *New in nanobind-bazel v2.10.2: Added the "nanobind_link_mode" attribute.*
+
 To build a C++ shared library with nanobind as a dependency, use the
 ``nanobind_shared_library`` rule.
 
@@ -134,12 +143,12 @@ To build a C++ static library containing nanobind, use the
         def nanobind_static_library(name, deps, **kwargs):
 
     It corresponds directly to the builtin
-    `cc_static_library <https://bazel.build/reference/be/c-cpp#cc_static_library>`__ 
+    `cc_static_library <https://bazel.build/reference/be/c-cpp#cc_static_library>`__
     rule, with all keyword arguments being directly
     forwarded to a ``cc_static_library`` target.
 
     NB: This macro requires Bazel 7.4.0 or greater to use, as well as setting the
-    ``--experimental_cc_static_library`` flag for the build, since the 
+    ``--experimental_cc_static_library`` flag for the build, since the
     ``cc_static_library`` rule is considered experimental.
 
     *New in nanobind-bazel version 2.7.0.*
@@ -156,12 +165,17 @@ To build a C++ test target requiring nanobind, use the ``nanobind_test`` rule.
             name,
             copts = [],
             deps = [],
+            dynamic_deps = [],
+            linkstatic = False,
+            nanobind_link_mode = "auto",
             **kwargs):
 
     It corresponds directly to the builtin
     `cc_test <https://bazel.build/reference/be/c-cpp#cc_test>`__ rule, with all
     keyword arguments being directly forwarded to a ``cc_test`` target.
 
+    *New in nanobind-bazel v2.10.2: Added the "nanobind_link_mode" attribute.*
+
 .. _flags-bazel:
 
 Flags
@@ -181,5 +195,5 @@ following flag settings.
 
     Build nanobind extensions against the stable ABI of the configured Python
     version. Allowed values are ``"cp312"``, ``"cp313"``, and ``"cp314"``, which
-    target the stable ABI starting from CPython 3.12, 3.13, or 3.14 respectively. 
+    target the stable ABI starting from CPython 3.12, 3.13, or 3.14 respectively.
     By default, all extensions are built without any ABI limitations.
diff --git a/extern/nanobind/docs/api_core.rst b/extern/nanobind/docs/api_core.rst
index 23048737d..3b1850644 100644
--- a/extern/nanobind/docs/api_core.rst
+++ b/extern/nanobind/docs/api_core.rst
@@ -2158,9 +2158,6 @@ declarations in generated :ref:`stubs <stubs>`,
    See the section on :ref:`creating generic types <typing_generics_creating>`
    for an example.
 
-   This feature is only supported on Python 3.9+. Nanobind will ignore
-   the attribute in Python 3.8 builds.
-
 .. cpp:struct:: template <typename T> supplement
 
    Indicate that ``sizeof(T)`` bytes of memory should be set aside to
@@ -2190,6 +2187,9 @@ declarations in generated :ref:`stubs <stubs>`,
       Declares a callback that will be invoked when a C++ instance is first
       cast into a Python object.
 
+.. cpp:struct:: never_destruct
+
+   Disables destroying the instance.
 
 .. _enum_binding_annotations:
 
diff --git a/extern/nanobind/docs/api_extra.rst b/extern/nanobind/docs/api_extra.rst
index 83b17e990..47f4a9e75 100644
--- a/extern/nanobind/docs/api_extra.rst
+++ b/extern/nanobind/docs/api_extra.rst
@@ -1108,6 +1108,11 @@ convert into an equivalent representation in one of the following frameworks:
 
    Builtin Python ``memoryview`` for CPU-resident data.
 
+.. cpp:class:: array_api
+
+   An object that both implements the buffer protocol and also has the
+   ``__dlpack__`` and ``__dlpack_device__`` attributes.
+
 Eigen convenience type aliases
 ------------------------------
 
@@ -1561,6 +1566,16 @@ include directive:
    <https://docs.python.org/3/library/typing.html#typing.TypeVarTuple>`__
    (i.e., an instance of ``typing.TypeVarTuple``).
 
+.. cpp:function:: template <typename... Args> object param_spec(Args&&... args)
+
+   Analogous to :cpp:func:`type_var`, create a `parameter specification variable
+   <https://docs.python.org/3/library/typing.html#typing.ParamSpec>`__
+   (i.e., an instance of ``typing.ParamSpec``).
+
+   .. code-block:: cpp
+
+        m.attr("P") = nb::param_spec("P");
+
 .. cpp:function:: object any_type()
 
    Convenience wrapper, which returns ``typing.Any``.
diff --git a/extern/nanobind/docs/bazel.rst b/extern/nanobind/docs/bazel.rst
index 13d7b6de8..d820b360e 100644
--- a/extern/nanobind/docs/bazel.rst
+++ b/extern/nanobind/docs/bazel.rst
@@ -27,8 +27,8 @@ in your MODULE.bazel file:
     # Place this in your MODULE.bazel file.
     # The major version of nanobind-bazel is equal to the version
     # of the internally used nanobind.
-    # In this case, we are building bindings with nanobind v2.8.0.
-    bazel_dep(name = "nanobind_bazel", version = "2.8.0")
+    # In this case, we are building bindings with nanobind v2.11.0.
+    bazel_dep(name = "nanobind_bazel", version = "2.11.0")
 
 To instead use a development version from GitHub, you can declare the
 dependency as a ``git_override()`` in your MODULE.bazel:
@@ -139,6 +139,26 @@ Naturally, since stub generation relies on the given shared object files, the
 actual extensions are built in the process before invocation of the stub
 generation script.
 
+Controlling shared vs. static library production
+------------------------------------------------
+
+You can control how nanobind is linked to your extensions and libraries with the
+``nanobind_link_mode`` attribute of the ``nanobind_extension``, ``nanobind_library``,
+and ``nanobind_test`` macros.
+
+Setting ``nanobind_link_mode = "static"`` will link nanobind statically, while
+``nanobind_link_mode = "shared"`` will request linkage against a shared ``libnanobind.so``.
+The default, ``nanobind_link_mode = "auto"``, will set the linkage for nanobind automatically
+based on the value of the given ``linkstatic`` attribute (where ``True`` requests static linkage,
+while ``False`` requests dynamic linkage).
+
+.. note::
+
+    Linking a ``nanobind_extension`` dynamically on macOS can fail because of undefined libpython
+    symbols referenced in the extension's object files. In that case, you can supply a linker
+    response file by using the ``nb_library_linkopts`` function from ``@nanobind_bazel//:helpers.bzl``
+    when setting your extension's ``linkopts``.
+
 Building extensions for free-threaded Python
 --------------------------------------------
 
diff --git a/extern/nanobind/docs/building.rst b/extern/nanobind/docs/building.rst
index 346ef5f01..91e5479ed 100644
--- a/extern/nanobind/docs/building.rst
+++ b/extern/nanobind/docs/building.rst
@@ -20,7 +20,7 @@ Preliminaries
 Begin by creating a new file named ``CMakeLists.txt`` in the root directory of
 your project. It should start with the following lines that declare a project
 name and tested CMake version range. The third line line searches for Python >=
-3.8 including the ``Development.Module`` component required by nanobind. The
+3.9 including the ``Development.Module`` component required by nanobind. The
 name of this module changed across CMake versions, hence the additional
 conditional check.
 
@@ -35,7 +35,7 @@ conditional check.
       set(DEV_MODULE Development.Module)
     endif()
 
-    find_package(Python 3.8 COMPONENTS Interpreter ${DEV_MODULE} REQUIRED)
+    find_package(Python 3.9 COMPONENTS Interpreter ${DEV_MODULE} REQUIRED)
 
 Add the following lines below. They configure CMake to perform an optimized
 *release* build by default unless another build type is specified. Without this
diff --git a/extern/nanobind/docs/changelog.rst b/extern/nanobind/docs/changelog.rst
index 06193a956..44bdd8531 100644
--- a/extern/nanobind/docs/changelog.rst
+++ b/extern/nanobind/docs/changelog.rst
@@ -15,6 +15,187 @@ case, both modules must use the same nanobind ABI version, or they will be
 isolated from each other. Releases that don't explicitly mention an ABI version
 below inherit that of the preceding release.
 
+Version 2.11.0 (Jan 29, 2026)
+-----------------------------
+
+- This release improves binding performance using CPython's *adaptive
+  specializing interpreter* (`PEP 659 <https://peps.python.org/pep-0659/>`__).
+  The speedups are automatic and require no changes to binding code:
+
+  .. list-table::
+     :header-rows: 1
+
+     * - Operation
+       - Speedup
+       - Requirements
+     * - Method calls
+       - **1.22x** faster
+       - Python 3.11+
+     * - Static attribute lookups
+       - **1.63x** faster
+       - Python 3.14+
+
+  This was achieved by making a number of nanobind-internal classes
+  (``nb_func``, ``nb_method``, ``nb_meta``, etc.) immutable, which allows
+  CPython to specialize generic ``LOAD_ATTR`` opcodes to faster type-specific
+  versions (``LOAD_ATTR_METHOD`` for method calls, ``LOAD_ATTR_CLASS`` for
+  static attribute lookups). (PR `#1257
+  <https://github.com/wjakob/nanobind/pull/1257>`__).
+
+- Added the :cpp:class:`nb::never_destruct <never_destruct>` class binding
+  annotation to inform nanobind that it should not bind the destructor. (PR
+  `#1251 <https://github.com/wjakob/nanobind/pull/1251>`__, commit `4ba51f
+  <https://github.com/wjakob/nanobind/commit/4ba51fcf795971c5d603d875ae4184bc0c9bd8e6>`__).
+
+- Argument annotations for ``std::optional<T>``-typed arguments now implicitly
+  have the :cpp:func:`.none() <arg::none>` annotation applied (i.e., no need to
+  additionally specify ``nb::arg("..").none()``). (PR `#1262
+  <https://github.com/wjakob/nanobind/pull/1262>`__, commit `425ca1
+  <https://github.com/wjakob/nanobind/commit/425ca1d10dfda60de122d681099500f6e9718985>`__).
+
+- Removed a redundant hash table type, reducing the size of libnanobind by
+  2.5KiB. (commit `4d53cd
+  <https://github.com/wjakob/nanobind/commit/4d53cd184a759129122d678466b7055aef3dfac6>`__).
+
+- Added Python 3.12-3.14 symbols to linker scripts. (commit `36d4a6
+  <https://github.com/wjakob/nanobind/commit/36d4a60bd1f9ecb4ac6c42489db29719c5b3d77a>`__).
+
+- Fixed a bug where ``call_guard`` could cause an extra copy of the return
+  value. (PR `#1249 <https://github.com/wjakob/nanobind/pull/1249>`__).
+
+- Don't link ``nb_ft.cpp`` in non-free-threaded builds to avoid linker warnings
+  about empty compilation units. (PR `#1271
+  <https://github.com/wjakob/nanobind/pull/1271>`__).
+
+- ABI version 18.
+
+- **Eigen type caster improvements**:
+
+  - Fixed conversion of size-zero vectors to ``Eigen::Map``/``Eigen::Ref`` on
+    NumPy 2.4. (PR `#1268 <https://github.com/wjakob/nanobind/pull/1268>`__).
+
+  - Fixed move construction of dense Eigen arrays. (commit `cb90753
+    <https://github.com/wjakob/nanobind/commit/cb90753953b767d5c0ab877a3e4d8ae4ae63211f>`__).
+
+- **Stub generation improvements**:
+
+  - Fixed *O(n²)* string concatenation performance issue.
+    (PR `#1275 <https://github.com/wjakob/nanobind/pull/1275>`__).
+
+  - Fixed enumerations with entries named ``name`` or ``value``.
+    (issue `#1246 <https://github.com/wjakob/nanobind/issues/1246>`__).
+
+  - Stubgen now preserves module-level docstrings. (commit `8771be
+    <https://github.com/wjakob/nanobind/commit/8771be7cf3c8420ba5a8c8aaa807a1b81437a6a3>`__).
+
+  - Extended the skip list by two additional enum attributes.
+    (PR `#1255 <https://github.com/wjakob/nanobind/pull/1255>`__).
+
+Version 2.10.2 (Dec 10, 2025)
+-----------------------------
+
+- Fixes a regression that broke compilation on 32-bit architectures.
+  (PR `#1239 <https://github.com/wjakob/nanobind/pull/1239>`__).
+
+Version 2.10.1 (Dec 8, 2025)
+----------------------------
+
+- Nanobind now officially supports the **MinGW-w64** and **Intel ICX**
+  compilers. (PR `#1188 <https://github.com/wjakob/nanobind/pull/1188>`__).
+
+- Version 2.10 drops support for Python 3.8, which reached *End-Of-Life* in
+  October 2024. (PR `#1236 <https://github.com/wjakob/nanobind/pull/1236>`__).
+
+- The new :cpp:class:`nb::array_api <array_api>` framework tag can be used to
+  create an nd-array wrapper object that supports both the Python buffer
+  protocol and the DLPack methods ``__dlpack__`` and ``__dlpack_device__``.
+
+  Furthermore, nanobind now supports importing/exporting tensors via the legacy
+  (unversioned) DLPack interface, as well as a new versioned interface. The latter
+  provides a flag indicating whether an nd-array is read-only. (PR `#1175
+  <https://github.com/wjakob/nanobind/pull/1175>`__).
+
+- Added ``bfloat`` to the nd-array import conversion code, fixing imports of
+  bfloat16 tensors. (PR `#1228
+  <https://github.com/wjakob/nanobind/pull/1228>`__).
+
+- nanobind now uses per-module precomputed constants, particularly strings, to
+  avoid costs from creating these repeatedly. This improves the performance of
+  nd-array and enumeration casts. (PR `#1184
+  <https://github.com/wjakob/nanobind/pull/1184>`__).
+
+- Fixed a segfault in garbage collection traversal of Python subclasses of
+  class bindings with :cpp:class:`nb::is_weak_referenceable
+  <is_weak_referenceable>`. (PR `#1206
+  <https://github.com/wjakob/nanobind/pull/1206>`__).
+
+- Fixed a potential reference leak in the ``std::array`` type caster. (commit
+  `bfacaf7
+  <https://github.com/wjakob/nanobind/commit/bfacaf75525c8a5e5f0a80fd69a985c4ae03d3d1>`__).
+
+- STL type casters now directly reject incorrectly sized inputs, which avoids
+  performance pitfalls when passing large arrays. (commit `edf5753
+  <https://github.com/wjakob/nanobind/commit/edf5753a13f98132b8da3d56fe94c31c678b2273>`__,
+  `dc35d69
+  <https://github.com/wjakob/nanobind/commit/dc35d69f65936280b2521941b2ce9d5ad16141d1>`__).
+
+- Fixed ``__new__`` overloads with variadic positional arguments but no
+  variadic keyword arguments, which incorrectly prevented nullary calls. (PR
+  `#1172 <https://github.com/wjakob/nanobind/pull/1172>`__).
+
+- Removed zero-length arrays to improve compiler compatibility. (PR `#1158
+  <https://github.com/wjakob/nanobind/pull/1158>`__).
+
+- Fixed a data race caused by writes to a bit-field in free-threaded
+  extension builds. (PR `#1191
+  <https://github.com/wjakob/nanobind/pull/1191>`__).
+
+- ABI version 17.
+
+- **Stub generation improvements**:
+
+  - Added a new ``--exclude-values`` flag that forces all values to be rendered
+    as ``...`` in stub files. (PR `#1185
+    <https://github.com/wjakob/nanobind/pull/1185>`__).
+
+  - Added support for ``typing.ParamSpec`` in generated stubs.
+    (PR `#1194 <https://github.com/wjakob/nanobind/pull/1194>`__).
+
+  - NumPy boolean arrays now use ``np.bool_`` dtype in generated stubs instead
+    of deprecated alternatives.
+    (commit `20fab93 <https://github.com/wjakob/nanobind/commit/20fab9386cd6c363878f67296d6b39a66af60a0a>`__).
+
+  - Auto-generated enum APIs are now excluded from stub files.
+    (PR `#1182 <https://github.com/wjakob/nanobind/pull/1182>`__).
+
+  - Pattern files now support ``__prefix__`` and ``__suffix__`` patterns
+    within classes for further customization of class stubs.
+    (PR `#1235 <https://github.com/wjakob/nanobind/pull/1235>`__).
+
+  - Various minor improvements to the stub generator.
+    (PR `#1179 <https://github.com/wjakob/nanobind/pull/1179>`__).
+
+- Fixed a regression in 2.10.0 (yanked release) related to handling of the ``NB_USE_SUBMODULE_DEPS``
+  flag that could cause CMake build system failures (commit `06aaa3
+  <https://github.com/wjakob/nanobind/commit/06aaa39dfd1a55ba546a96f0c3eebfced2c152c2>`__).
+
+- Minor/miscellaneous fixes: PRs `#1157
+  <https://github.com/wjakob/nanobind/pull/1157>`__, `#1186
+  <https://github.com/wjakob/nanobind/pull/1186>`__, `#1193
+  <https://github.com/wjakob/nanobind/pull/1193>`__, `#1198
+  <https://github.com/wjakob/nanobind/pull/1198>`__, `#1212
+  <https://github.com/wjakob/nanobind/pull/1212>`__, `#1218
+  <https://github.com/wjakob/nanobind/pull/1218>`__, `#1223
+  <https://github.com/wjakob/nanobind/pull/1223>`__, `#1225
+  <https://github.com/wjakob/nanobind/pull/1225>`__, commit `cf289b
+  <https://github.com/wjakob/nanobind/commit/cf289bbeb301a2d684e66fe6a4690932e2ae9df4>`__.
+
+
+Version 2.10.0 (Dec 8, 2025)
+----------------------------
+
+This release was yanked due to a regression.
+
 Version 2.9.2 (Sep 4, 2025)
 ---------------------------
 
@@ -26,6 +207,8 @@ This is a patch release to fix an issue in the new recursive stub generation fea
   submodules. However, the implemented submodule test was far too conservative
   and interpreted any imported module (e.g. ``import os``) as a submodule. The
   patch release fixes this.
+  (commit `a65e1b
+  <https://github.com/wjakob/nanobind/commit/a65e1b36ec0670e7c8d7a3bacfa5cff425fe92fe>`__).
 
 Version 2.9.1 (Sep 4, 2025)
 ---------------------------
diff --git a/extern/nanobind/docs/classes.rst b/extern/nanobind/docs/classes.rst
index fb6fc8abb..c760d7540 100644
--- a/extern/nanobind/docs/classes.rst
+++ b/extern/nanobind/docs/classes.rst
@@ -1141,3 +1141,43 @@ Two limitations of :cpp:struct:`nb::new_ <new_>` are worth noting:
    just helps unpickling work. If your first :cpp:struct:`nb::new_ <new_>`
    method is one that takes no arguments, then nanobind won't add its own,
    and you'll have to deal with unpickling some other way.
+
+Preventing object destruction
+-----------------------------
+
+In rare cases, you may need to bind a class that should never be destructed
+by nanobind:
+
+.. code-block:: cpp
+
+   class Singleton {
+   public:
+       static Singleton &get_instance();
+   };
+
+You may use the :cpp:class:`nb::never_destruct <never_destruct>` annotation to
+make nanobind aware of this. This feature is particularly helpful when attempts
+to bind the destructor would fail with a compilation error (e.g., because this
+would require access to implementation details that are not available in the
+current compilation unit).
+
+.. code-block:: cpp
+
+   nb::class_<Singleton>(m, "Singleton", nb::never_destruct())
+       .def_static("get_instance", &Singleton::get_instance, nb::rv_policy::reference);
+
+.. warning::
+
+   Instances of classes marked with :cpp:class:`nb::never_destruct <never_destruct>`
+   must be returned using the :cpp:enumerator:`reference
+   <rv_policy::reference>` return value policy. Otherwise, nanobind will assume
+   ownership, which includes the requirement of destructing the object at
+   a later point.
+
+   Similarly, you must not bind constructors or copy constructors, as the
+   eventual garbage collection of constructed instances would require calling
+   the destructor.
+
+   nanobind will abort with a fatal error if it is ever put into a situation
+   where an object with the :cpp:class:`nb::never_destruct <never_destruct>`
+   annotation must be destructed.
diff --git a/extern/nanobind/docs/exceptions.rst b/extern/nanobind/docs/exceptions.rst
index 3e97df67f..5b14276a1 100644
--- a/extern/nanobind/docs/exceptions.rst
+++ b/extern/nanobind/docs/exceptions.rst
@@ -38,6 +38,8 @@ that convert to specific Python exceptions as shown below:
     - ``ValueError``
   * - ``std::overflow_error``
     - ``OverflowError``
+  * - ``std::runtime_error``
+    - ``RuntimeError``
   * - :cpp:func:`nb::stop_iteration <stop_iteration>`
     - ``StopIteration`` (used to implement custom iterator) 
   * - :cpp:func:`nb::index_error <index_error>`
@@ -264,8 +266,8 @@ Should they throw or fail to catch any exceptions in their call graph,
 the C++ runtime calls ``std::terminate()`` to abort immediately.
 
 Similarly, Python exceptions raised in a class's ``__del__`` method do not
-propagate, but are logged by Python as an unraisable error. In Python 3.8+, a
-`system hook is triggered
+propagate, but are logged by Python as an unraisable error. A `system hook is
+triggered
 <https://docs.python.org/3/library/sys.html#sys.unraisablehook>`_
 and an auditing event is logged.
 
diff --git a/extern/nanobind/docs/index.rst b/extern/nanobind/docs/index.rst
index f90ddea80..c715b8522 100644
--- a/extern/nanobind/docs/index.rst
+++ b/extern/nanobind/docs/index.rst
@@ -54,12 +54,13 @@ similar runtime performance).
 
 nanobinds depends on
 
-- **Python 3.8+** or **PyPy 7.3.10+** (the *3.8* and *3.9* PyPy flavors are
+- **Python 3.9+** or **PyPy 7.3.10+** (the *3.9* and *3.10* PyPy flavors are
   supported, though there are :ref:`some limitations <pypy_issues>`).
 - **CMake 3.15+**.
-- **A C++17 compiler**: Clang 8+, GCC 8+, MSVC2019+, and the CUDA NVCC compiler
-  are officially supported. Others (MinGW, Cygwin, Intel, ..) may work as well
-  but will not receive support.
+- **A C++17 compiler**: Clang 8+, GCC 8+, MSVC2019+, MinGW-w64, Intel ICX
+  (the modern Clang-based Intel compiler), and the CUDA NVCC compiler are
+  officially supported. Others (Cygwin, older Intel compilers, ..) may work
+  as well but will not receive support.
 
 .. only:: not latex
 
diff --git a/extern/nanobind/docs/ndarray.rst b/extern/nanobind/docs/ndarray.rst
index e62fcb373..f4362d7f7 100644
--- a/extern/nanobind/docs/ndarray.rst
+++ b/extern/nanobind/docs/ndarray.rst
@@ -275,12 +275,19 @@ desired Python type.
 - :cpp:class:`nb::tensorflow <tensorflow>`: create a ``tensorflow.python.framework.ops.EagerTensor``.
 - :cpp:class:`nb::jax <jax>`: create a ``jaxlib.xla_extension.DeviceArray``.
 - :cpp:class:`nb::cupy <cupy>`: create a ``cupy.ndarray``.
+- :cpp:class:`nb::memview <memview>`: create a Python ``memoryview``.
+- :cpp:class:`nb::array_api <array_api>`: create an object that supports the
+  Python buffer protocol (i.e., is accepted as an argument to ``memoryview()``)
+  and also has the DLPack attributes ``__dlpack__`` and ``__dlpack_device__``
+  (i.e., it is accepted as an argument to a framework's ``from_dlpack()``
+  function).
 - No framework annotation. In this case, nanobind will create a raw Python
   ``dltensor`` `capsule <https://docs.python.org/3/c-api/capsule.html>`__
-  representing the `DLPack <https://github.com/dmlc/dlpack>`__ metadata.
+  representing the `DLPack <https://github.com/dmlc/dlpack>`__ metadata of
+  a ``DLManagedTensor``.
 
 This annotation also affects the auto-generated docstring of the function,
-which in this case becomes:
+which in this example's case becomes:
 
 .. code-block:: python
 
@@ -458,6 +465,21 @@ interpreted as follows:
 - :cpp:enumerator:`rv_policy::move` is unsupported and demoted to
   :cpp:enumerator:`rv_policy::copy`.
 
+Note that when a copy is returned, the copy is made by the framework, not by
+nanobind itself.
+For example, ``numpy.array()`` is passed the keyword argument ``copy`` with
+value ``True``, or the PyTorch tensor's ``clone()`` method is immediately
+called to create the copy.
+This design has a couple of advantages.
+First, nanobind does not have a build-time dependency on the libraries and
+frameworks (NumPy, PyTorch, CUDA, etc.) that would otherwise be necessary
+to perform the copy.
+Second, frameworks have the opportunity to optimize how the copy is created.
+The copy is owned by the framework, so the framework can choose to use a custom
+memory allocator, over-align the data, etc. based on the nd-array's size,
+the specific CPU, GPU, or memory types detected, etc.
+
+
 .. _ndarray-temporaries:
 
 Returning temporaries
@@ -643,26 +665,80 @@ support inter-framework data exchange, custom array types should implement the
 - `__dlpack__ <https://data-apis.org/array-api/latest/API_specification/generated/array_api.array.__dlpack__.html#array_api.array.__dlpack__>`__ and
 - `__dlpack_device__ <https://data-apis.org/array-api/latest/API_specification/generated/array_api.array.__dlpack_device__.html#array_api.array.__dlpack_device__>`__
 
-methods. This is easy thanks to the nd-array integration in nanobind. An example is shown below:
+methods.
+These, as well as the buffer protocol, are implemented in the object returned
+by nanobind when specifying :cpp:class:`nb::array_api <array_api>` as the
+framework template parameter.
+For example:
+
+.. code-block:: cpp
+
+    class MyArray {
+        double* d;
+     public:
+        MyArray() { d = new double[5] { 0.0, 1.0, 2.0, 3.0, 4.0 }; }
+        ~MyArray() { delete[] d; }
+        double* data() const { return d; }
+    };
+
+    nb::class_<MyArray>(m, "MyArray")
+       .def(nb::init<>())
+       .def("array_api", [](const MyArray& self) {
+               return nb::ndarray<nb::array_api, double>(self.data(), {5});
+           }, nb::rv_policy::reference_internal);
+
+which can be used as follows:
+
+.. code-block:: pycon
+
+    >>> import my_extension
+    >>> ma = my_extension.MyArray()
+    >>> aa = ma.array_api()
+    >>> aa.__dlpack_device__()
+    (1, 0)
+    >>> import numpy as np
+    >>> x = np.from_dlpack(aa)
+    >>> x
+    array([0., 1., 2., 3., 4.])
+
+The DLPack methods can also be provided for the class itself, by implementing
+``__dlpack__()`` as a wrapper function.
+For example, by adding the following lines to the binding:
 
 .. code-block:: cpp
 
-   nb::class_<MyArray>(m, "MyArray")
-      // ...
-      .def("__dlpack__", [](nb::kwargs kwargs) {
-          return nb::ndarray<>( /* ... */);
-      })
-      .def("__dlpack_device__", []() {
-          return std::make_pair(nb::device::cpu::value, 0);
-      });
+       .def("__dlpack__", [](nb::pointer_and_handle<MyArray> self,
+                             nb::kwargs kwargs) {
+               using array_api_t = nb::ndarray<nb::array_api, double>;
+               nb::object aa = nb::cast(array_api_t(self.p->data(), {5}),
+                                        nb::rv_policy::reference_internal,
+                                        self.h);
+               return aa.attr("__dlpack__")(**kwargs);
+           })
+       .def("__dlpack_device__", [](nb::handle /*self*/) {
+               return std::make_pair(nb::device::cpu::value, 0);
+           })
 
-Returning a raw :cpp:class:`nb::ndarray <ndarray>` without framework annotation
-will produce a DLPack capsule, which is what the interface expects.
+the class can be used as follows:
+
+.. code-block:: pycon
+
+    >>> import my_extension
+    >>> ma = my_extension.MyArray()
+    >>> ma.__dlpack_device__()
+    (1, 0)
+    >>> import numpy as np
+    >>> y = np.from_dlpack(ma)
+    >>> y
+    array([0., 1., 2., 3., 4.])
+
+
+The ``kwargs`` argument in the implementation of ``__dlpack__`` above can be
+used to support additional parameters (e.g., to allow the caller to request a
+copy).  See
+`__dlpack__() <https://data-apis.org/array-api/latest/API_specification/generated/array_api.array.__dlpack__.html>`__
+in the Python array API standard for details.
 
-The ``kwargs`` argument can be used to provide additional parameters (for
-example to request a copy), please see the DLPack documentation for details.
-Note that nanobind does not yet implement the versioned DLPack protocol. The
-version number should be ignored for now.
 
 Frequently asked questions
 --------------------------
@@ -708,7 +784,3 @@ be more restrictive. Presently supported dtypes include signed/unsigned
 integers, floating point values, complex numbers, and boolean values. Some
 :ref:`nonstandard arithmetic types <ndarray-nonstandard>` can be supported as
 well.
-
-Nanobind can receive and return *read-only* arrays via the buffer protocol when
-exhanging data with NumPy. The DLPack interface currently ignores this
-annotation.
diff --git a/extern/nanobind/docs/packaging.rst b/extern/nanobind/docs/packaging.rst
index 95d949d48..b63c5e496 100644
--- a/extern/nanobind/docs/packaging.rst
+++ b/extern/nanobind/docs/packaging.rst
@@ -101,7 +101,7 @@ An example is shown below:
    version = "0.0.1"
    description = "A brief description of what this project does"
    readme = "README.md"
-   requires-python = ">=3.8"
+   requires-python = ">=3.9"
    authors = [
        { name = "Your Name", email = "your.email@address.com" },
    ]
@@ -178,7 +178,7 @@ component that can be used to create `stable ABI
 .. code-block:: cmake
 
    # Try to import all Python components potentially needed by nanobind
-   find_package(Python 3.8
+   find_package(Python 3.9
      REQUIRED COMPONENTS Interpreter Development.Module
      OPTIONAL_COMPONENTS Development.SABIModule)
 
@@ -316,7 +316,6 @@ block to remove incompatible configurations from the matrix:
 
 .. code-block:: toml
 
-    skip = ["cp38-*", "pp38-*"] # Skip CPython and PyPy 3.8
     archs = ["auto64"]          # Only target 64 bit architectures
 
 The `cibuildwheel documentation
diff --git a/extern/nanobind/docs/porting.rst b/extern/nanobind/docs/porting.rst
index eb3b9061c..368d05db6 100644
--- a/extern/nanobind/docs/porting.rst
+++ b/extern/nanobind/docs/porting.rst
@@ -149,6 +149,11 @@ by always passing such objects across the Python/C++ boundary as
 ``std::shared_ptr<T>`` rather than as ``T*``. See the :ref:`advanced section
 on object ownership <enable_shared_from_this>` for more details.
 
+``py::nodelete`` was used with holders in pybind11 to prevent destroying
+instances. nanobind can automatically detect this in some cases, but to
+be certain you can use the :cpp:class:`never_destruct` annotation when binding the
+class.
+
 Custom constructors
 -------------------
 In pybind11, custom constructors (i.e. ones that do not already exist in the
diff --git a/extern/nanobind/docs/refleaks.rst b/extern/nanobind/docs/refleaks.rst
index 1624fa8d3..1ce5e950f 100644
--- a/extern/nanobind/docs/refleaks.rst
+++ b/extern/nanobind/docs/refleaks.rst
@@ -337,11 +337,9 @@ Here is an example of the required code for a ``Wrapper`` type:
    struct Wrapper { std::shared_ptr<Wrapper> value; };
 
    int wrapper_tp_traverse(PyObject *self, visitproc visit, void *arg) {
-       // On Python 3.9+, we must traverse the implicit dependency
-       // of an object on its associated type object.
-       #if PY_VERSION_HEX >= 0x03090000
-           Py_VISIT(Py_TYPE(self));
-       #endif
+       // We must traverse the implicit dependency of an object on its
+       // associated type object.
+       Py_VISIT(Py_TYPE(self));
 
        // The tp_traverse method may be called after __new__ but before or during
        // __init__, before the C++ constructor has been completed. We must not
@@ -454,28 +452,6 @@ how deal with them. For completeness, let's consider some other possibilities.
   should be fixed in the responsible framework so that leak warnings aren't
   cluttered with flukes and can be more broadly useful.
 
-- **Older Python versions**: Very old Python versions (e.g., 3.8) don't
-  do a good job cleaning up global references when the interpreter shuts down.
-  The following code may leak a reference if it is a top-level statement in a
-  Python file or the REPL.
-
-  .. code-block:: python
-
-     a = my_ext.MyObject()
-
-  Such a warning is benign and does not indicate an actual leak. It simply
-  highlights a flaws in the interpreter shutdown logic of old Python versions.
-  Wrap your code into a function to address this issue even on such versions:
-
-  .. code-block:: python
-
-     def run():
-         a = my_ext.MyObject()
-         # ...
-
-     if __name__ == '__main__':
-         run()
-
 - **Exceptions**. Some exceptions such as ``AttributeError`` have been observed
   to hold references, e.g. to the object which lacked the desired attribute. If
   the last exception raised by the program references a nanobind instance, then
diff --git a/extern/nanobind/docs/typing.rst b/extern/nanobind/docs/typing.rst
index 92b5af55e..cb89d9be6 100644
--- a/extern/nanobind/docs/typing.rst
+++ b/extern/nanobind/docs/typing.rst
@@ -540,7 +540,7 @@ The program has the following command line options:
 .. code-block:: text
 
    usage: python -m nanobind.stubgen [-h] [-o FILE] [-O PATH] [-i PATH] [-m MODULE]
-                                     [-r] [-M FILE] [-P] [-D] [-q]
+                                     [-r] [-M FILE] [-P] [-D] [--exclude-values] [-q]
 
    Generate stubs for nanobind-based extensions.
 
@@ -559,6 +559,7 @@ The program has the following command line options:
      -P, --include-private         include private members (with single leading or
                                    trailing underscore)
      -D, --exclude-docstrings      exclude docstrings from the generated stub
+     --exclude-values              force the use of ... for values
      -q, --quiet                   do not generate any output in the absence of failures
 
 
@@ -713,6 +714,6 @@ you may use the special ``\from`` escape code to import them:
        def lookup(array: Array[T], index: Literal[0] = 0) -> _Opt[T]:
            \doc
 
-You may also add free-form text the beginning or the end of the generated stub.
-To do so, add an entry that matches on ``module_name.__prefix__`` or
-``module_name.__suffix__``.
+You may also add free-form text at the beginning or the end of the generated stub
+module or of a class. To do so, add an entry that matches on ``name.__prefix__``
+or ``name.__suffix__`` where ``name`` is the name of the module or class.
diff --git a/extern/nanobind/docs/why.rst b/extern/nanobind/docs/why.rst
index 5f58e2c6e..79e955c75 100644
--- a/extern/nanobind/docs/why.rst
+++ b/extern/nanobind/docs/why.rst
@@ -122,14 +122,12 @@ nanobind includes a number of quality-of-life improvements for developers:
 
 - **Stable ABI**: nanobind can target Python's `stable ABI interface
   <https://docs.python.org/3/c-api/stable.html>`__ starting with Python 3.12.
-  This means that extension modules will be compatible with future version of
-  Python without having to compile separate binaries per interpreter. That
-  vision is still relatively far out, however: it will require Python 3.12+ to
-  be widely deployed.
+  This means that extension modules are compatible with later versions of
+  Python without having to compile separate binaries per interpreter.
 
 - **Stub generation**: nanobind ships with a custom :ref:`stub generator
-  <stubs>` and CMake integration to automatically create high quality stubs as
-  part of the build process. `Stubs
+  <stubs>` and CMake integration to automatically create high quality type
+  stubs as part of the build process. `Stubs
   <https://typing.readthedocs.io/en/latest/source/stubs.html>`__ make compiled
   extension code compatible with visual autocomplete in editors like `Visual
   Studio Code <https://code.visualstudio.com>`__ and static type checkers like
@@ -140,9 +138,9 @@ nanobind includes a number of quality-of-life improvements for developers:
 - **Smart pointers, ownership, etc.**: corner cases in pybind11 related to
   smart/unique pointers and callbacks could lead to undefined behavior. A later
   pybind11 redesign (``smart_holder``) was able to address these problems, but
-  this came at the cost of further increased runtime overheads. The object
-  ownership model of nanobind avoids this undefined behavior without penalizing
-  runtime performance.
+  this came at the cost of further increased binary size and runtime overheads.
+  The object ownership model of nanobind avoids this undefined behavior without
+  penalizing performance.
 
 - **Leak warnings**: When the Python interpreter shuts down, nanobind reports
   instance, type, and function leaks related to bindings, which is useful for
@@ -178,42 +176,6 @@ Minor additions
 
 The following lists minor-but-useful additions relative to pybind11.
 
-- **Finding Python objects associated with a C++ instance**: In addition to all
-  of the return value policies supported by pybind11, nanobind provides one
-  additional policy named :cpp:enumerator:`nb::rv_policy::none
-  <rv_policy::none>` that *only* succeeds when the return value is already a
-  known/registered Python object. In other words, this policy will never
-  attempt to move, copy, or reference a C++ instance by constructing a new
-  Python object.
-
-  The new :cpp:func:`nb::find() <find>` function encapsulates this behavior. It
-  resembles :cpp:func:`nb::cast() <cast>` in the sense that it returns the
-  Python object associated with a C++ instance. But while :cpp:func:`nb::cast()
-  <cast>` will create that Python object if it doesn't yet exist,
-  :cpp:func:`nb::find() <find>` will return a ``nullptr`` object. This function
-  is useful to interface with Python's :ref:`cyclic garbage collector
-  <fixing_refleaks>`.
-
-- **Parameterized wrappers**: The :cpp:class:`nb::handle_t\<T\> <handle_t>` type
-  behaves just like the :cpp:class:`nb::handle <handle>` class and wraps a
-  ``PyObject *`` pointer. However, when binding a function that takes such an
-  argument, nanobind will only call the associated function overload when the
-  underlying Python object wraps a C++ instance of type ``T``.
-
-  Similarly, the :cpp:class:`nb::type_object_t\<T\> <type_object_t>` type
-  behaves just like the :cpp:class:`nb::type_object <type_object>` class and
-  wraps a ``PyTypeObject *`` pointer. However, when binding a function that
-  takes such an argument, nanobind will only call the associated function
-  overload when the underlying Python type object is a subtype of the C++ type
-  ``T``.
-
-  Finally, the :cpp:class:`nb::typed\<T, Ts...\> <typed>` annotation can 
-  parameterize any other type. The feature exists to improve the
-  expressiveness of type signatures (e.g., to turn ``list`` into
-  ``list[int]``). Note, however, that nanobind does not perform additional
-  runtime checks in this case. Please see the section on :ref:`parameterizing
-  generics <typing_generics_parameterizing>` for further details.
-
 - **Signature overrides**: it may sometimes be necessary to tweak the
   type signature of a class or function to provide richer type information to
   static type checkers like `MyPy <https://github.com/python/mypy>`__ or
@@ -240,6 +202,42 @@ The following lists minor-but-useful additions relative to pybind11.
   <typing_signature_functions>` and :ref:`class signatures
   <typing_signature_classes>` for further details.
 
+- **Parameterized wrappers**: The :cpp:class:`nb::handle_t\<T\> <handle_t>` type
+  behaves just like the :cpp:class:`nb::handle <handle>` class and wraps a
+  ``PyObject *`` pointer. However, when binding a function that takes such an
+  argument, nanobind will only call the associated function overload when the
+  underlying Python object wraps a C++ instance of type ``T``.
+
+  Similarly, the :cpp:class:`nb::type_object_t\<T\> <type_object_t>` type
+  behaves just like the :cpp:class:`nb::type_object <type_object>` class and
+  wraps a ``PyTypeObject *`` pointer. However, when binding a function that
+  takes such an argument, nanobind will only call the associated function
+  overload when the underlying Python type object is a subtype of the C++ type
+  ``T``.
+
+  Finally, the :cpp:class:`nb::typed\<T, Ts...\> <typed>` annotation can
+  parameterize any other type. The feature exists to improve the
+  expressiveness of type signatures (e.g., to turn ``list`` into
+  ``list[int]``). Note, however, that nanobind does not perform additional
+  runtime checks in this case. Please see the section on :ref:`parameterizing
+  generics <typing_generics_parameterizing>` for further details.
+
+- **Finding Python objects associated with a C++ instance**: In addition to all
+  of the return value policies supported by pybind11, nanobind provides one
+  additional policy named :cpp:enumerator:`nb::rv_policy::none
+  <rv_policy::none>` that *only* succeeds when the return value is already a
+  known/registered Python object. In other words, this policy will never
+  attempt to move, copy, or reference a C++ instance by constructing a new
+  Python object.
+
+  The new :cpp:func:`nb::find() <find>` function encapsulates this behavior. It
+  resembles :cpp:func:`nb::cast() <cast>` in the sense that it returns the
+  Python object associated with a C++ instance. But while :cpp:func:`nb::cast()
+  <cast>` will create that Python object if it doesn't yet exist,
+  :cpp:func:`nb::find() <find>` will return a ``nullptr`` object. This function
+  is useful to interface with Python's :ref:`cyclic garbage collector
+  <fixing_refleaks>`.
+
 TLDR
 ----
 
@@ -248,5 +246,8 @@ nanobind. Fixing all the long-standing issues in pybind11 (see above list)
 would require a substantial redesign and years of careful work by a team of C++
 metaprogramming experts. At the same time, changing anything in pybind11 is
 extremely hard because of the large number of downstream users and their
-requirements on API/ABI stability. I personally don't have the time and
-energy to fix pybind11 and have moved my focus to this project.
+requirements on API/ABI stability. I personally don't have the time and energy
+to fix pybind11 and have moved my focus to this project. The `testimonials
+section
+<https://github.com/wjakob/nanobind/blob/master/README.md#testimonials>`__ lists
+the experience of a number of large projects that made the switch.
diff --git a/extern/nanobind/include/nanobind/eigen/dense.h b/extern/nanobind/include/nanobind/eigen/dense.h
index b8224cb58..6570ff44c 100644
--- a/extern/nanobind/include/nanobind/eigen/dense.h
+++ b/extern/nanobind/include/nanobind/eigen/dense.h
@@ -182,7 +182,7 @@ struct type_caster<T, enable_if_t<is_eigen_plain_v<T> &&
 
         object owner;
         if (policy == rv_policy::move) {
-            T *temp = new T(std::move(v));
+            T *temp = new T((T&&) v);
             owner = capsule(temp, [](void *p) noexcept { delete (T *) p; });
             ptr = temp->data();
             policy = rv_policy::reference;
@@ -324,6 +324,13 @@ struct type_caster<Eigen::Map<T, Options, StrideType>,
         if constexpr (IS == 0)
             inner = 0;
 
+        // Starting from numpy 2.4, dl_tensors' stride field is *always* set (for ndim > 0).
+        // This also includes when shape=(0,0), when numpy reports the stride to be zero.
+        // This creates an incompatibility with Eigen compile-time vectors, which expect
+        // runtime and compile-time strides to be identical (e.g. for Eigen::VectorXi, equal to 1).
+        if (ndim_v<T> == 1 && caster.value.shape(0) == 0)
+            inner = IS;
+
         if constexpr (OS == 0)
             outer = 0;
 
diff --git a/extern/nanobind/include/nanobind/intrusive/ref.h b/extern/nanobind/include/nanobind/intrusive/ref.h
index a6f78b8a0..9e3ef70f5 100644
--- a/extern/nanobind/include/nanobind/intrusive/ref.h
+++ b/extern/nanobind/include/nanobind/intrusive/ref.h
@@ -119,7 +119,7 @@ template <typename T> class ref {
     T *m_ptr = nullptr;
 };
 
-// Registar a type caster for ``ref<T>`` if nanobind was previously #included
+// Register a type caster for ``ref<T>`` if nanobind was previously #included
 #if defined(NB_VERSION_MAJOR)
 NAMESPACE_BEGIN(detail)
 template <typename T> struct type_caster<nanobind::ref<T>> {
diff --git a/extern/nanobind/include/nanobind/nanobind.h b/extern/nanobind/include/nanobind/nanobind.h
index 66d2014f3..12807690f 100644
--- a/extern/nanobind/include/nanobind/nanobind.h
+++ b/extern/nanobind/include/nanobind/nanobind.h
@@ -22,13 +22,14 @@
 #endif
 
 #define NB_VERSION_MAJOR 2
-#define NB_VERSION_MINOR 9
-#define NB_VERSION_PATCH 2
+#define NB_VERSION_MINOR 11
+#define NB_VERSION_PATCH 0
 #define NB_VERSION_DEV   0 // A value > 0 indicates a development release
 
 // Core C++ headers that nanobind depends on
 #include <cstddef>
 #include <cstdint>
+#include <cstdlib>
 #include <exception>
 #include <stdexcept>
 #include <type_traits>
diff --git a/extern/nanobind/include/nanobind/nb_attr.h b/extern/nanobind/include/nanobind/nb_attr.h
index a69df8693..5ff5b706c 100644
--- a/extern/nanobind/include/nanobind/nb_attr.h
+++ b/extern/nanobind/include/nanobind/nb_attr.h
@@ -123,6 +123,7 @@ struct is_final {};
 struct is_generic {};
 struct kw_only {};
 struct lock_self {};
+struct never_destruct {};
 
 template <size_t /* Nurse */, size_t /* Patient */> struct keep_alive {};
 template <typename T> struct supplement {};
@@ -219,7 +220,7 @@ struct arg_data {
     uint8_t flag;
 };
 
-template <size_t Size> struct func_data_prelim {
+struct func_data_prelim_base {
     // A small amount of space to capture data used by the function/closure
     void *capture[3];
 
@@ -245,12 +246,12 @@ template <size_t Size> struct func_data_prelim {
     /// for each of these.
     uint16_t nargs;
 
-    /// Number of paramters to the C++ function that may be filled from
-    /// Python positional arguments without additional ceremony. nb::args and
-    /// nb::kwargs parameters are not counted in this total, nor are any
-    /// parameters after nb::args or after a nb::kw_only annotation.
-    /// The parameters counted here may be either named (nb::arg("name"))
-    /// or unnamed (nb::arg()). If unnamed, they are effectively positional-only.
+    /// Number of parameters to the C++ function that may be filled from
+    /// Python positional arguments without additional ceremony.
+    /// nb::args and nb::kwargs parameters are not counted in this total, nor
+    /// are any parameters after nb::args or after a nb::kw_only annotation.
+    /// The parameters counted here may be either named (nb::arg("name")) or
+    /// unnamed (nb::arg()).  If unnamed, they are effectively positional-only.
     /// nargs_pos is always <= nargs.
     uint16_t nargs_pos;
 
@@ -259,42 +260,15 @@ template <size_t Size> struct func_data_prelim {
     const char *name;
     const char *doc;
     PyObject *scope;
+};
 
-    // *WARNING*: nanobind regularly receives requests from users who run it
-    // through Clang-Tidy, or who compile with increased warnings levels, like
-    //
-    //   -Wpedantic, -Wcast-qual, -Wsign-conversion, etc.
-    //
-    // (i.e., beyond -Wall -Wextra and /W4 that are currently already used)
-    //
-    // Their next step is to open a big pull request needed to silence all of
-    // the resulting messages. This comment is strategically placed here
-    // because the zero-length array construction below will almost certainly
-    // be flagged in this process.
-    //
-    // My policy on this is as follows: I am always happy to fix issues in the
-    // codebase. However, many of the resulting change requests are in the
-    // "ritual purification" category: things that cause churn, decrease
-    // readability, and which don't fix actual problems. It's a never-ending
-    // cycle because each new revision of such tooling adds further warnings
-    // and purification rites.
-    //
-    // So just to be clear: I do not wish to pepper this codebase with
-    // "const_cast" and #pragmas/comments to avoid warnings in external
-    // tooling just so those users can have a "silent" build. I don't think it
-    // is reasonable for them to impose their own style on this project.
-    //
-    // As a workaround it is likely possible to restrict the scope of style
-    // checks to particular C++ namespaces or source code locations.
-#if defined(_MSC_VER)
-    // MSVC doesn't support zero-length arrays
-    arg_data args[Size == 0 ? 1 : Size];
-#else
-    // GCC and Clang do.
+template<size_t Size> struct func_data_prelim : func_data_prelim_base {
     arg_data args[Size];
-#endif
 };
 
+template<> struct func_data_prelim<0> : func_data_prelim_base {};
+
+
 template <typename F>
 NB_INLINE void func_extra_apply(F &f, const name &name, size_t &) {
     f.name = name.value;
@@ -354,7 +328,7 @@ NB_INLINE void func_extra_apply(F &f, const arg &a, size_t &index) {
         flag |= (uint8_t) cast_flags::convert;
 
     arg_data &arg = f.args[index];
-    arg.flag = flag;
+    arg.flag |= flag;
     arg.name = a.name_;
     arg.signature = a.signature_;
     arg.value = nullptr;
diff --git a/extern/nanobind/include/nanobind/nb_call.h b/extern/nanobind/include/nanobind/nb_call.h
index 64206f370..c849e0433 100644
--- a/extern/nanobind/include/nanobind/nb_call.h
+++ b/extern/nanobind/include/nanobind/nb_call.h
@@ -96,7 +96,7 @@ NB_INLINE void call_init(PyObject **args, PyObject *kwnames, size_t &nargs,
         args[0] = nullptr;                                                     \
         args_p = args + 1;                                                     \
     }                                                                          \
-    nargs |= NB_VECTORCALL_ARGUMENTS_OFFSET;                                   \
+    nargs |= PY_VECTORCALL_ARGUMENTS_OFFSET;                                   \
     return steal(obj_vectorcall(base, args_p, nargs, kwnames, method_call))
 
 template <typename Derived>
diff --git a/extern/nanobind/include/nanobind/nb_cast.h b/extern/nanobind/include/nanobind/nb_cast.h
index 8cf039dd6..1c26cbf30 100644
--- a/extern/nanobind/include/nanobind/nb_cast.h
+++ b/extern/nanobind/include/nanobind/nb_cast.h
@@ -349,21 +349,7 @@ template <typename T> struct typed_base_name {
       static constexpr auto Name = type_caster<T>::Name;
 };
 
-#if PY_VERSION_HEX < 0x03090000
-#define NB_TYPED_NAME_PYTHON38(type, name)                     \
-    template <> struct typed_base_name<type> {                 \
-        static constexpr auto Name = detail::const_name(name); \
-    };
-
-NB_TYPED_NAME_PYTHON38(nanobind::tuple, NB_TYPING_TUPLE)
-NB_TYPED_NAME_PYTHON38(list, NB_TYPING_LIST)
-NB_TYPED_NAME_PYTHON38(set, NB_TYPING_SET)
-NB_TYPED_NAME_PYTHON38(dict, NB_TYPING_DICT)
-NB_TYPED_NAME_PYTHON38(type_object, NB_TYPING_TYPE)
-#endif
-
-// Base case: typed<T, Ts...> renders as T[Ts...], with some adjustments to
-// T for older versions of Python (typing.List instead of list, for example)
+// Base case: typed<T, Ts...> renders as T[Ts...]
 template <typename T, typename... Ts> struct typed_name {
     static constexpr auto Name =
             typed_base_name<intrinsic_t<T>>::Name + const_name("[") +
@@ -385,7 +371,7 @@ template <typename R, typename... Args>
 struct typed_name<callable, R(Args...)> {
     using Ret = std::conditional_t<std::is_void_v<R>, void_type, R>;
     static constexpr auto Name =
-            const_name(NB_TYPING_CALLABLE "[[") +
+            const_name("collections.abc.Callable[[") +
             concat(make_caster<Args>::Name...) + const_name("], ") +
             make_caster<Ret>::Name + const_name("]");
 };
@@ -394,7 +380,7 @@ template <typename R>
 struct typed_name<callable, R(...)> {
     using Ret = std::conditional_t<std::is_void_v<R>, void_type, R>;
     static constexpr auto Name =
-            const_name(NB_TYPING_CALLABLE "[..., ") +
+            const_name("collections.abc.Callable[..., ") +
             make_caster<Ret>::Name + const_name("]");
 };
 
diff --git a/extern/nanobind/include/nanobind/nb_class.h b/extern/nanobind/include/nanobind/nb_class.h
index 7733ab01f..40368b070 100644
--- a/extern/nanobind/include/nanobind/nb_class.h
+++ b/extern/nanobind/include/nanobind/nb_class.h
@@ -128,10 +128,8 @@ struct type_data {
     };
     void (*set_self_py)(void *, PyObject *) noexcept;
     bool (*keep_shared_from_this_alive)(PyObject *) noexcept;
-#if defined(Py_LIMITED_API)
     uint32_t dictoffset;
     uint32_t weaklistoffset;
-#endif
 };
 
 /// Information about a type that is only relevant when it is being created
@@ -186,6 +184,10 @@ NB_INLINE void type_extra_apply(type_init_data & t, const sig &s) {
     t.name = s.value;
 }
 
+NB_INLINE void type_extra_apply(type_init_data &, never_destruct) {
+    // Intentionally empty: class_ checks for this tag at compile time instead
+}
+
 template <typename T>
 NB_INLINE void type_extra_apply(type_init_data &t, supplement<T>) {
     static_assert(std::is_trivially_default_constructible_v<T>,
@@ -588,7 +590,9 @@ class class_ : public object {
             }
         }
 
-        if constexpr (std::is_destructible_v<T>) {
+        constexpr bool has_never_destruct = (std::is_same_v<Extra, never_destruct> || ...);
+
+        if constexpr (std::is_destructible_v<T> && !has_never_destruct) {
             d.flags |= (uint32_t) detail::type_flags::is_destructible;
 
             if constexpr (!std::is_trivially_destructible_v<T>) {
diff --git a/extern/nanobind/include/nanobind/nb_defs.h b/extern/nanobind/include/nanobind/nb_defs.h
index 5dc1361bb..d3b50712a 100644
--- a/extern/nanobind/include/nanobind/nb_defs.h
+++ b/extern/nanobind/include/nanobind/nb_defs.h
@@ -28,6 +28,7 @@
 #  define NB_INLINE          __forceinline
 #  define NB_NOINLINE        __declspec(noinline)
 #  define NB_INLINE_LAMBDA
+#  define NB_NOUNROLL
 #else
 #  define NB_EXPORT          __attribute__ ((visibility("default")))
 #  define NB_IMPORT          NB_EXPORT
@@ -35,8 +36,14 @@
 #  define NB_NOINLINE        __attribute__((noinline))
 #  if defined(__clang__)
 #    define NB_INLINE_LAMBDA __attribute__((always_inline))
+#    define NB_NOUNROLL      _Pragma("nounroll")
 #  else
 #    define NB_INLINE_LAMBDA
+#    if defined(__GNUC__)
+#      define NB_NOUNROLL    _Pragma("GCC unroll 0")
+#    else
+#      define NB_NOUNROLL
+#    endif
 #  endif
 #endif
 
@@ -74,58 +81,12 @@
 #  define NB_HAS_U8STRING
 #endif
 
-#if defined(Py_TPFLAGS_HAVE_VECTORCALL)
-#  define NB_VECTORCALL PyObject_Vectorcall
-#  define NB_HAVE_VECTORCALL Py_TPFLAGS_HAVE_VECTORCALL
-#elif defined(_Py_TPFLAGS_HAVE_VECTORCALL)
-#  define NB_VECTORCALL _PyObject_Vectorcall
-#  define NB_HAVE_VECTORCALL _Py_TPFLAGS_HAVE_VECTORCALL
-#else
-#  define NB_HAVE_VECTORCALL (1UL << 11)
-#endif
-
-#if defined(PY_VECTORCALL_ARGUMENTS_OFFSET)
-#  define NB_VECTORCALL_ARGUMENTS_OFFSET PY_VECTORCALL_ARGUMENTS_OFFSET
-#  define NB_VECTORCALL_NARGS PyVectorcall_NARGS
-#else
-#  define NB_VECTORCALL_ARGUMENTS_OFFSET ((size_t) 1 << (8 * sizeof(size_t) - 1))
-#  define NB_VECTORCALL_NARGS(n) ((n) & ~NB_VECTORCALL_ARGUMENTS_OFFSET)
-#endif
-
-#if PY_VERSION_HEX < 0x03090000
-#  define NB_TYPING_ABC   "typing."
-#  define NB_TYPING_TUPLE "typing.Tuple"
-#  define NB_TYPING_LIST  "typing.List"
-#  define NB_TYPING_DICT  "typing.Dict"
-#  define NB_TYPING_SET   "typing.Set"
-#  define NB_TYPING_TYPE  "typing.Type"
-#else
-#  define NB_TYPING_ABC   "collections.abc."
-#  define NB_TYPING_TUPLE "tuple"
-#  define NB_TYPING_LIST  "list"
-#  define NB_TYPING_DICT  "dict"
-#  define NB_TYPING_SET   "set"
-#  define NB_TYPING_TYPE  "type"
-#endif
-
 #if PY_VERSION_HEX < 0x030D0000
 #  define NB_TYPING_CAPSULE "typing_extensions.CapsuleType"
 #else
 #  define NB_TYPING_CAPSULE "types.CapsuleType"
 #endif
 
-#define NB_TYPING_SEQUENCE     NB_TYPING_ABC "Sequence"
-#define NB_TYPING_MAPPING      NB_TYPING_ABC "Mapping"
-#define NB_TYPING_CALLABLE     NB_TYPING_ABC "Callable"
-#define NB_TYPING_ITERATOR     NB_TYPING_ABC "Iterator"
-#define NB_TYPING_ITERABLE     NB_TYPING_ABC "Iterable"
-
-#if PY_VERSION_HEX < 0x03090000
-#  define NB_TYPING_ABSTRACT_SET "typing.AbstractSet"
-#else
-#  define NB_TYPING_ABSTRACT_SET "collections.abc.Set"
-#endif
-
 #if defined(Py_LIMITED_API)
 #  if PY_VERSION_HEX < 0x030C0000 || defined(PYPY_VERSION)
 #    error "nanobind can target Python's limited API, but this requires CPython >= 3.12"
@@ -179,6 +140,12 @@
 #  define NB_TYPE_GET_SLOT_IMPL 1
 #endif
 
+#if defined(Py_LIMITED_API) // limited API: use Py_Version (per CPython docs, the run-time version)
+#  define NB_DYNAMIC_VERSION Py_Version
+#else // full API: use the compile-time PY_VERSION_HEX constant
+#  define NB_DYNAMIC_VERSION PY_VERSION_HEX
+#endif
+
 #define NB_MODULE_SLOTS_0 { 0, nullptr }
 
 #if PY_VERSION_HEX < 0x030C0000
@@ -202,12 +169,14 @@
     X(const X &) = delete;                                                     \
     X &operator=(const X &) = delete;
 
+#define NB_MOD_STATE_SIZE (12 * sizeof(PyObject*))
+
 // Helper macros to ensure macro arguments are expanded before token pasting/stringification
 #define NB_MODULE_IMPL(name, variable) NB_MODULE_IMPL2(name, variable)
 #define NB_MODULE_IMPL2(name, variable)                                        \
     static void nanobind_##name##_exec_impl(nanobind::module_);                \
     static int nanobind_##name##_exec(PyObject *m) {                           \
-        nanobind::detail::init(NB_DOMAIN_STR);                                 \
+        nanobind::detail::nb_module_exec(NB_DOMAIN_STR, m);                    \
         try {                                                                  \
             nanobind_##name##_exec_impl(                                       \
                 nanobind::borrow<nanobind::module_>(m));                       \
@@ -227,8 +196,9 @@
         NB_MODULE_SLOTS_2                                                      \
     };                                                                         \
     static struct PyModuleDef nanobind_##name##_module = {                     \
-        PyModuleDef_HEAD_INIT, #name, nullptr, 0, nullptr,                     \
-        nanobind_##name##_slots, nullptr, nullptr, nullptr                     \
+        PyModuleDef_HEAD_INIT, #name, nullptr, NB_MOD_STATE_SIZE, nullptr,     \
+        nanobind_##name##_slots, nanobind::detail::nb_module_traverse,         \
+        nanobind::detail::nb_module_clear, nanobind::detail::nb_module_free    \
     };                                                                         \
     extern "C" [[maybe_unused]] NB_EXPORT PyObject *PyInit_##name(void);       \
     extern "C" PyObject *PyInit_##name(void) {                                 \
diff --git a/extern/nanobind/include/nanobind/nb_func.h b/extern/nanobind/include/nanobind/nb_func.h
index 10eb39945..5daab0dc2 100644
--- a/extern/nanobind/include/nanobind/nb_func.h
+++ b/extern/nanobind/include/nanobind/nb_func.h
@@ -75,7 +75,7 @@ NB_INLINE PyObject *func_create(Func &&func, Return (*)(Args...),
 
     if constexpr (CheckGuard && !std::is_same_v<typename Info::call_guard, void>) {
         return func_create<ReturnRef, false>(
-            [func = (forward_t<Func>) func](Args... args) NB_INLINE_LAMBDA {
+            [func = (forward_t<Func>) func](Args... args) NB_INLINE_LAMBDA -> Return {
                 typename Info::call_guard::type g;
                 (void) g;
                 return func((forward_t<Args>) args...);
@@ -95,6 +95,8 @@ NB_INLINE PyObject *func_create(Func &&func, Return (*)(Args...),
         kwargs_pos_n = index_n_v<std::is_same_v<intrinsic_t<Args>, kwargs>...>,
         nargs = sizeof...(Args);
 
+    constexpr bool has_arg_defaults = (detail::has_arg_defaults_v<Args> || ... || false);
+
     // Determine the number of nb::arg/nb::arg_v annotations
     constexpr size_t nargs_provided =
         (std::is_base_of_v<arg, Extra> + ... + 0);
@@ -102,7 +104,7 @@ NB_INLINE PyObject *func_create(Func &&func, Return (*)(Args...),
         (std::is_same_v<is_method, Extra> + ... + 0) != 0;
     constexpr bool is_getter_det =
         (std::is_same_v<is_getter, Extra> + ... + 0) != 0;
-    constexpr bool has_arg_annotations = nargs_provided > 0 && !is_getter_det;
+    constexpr bool has_arg_annotations = has_arg_defaults || (nargs_provided > 0 && !is_getter_det);
 
     // Determine the number of potentially-locked function arguments
     constexpr bool lock_self_det =
@@ -128,7 +130,7 @@ NB_INLINE PyObject *func_create(Func &&func, Return (*)(Args...),
     // A few compile-time consistency checks
     static_assert(args_pos_1 == args_pos_n && kwargs_pos_1 == kwargs_pos_n,
         "Repeated use of nb::kwargs or nb::args in the function signature!");
-    static_assert(!has_arg_annotations || nargs_provided + is_method_det == nargs,
+    static_assert(!has_arg_annotations || has_arg_defaults || nargs_provided + is_method_det == nargs,
         "The number of nb::arg annotations must match the argument count!");
     static_assert(kwargs_pos_1 == nargs || kwargs_pos_1 + 1 == nargs,
         "nb::kwargs must be the last element of the function signature!");
@@ -188,7 +190,20 @@ NB_INLINE PyObject *func_create(Func &&func, Return (*)(Args...),
     };
 
     // The following temporary record will describe the function in detail
-    func_data_prelim<nargs_provided> f;
+    func_data_prelim<has_arg_defaults ? nargs : nargs_provided> f;
+
+    // Initialize argument flags. The first branch turns std::optional<> types
+    // into implicit nb::none() annotations.
+    if constexpr (has_arg_defaults) {
+        size_t i = 0;
+        ((f.args[i++] = { nullptr, nullptr, nullptr, nullptr,
+            has_arg_defaults_v<Args> ? (uint8_t) cast_flags::accepts_none
+                                     : (uint8_t) 0 }), ...);
+    } else if constexpr (nargs_provided > 0) {
+        for (size_t i = 0; i < nargs_provided; ++i)
+            f.args[i].flag = 0;
+    }
+
     f.flags = (args_pos_1   < nargs ? (uint32_t) func_flags::has_var_args   : 0) |
               (kwargs_pos_1 < nargs ? (uint32_t) func_flags::has_var_kwargs : 0) |
               (ReturnRef            ? (uint32_t) func_flags::return_ref     : 0) |
@@ -311,7 +326,7 @@ NB_INLINE PyObject *func_create(Func &&func, Return (*)(Args...),
 
     (void) arg_index;
 
-    return nb_func_new((const void *) &f);
+    return nb_func_new(&f);
 }
 
 NAMESPACE_END(detail)
diff --git a/extern/nanobind/include/nanobind/nb_lib.h b/extern/nanobind/include/nanobind/nb_lib.h
index 2ae1c12ce..8fd35a1a2 100644
--- a/extern/nanobind/include/nanobind/nb_lib.h
+++ b/extern/nanobind/include/nanobind/nb_lib.h
@@ -9,8 +9,17 @@
 
 NAMESPACE_BEGIN(NB_NAMESPACE)
 
+NAMESPACE_BEGIN(dlpack)
+
+// The version of DLPack that is supported by libnanobind
+static constexpr uint32_t major_version = 1;
+static constexpr uint32_t minor_version = 1;
+
 // Forward declarations for types in ndarray.h (1)
-namespace dlpack { struct dltensor; struct dtype; }
+struct dltensor;
+struct dtype;
+
+NAMESPACE_END(dlpack)
 
 NAMESPACE_BEGIN(detail)
 
@@ -107,7 +116,10 @@ NB_CORE void raise_next_overload_if_null(void *p);
 
 // ========================================================================
 
-NB_CORE void init(const char *domain);
+NB_CORE void nb_module_exec(const char *domain, PyObject *m);
+NB_CORE int  nb_module_traverse(PyObject *m, visitproc visit, void *arg);
+NB_CORE int  nb_module_clear(PyObject *m);
+NB_CORE void nb_module_free(void *m);
 
 // ========================================================================
 
@@ -273,8 +285,11 @@ NB_CORE PyObject *capsule_new(const void *ptr, const char *name,
 
 // ========================================================================
 
+// Forward declaration for type in nb_attr.h
+struct func_data_prelim_base;
+
 /// Create a Python function object for the given function record
-NB_CORE PyObject *nb_func_new(const void *data) noexcept;
+NB_CORE PyObject *nb_func_new(const func_data_prelim_base *f) noexcept;
 
 // ========================================================================
 
@@ -452,9 +467,6 @@ NB_CORE PyObject *module_import(const char *name);
 /// Try to import a Python extension module, raises an exception upon failure
 NB_CORE PyObject *module_import(PyObject *name);
 
-/// Create a new extension module with the given name
-NB_CORE PyObject *module_new(const char *name, PyModuleDef *def) noexcept;
-
 /// Create a submodule of an existing module
 NB_CORE PyObject *module_new_submodule(PyObject *base, const char *name,
                                        const char *doc) noexcept;
@@ -469,7 +481,7 @@ NB_CORE ndarray_handle *ndarray_import(PyObject *o,
                                        cleanup_list *cleanup) noexcept;
 
 // Describe a local ndarray object using a DLPack capsule
-NB_CORE ndarray_handle *ndarray_create(void *value, size_t ndim,
+NB_CORE ndarray_handle *ndarray_create(void *data, size_t ndim,
                                        const size_t *shape, PyObject *owner,
                                        const int64_t *strides,
                                        dlpack::dtype dtype, bool ro,
diff --git a/extern/nanobind/include/nanobind/nb_python.h b/extern/nanobind/include/nanobind/nb_python.h
index 356500c70..54ee2f0bc 100644
--- a/extern/nanobind/include/nanobind/nb_python.h
+++ b/extern/nanobind/include/nanobind/nb_python.h
@@ -56,6 +56,6 @@
 #  pragma warning(pop)
 #endif
 
-#if PY_VERSION_HEX < 0x03080000
-#  error The nanobind library requires Python 3.8 (or newer)
+#if PY_VERSION_HEX < 0x03090000
+#  error The nanobind library requires Python 3.9 (or newer)
 #endif
diff --git a/extern/nanobind/include/nanobind/nb_traits.h b/extern/nanobind/include/nanobind/nb_traits.h
index 4480c868a..acfe5b58a 100644
--- a/extern/nanobind/include/nanobind/nb_traits.h
+++ b/extern/nanobind/include/nanobind/nb_traits.h
@@ -195,6 +195,12 @@ struct is_complex<T, enable_if_t<std::is_same_v<
 template<typename T>
 inline constexpr bool is_complex_v = is_complex<T>::value;
 
+template <typename T> // Trait consulted by func_create() via has_arg_defaults_v<Args>
+struct has_arg_defaults : std::false_type {}; // primary template: false unless specialized
+
+template <typename T> // Shorthand: evaluates the trait on intrinsic_t<T>
+constexpr bool has_arg_defaults_v = has_arg_defaults<intrinsic_t<T>>::value;
+
 NAMESPACE_END(detail)
 
 template <typename... Args>
diff --git a/extern/nanobind/include/nanobind/nb_types.h b/extern/nanobind/include/nanobind/nb_types.h
index 49a840564..a0e303ae8 100644
--- a/extern/nanobind/include/nanobind/nb_types.h
+++ b/extern/nanobind/include/nanobind/nb_types.h
@@ -162,6 +162,33 @@ template <typename Derived> class api : public api_tag {
 
 NAMESPACE_END(detail)
 
+// *WARNING*: nanobind regularly receives requests from users who run it
+// through Clang-Tidy, or who compile with increased warnings levels, like
+//
+//     -Wcast-qual, -Wsign-conversion, etc.
+//
+// (i.e., beyond -Wall -Wextra and /W4 that are currently already used)
+//
+// Their next step is to open a big pull request needed to silence all of
+// the resulting messages.  This comment is strategically placed here
+// because the (PyObject *) casts below cast away the const qualifier and
+// will almost certainly be flagged in this process.
+//
+// My policy on this is as follows: I am always happy to fix issues in the
+// codebase.  However, many of the resulting change requests are in the
+// "ritual purification" category: things that cause churn, decrease
+// readability, and which don't fix actual problems.  It's a never-ending
+// cycle because each new revision of such tooling adds further warnings
+// and purification rites.
+//
+// So just to be clear: I do not wish to pepper this codebase with
+// "const_cast" and #pragmas/comments to avoid warnings in external
+// tooling just so those users can have a "silent" build.  I don't think it
+// is reasonable for them to impose their own style on this project.
+//
+// As a workaround it is likely possible to restrict the scope of style
+// checks to particular C++ namespaces or source code locations.
+
 class handle : public detail::api<handle> {
     friend class python_error;
     friend struct detail::str_attr;
@@ -460,13 +487,8 @@ NAMESPACE_END(literals)
 class bytearray : public object {
     NB_OBJECT(bytearray, object, "bytearray", PyByteArray_Check)
 
-#if PY_VERSION_HEX >= 0x03090000
     bytearray()
         : object(PyObject_CallNoArgs((PyObject *)&PyByteArray_Type), detail::steal_t{}) { }
-#else
-    bytearray()
-        : object(PyObject_CallObject((PyObject *)&PyByteArray_Type, NULL), detail::steal_t{}) { }
-#endif
 
     explicit bytearray(handle h)
         : object(detail::bytearray_from_obj(h.ptr()), detail::steal_t{}) { }
@@ -604,11 +626,11 @@ class frozenset : public object {
 };
 
 class sequence : public object {
-    NB_OBJECT_DEFAULT(sequence, object, NB_TYPING_SEQUENCE, PySequence_Check)
+    NB_OBJECT_DEFAULT(sequence, object, "collections.abc.Sequence", PySequence_Check)
 };
 
 class mapping : public object {
-    NB_OBJECT_DEFAULT(mapping, object, NB_TYPING_MAPPING, PyMapping_Check)
+    NB_OBJECT_DEFAULT(mapping, object, "collections.abc.Mapping", PyMapping_Check)
     list keys() const { return steal<list>(detail::obj_op_1(m_ptr, PyMapping_Keys)); }
     list values() const { return steal<list>(detail::obj_op_1(m_ptr, PyMapping_Values)); }
     list items() const { return steal<list>(detail::obj_op_1(m_ptr, PyMapping_Items)); }
@@ -630,7 +652,7 @@ class iterator : public object {
     using reference = const handle;
     using pointer = const handle *;
 
-    NB_OBJECT_DEFAULT(iterator, object, NB_TYPING_ITERATOR, PyIter_Check)
+    NB_OBJECT_DEFAULT(iterator, object, "collections.abc.Iterator", PyIter_Check)
 
     iterator& operator++() {
         m_value = steal(detail::obj_iter_next(m_ptr));
@@ -662,7 +684,7 @@ class iterator : public object {
 
 class iterable : public object {
 public:
-    NB_OBJECT_DEFAULT(iterable, object, NB_TYPING_ITERABLE, detail::iterable_check)
+    NB_OBJECT_DEFAULT(iterable, object, "collections.abc.Iterable", detail::iterable_check)
 };
 
 /// Retrieve the Python type object associated with a C++ class
@@ -749,7 +771,7 @@ class not_implemented : public object {
 
 class callable : public object {
 public:
-    NB_OBJECT(callable, object, NB_TYPING_CALLABLE, PyCallable_Check)
+    NB_OBJECT(callable, object, "collections.abc.Callable", PyCallable_Check)
     using object::object;
 };
 
@@ -793,7 +815,7 @@ struct fallback : public handle {
 
 template <typename T> class type_object_t : public type_object {
 public:
-    static constexpr auto Name = detail::const_name(NB_TYPING_TYPE "[") +
+    static constexpr auto Name = detail::const_name("type[") +
                                  detail::make_caster<T>::Name +
                                  detail::const_name("]");
 
diff --git a/extern/nanobind/include/nanobind/ndarray.h b/extern/nanobind/include/nanobind/ndarray.h
index f71dc7e5f..63802963d 100644
--- a/extern/nanobind/include/nanobind/ndarray.h
+++ b/extern/nanobind/include/nanobind/ndarray.h
@@ -18,11 +18,16 @@
 
 NAMESPACE_BEGIN(NB_NAMESPACE)
 
-/// dlpack API/ABI data structures are part of a separate namespace
+/// DLPack API/ABI data structures are part of a separate namespace.
 NAMESPACE_BEGIN(dlpack)
 
 enum class dtype_code : uint8_t {
-    Int = 0, UInt = 1, Float = 2, Bfloat = 4, Complex = 5, Bool = 6
+    Int = 0, UInt = 1, Float = 2, Bfloat = 4, Complex = 5, Bool = 6,
+    Float8_E3M4 = 7, Float8_E4M3 = 8, Float8_E4M3B11FNUZ = 9,
+    Float8_E4M3FN = 10, Float8_E4M3FNUZ = 11, Float8_E5M2 = 12,
+    Float8_E5M2FNUZ = 13, Float8_E8M0FNU = 14,
+    Float6_E2M3FN = 15, Float6_E3M2FN = 16,
+    Float4_E2M1FN = 17
 };
 
 struct device {
@@ -86,6 +91,7 @@ NB_FRAMEWORK(tensorflow, 3, "tensorflow.python.framework.ops.EagerTensor");
 NB_FRAMEWORK(jax, 4, "jaxlib.xla_extension.DeviceArray");
 NB_FRAMEWORK(cupy, 5, "cupy.ndarray");
 NB_FRAMEWORK(memview, 6, "memoryview");
+NB_FRAMEWORK(array_api, 7, "ArrayLike");
 
 NAMESPACE_BEGIN(device)
 NB_DEVICE(none, 0); NB_DEVICE(cpu, 1); NB_DEVICE(cuda, 2);
diff --git a/extern/nanobind/include/nanobind/stl/chrono.h b/extern/nanobind/include/nanobind/stl/chrono.h
index 75a4a6ea9..4bedf32e5 100644
--- a/extern/nanobind/include/nanobind/stl/chrono.h
+++ b/extern/nanobind/include/nanobind/stl/chrono.h
@@ -99,13 +99,8 @@ template <typename type> class duration_caster {
         return pack_timedelta(dd.count(), ss.count(), us.count());
     }
 
-    #if PY_VERSION_HEX < 0x03090000
-        NB_TYPE_CASTER(type, io_name("typing.Union[datetime.timedelta, float]",
-                                     "datetime.timedelta"))
-    #else
-        NB_TYPE_CASTER(type, io_name("datetime.timedelta | float",
-                                     "datetime.timedelta"))
-    #endif
+    NB_TYPE_CASTER(type, io_name("datetime.timedelta | float",
+                                 "datetime.timedelta"))
 };
 
 template <class... Args>
@@ -214,13 +209,8 @@ class type_caster<std::chrono::time_point<std::chrono::system_clock, Duration>>
                              localtime.tm_sec,
                              (int) us.count());
     }
-    #if PY_VERSION_HEX < 0x03090000
-        NB_TYPE_CASTER(type, io_name("typing.Union[datetime.datetime, datetime.date, datetime.time]",
-                                     "datetime.datetime"))
-    #else
-        NB_TYPE_CASTER(type, io_name("datetime.datetime | datetime.date | datetime.time",
-                                     "datetime.datetime"))
-    #endif
+    NB_TYPE_CASTER(type, io_name("datetime.datetime | datetime.date | datetime.time",
+                                 "datetime.datetime"))
 };
 
 // Other clocks that are not the system clock are not measured as
diff --git a/extern/nanobind/include/nanobind/stl/detail/nb_array.h b/extern/nanobind/include/nanobind/stl/detail/nb_array.h
index 728f9c56c..191a90dca 100644
--- a/extern/nanobind/include/nanobind/stl/detail/nb_array.h
+++ b/extern/nanobind/include/nanobind/stl/detail/nb_array.h
@@ -6,7 +6,7 @@ NAMESPACE_BEGIN(NB_NAMESPACE)
 NAMESPACE_BEGIN(detail)
 
 template <typename Array, typename Entry, size_t Size> struct array_caster {
-    NB_TYPE_CASTER(Array, io_name(NB_TYPING_SEQUENCE, NB_TYPING_LIST) +
+    NB_TYPE_CASTER(Array, io_name("collections.abc.Sequence", "list") +
                               const_name("[") + make_caster<Entry>::Name +
                               const_name("]"))
 
@@ -33,10 +33,10 @@ template <typename Array, typename Entry, size_t Size> struct array_caster {
 
                 value[i] = caster.operator cast_t<Entry>();
             }
-
-            Py_XDECREF(temp);
         }
 
+        Py_XDECREF(temp);
+
         return success;
     }
 
diff --git a/extern/nanobind/include/nanobind/stl/detail/nb_dict.h b/extern/nanobind/include/nanobind/stl/detail/nb_dict.h
index 24f77ea21..e38952876 100644
--- a/extern/nanobind/include/nanobind/stl/detail/nb_dict.h
+++ b/extern/nanobind/include/nanobind/stl/detail/nb_dict.h
@@ -15,7 +15,7 @@ NAMESPACE_BEGIN(NB_NAMESPACE)
 NAMESPACE_BEGIN(detail)
 
 template <typename Dict, typename Key, typename Val> struct dict_caster {
-    NB_TYPE_CASTER(Dict, io_name(NB_TYPING_MAPPING, NB_TYPING_DICT) +
+    NB_TYPE_CASTER(Dict, io_name("collections.abc.Mapping", "dict") +
                              const_name("[") + make_caster<Key>::Name +
                              const_name(", ") + make_caster<Val>::Name +
                              const_name("]"))
diff --git a/extern/nanobind/include/nanobind/stl/detail/nb_list.h b/extern/nanobind/include/nanobind/stl/detail/nb_list.h
index 5874f7d05..95823a3e1 100644
--- a/extern/nanobind/include/nanobind/stl/detail/nb_list.h
+++ b/extern/nanobind/include/nanobind/stl/detail/nb_list.h
@@ -15,7 +15,7 @@ NAMESPACE_BEGIN(NB_NAMESPACE)
 NAMESPACE_BEGIN(detail)
 
 template <typename List, typename Entry> struct list_caster {
-    NB_TYPE_CASTER(List, io_name(NB_TYPING_SEQUENCE, NB_TYPING_LIST) +
+    NB_TYPE_CASTER(List, io_name("collections.abc.Sequence", "list") +
                               const_name("[") + make_caster<Entry>::Name +
                               const_name("]"))
 
diff --git a/extern/nanobind/include/nanobind/stl/detail/nb_set.h b/extern/nanobind/include/nanobind/stl/detail/nb_set.h
index d3a3250e4..0266531ba 100644
--- a/extern/nanobind/include/nanobind/stl/detail/nb_set.h
+++ b/extern/nanobind/include/nanobind/stl/detail/nb_set.h
@@ -15,7 +15,7 @@ NAMESPACE_BEGIN(NB_NAMESPACE)
 NAMESPACE_BEGIN(detail)
 
 template <typename Set, typename Key> struct set_caster {
-    NB_TYPE_CASTER(Set, io_name(NB_TYPING_ABSTRACT_SET, NB_TYPING_SET) +
+    NB_TYPE_CASTER(Set, io_name("collections.abc.Set", "set") +
                             const_name("[") + make_caster<Key>::Name +
                             const_name("]"))
 
diff --git a/extern/nanobind/include/nanobind/stl/filesystem.h b/extern/nanobind/include/nanobind/stl/filesystem.h
index 78af55092..bdc231457 100644
--- a/extern/nanobind/include/nanobind/stl/filesystem.h
+++ b/extern/nanobind/include/nanobind/stl/filesystem.h
@@ -71,11 +71,7 @@ struct type_caster<std::filesystem::path> {
         return success;
     }
 
-#if PY_VERSION_HEX < 0x03090000
-    NB_TYPE_CASTER(std::filesystem::path, io_name("typing.Union[str, os.PathLike]", "pathlib.Path"))
-#else
     NB_TYPE_CASTER(std::filesystem::path, io_name("str | os.PathLike", "pathlib.Path"))
-#endif
 
 private:
     static str to_py_str(const std::string &s) {
diff --git a/extern/nanobind/include/nanobind/stl/function.h b/extern/nanobind/include/nanobind/stl/function.h
index b96234028..6e61c56f8 100644
--- a/extern/nanobind/include/nanobind/stl/function.h
+++ b/extern/nanobind/include/nanobind/stl/function.h
@@ -50,7 +50,7 @@ struct type_caster<std::function<Return(Args...)>> {
         std::conditional_t<std::is_void_v<Return>, void_type, Return>>;
 
     NB_TYPE_CASTER(std::function <Return(Args...)>,
-                   const_name(NB_TYPING_CALLABLE "[[") +
+                   const_name("collections.abc.Callable[[") +
                        concat(make_caster<Args>::Name...) + const_name("], ") +
                        ReturnCaster::Name + const_name("]"))
 
diff --git a/extern/nanobind/include/nanobind/stl/optional.h b/extern/nanobind/include/nanobind/stl/optional.h
index 61ca923da..27eb81ade 100644
--- a/extern/nanobind/include/nanobind/stl/optional.h
+++ b/extern/nanobind/include/nanobind/stl/optional.h
@@ -23,5 +23,8 @@ struct type_caster<std::optional<T>> : optional_caster<std::optional<T>> {};
 
 template <> struct type_caster<std::nullopt_t> : none_caster<std::nullopt_t> { };
 
+template <typename T> // Opt std::optional<T> into the has_arg_defaults trait
+struct has_arg_defaults<std::optional<T>> : std::true_type {};
+
 NAMESPACE_END(detail)
 NAMESPACE_END(NB_NAMESPACE)
diff --git a/extern/nanobind/include/nanobind/stl/pair.h b/extern/nanobind/include/nanobind/stl/pair.h
index 596b3d3a4..ebcbf4ed1 100644
--- a/extern/nanobind/include/nanobind/stl/pair.h
+++ b/extern/nanobind/include/nanobind/stl/pair.h
@@ -30,7 +30,7 @@ template <typename T1, typename T2> struct type_caster<std::pair<T1, T2>> {
 
     // Value name for docstring generation
     static constexpr auto Name =
-        const_name(NB_TYPING_TUPLE "[") + concat(Caster1::Name, Caster2::Name) + const_name("]");
+        const_name("tuple[") + concat(Caster1::Name, Caster2::Name) + const_name("]");
 
     /// Python -> C++ caster, populates `caster1` and `caster2` upon success
     bool from_python(handle src, uint8_t flags,
diff --git a/extern/nanobind/include/nanobind/stl/tuple.h b/extern/nanobind/include/nanobind/stl/tuple.h
index ed9960274..b4af58e82 100644
--- a/extern/nanobind/include/nanobind/stl/tuple.h
+++ b/extern/nanobind/include/nanobind/stl/tuple.h
@@ -23,7 +23,7 @@ template <typename... Ts> struct type_caster<std::tuple<Ts...>> {
     using Indices = std::make_index_sequence<N>;
 
     static constexpr auto Name =
-        const_name(NB_TYPING_TUPLE "[") +
+        const_name("tuple[") +
         const_name<N == 0>(const_name("()"), concat(make_caster<Ts>::Name...)) +
         const_name("]");
 
diff --git a/extern/nanobind/include/nanobind/trampoline.h b/extern/nanobind/include/nanobind/trampoline.h
index 2789e5556..9b18f89b9 100644
--- a/extern/nanobind/include/nanobind/trampoline.h
+++ b/extern/nanobind/include/nanobind/trampoline.h
@@ -26,7 +26,7 @@ NB_CORE void trampoline_leave(ticket *ticket) noexcept;
 template <size_t Size> struct trampoline {
     mutable void *data[2 * Size + 1];
 
-    NB_INLINE trampoline(void *ptr) { trampoline_new(data, Size, ptr); }
+    NB_INLINE constexpr trampoline(void *ptr) { trampoline_new(data, Size, ptr); }
     NB_INLINE ~trampoline() { trampoline_release(data, Size); }
 
     NB_INLINE handle base() const { return (PyObject *) data[0]; }
diff --git a/extern/nanobind/include/nanobind/typing.h b/extern/nanobind/include/nanobind/typing.h
index ed5ab90d6..be1173713 100644
--- a/extern/nanobind/include/nanobind/typing.h
+++ b/extern/nanobind/include/nanobind/typing.h
@@ -28,4 +28,9 @@ object type_var_tuple(Args&&... args) {
     return typing().attr("TypeVarTuple")((detail::forward_t<Args>) args...);
 }
 
+template <typename... Args> // Calls typing().attr("ParamSpec") with the forwarded arguments
+object param_spec(Args&&... args) {
+    return typing().attr("ParamSpec")((detail::forward_t<Args>) args...);
+}
+
 NAMESPACE_END(NB_NAMESPACE)
diff --git a/extern/nanobind/pyproject.toml b/extern/nanobind/pyproject.toml
index 6430c649d..d711cbaf1 100644
--- a/extern/nanobind/pyproject.toml
+++ b/extern/nanobind/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build"
 
 [project]
 name = "nanobind"
-version = "2.9.2"
+version = "2.11.0"
 description = "nanobind: tiny and efficient C++/Python bindings"
 readme.content-type = "text/markdown"
 readme.text = """
diff --git a/extern/nanobind/src/__init__.py b/extern/nanobind/src/__init__.py
index 43b1e263b..0b2c021e7 100644
--- a/extern/nanobind/src/__init__.py
+++ b/extern/nanobind/src/__init__.py
@@ -1,8 +1,8 @@
 import sys
 import os
 
-if sys.version_info < (3, 8):
-    raise ImportError("nanobind does not support Python < 3.8.")
+if sys.version_info < (3, 9):
+    raise ImportError("nanobind does not support Python < 3.9.")
 
 def source_dir() -> str:
     "Return the path to the nanobind source directory."
@@ -16,7 +16,7 @@ def cmake_dir() -> str:
     "Return the path to the nanobind CMake module directory."
     return os.path.join(os.path.abspath(os.path.dirname(__file__)), "cmake")
 
-__version__ = "2.9.2"
+__version__ = "2.11.0"
 
 __all__ = (
     "__version__",
diff --git a/extern/nanobind/src/common.cpp b/extern/nanobind/src/common.cpp
index ff394d56e..1a33cedcb 100644
--- a/extern/nanobind/src/common.cpp
+++ b/extern/nanobind/src/common.cpp
@@ -141,16 +141,6 @@ void cleanup_list::expand() noexcept {
 
 // ========================================================================
 
-PyObject *module_new(const char *name, PyModuleDef *def) noexcept {
-    memset(def, 0, sizeof(PyModuleDef));
-    def->m_name = name;
-    def->m_size = -1;
-    PyObject *m = PyModule_Create(def);
-
-    check(m, "nanobind::detail::module_new(): allocation failed!");
-    return m;
-}
-
 PyObject *module_import(const char *name) {
     PyObject *res = PyImport_ImportModule(name);
     if (!res)
@@ -287,7 +277,7 @@ PyObject *obj_vectorcall(PyObject *base, PyObject *const *args, size_t nargsf,
     PyObject *res = nullptr;
     bool gil_error = false, cast_error = false;
 
-    size_t nargs_total = (size_t) (NB_VECTORCALL_NARGS(nargsf) +
+    size_t nargs_total = (size_t) (PyVectorcall_NARGS(nargsf) +
                          (kwnames ? NB_TUPLE_GET_SIZE(kwnames) : 0));
 
 #if !defined(Py_LIMITED_API)
@@ -304,20 +294,8 @@ PyObject *obj_vectorcall(PyObject *base, PyObject *const *args, size_t nargsf,
         }
     }
 
-#if PY_VERSION_HEX < 0x03090000
-    if (method_call) {
-        PyObject *self = PyObject_GetAttr(args[0], /* name = */ base);
-        if (self) {
-            res = _PyObject_Vectorcall(self, (PyObject **) args + 1, nargsf - 1, kwnames);
-            Py_DECREF(self);
-        }
-    } else {
-        res = _PyObject_Vectorcall(base, (PyObject **) args, nargsf, kwnames);
-    }
-#else
     res = (method_call ? PyObject_VectorcallMethod
                        : PyObject_Vectorcall)(base, args, nargsf, kwnames);
-#endif
 
 end:
     for (size_t i = 0; i < nargs_total; ++i)
@@ -461,13 +439,23 @@ void setattr(PyObject *obj, PyObject *key, PyObject *value) {
 }
 
 void delattr(PyObject *obj, const char *key) {
+    // Delete attribute; PyObject_DelAttrString() requires stable ABI >= 3.13.
+#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030D0000
+    int rv = PyObject_SetAttrString(obj, key, nullptr);
+#else
     int rv = PyObject_DelAttrString(obj, key);
+#endif
     if (rv)
         raise_python_error();
 }
 
 void delattr(PyObject *obj, PyObject *key) {
+    // Delete attribute; PyObject_DelAttr() requires stable ABI >= 3.13.
+#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030D0000
+    int rv = PyObject_SetAttr(obj, key, nullptr);
+#else
     int rv = PyObject_DelAttr(obj, key);
+#endif
     if (rv)
         raise_python_error();
 }
@@ -766,7 +754,7 @@ PyObject **seq_get(PyObject *seq, size_t *size_out, PyObject **temp_out) noexcep
 
                 if (temp) {
                     size = (size_t) size_seq;
-                } else if (!temp) {
+                } else {
                     PyErr_Clear();
                     for (Py_ssize_t i = 0; i < size_seq; ++i)
                         Py_DECREF(result[i]);
@@ -817,12 +805,17 @@ PyObject **seq_get_with_size(PyObject *seq, size_t size,
         }
 #  endif
     } else if (PySequence_Check(seq)) {
-        temp = PySequence_Tuple(seq);
-
-        if (temp)
-            result = seq_get_with_size(temp, size, temp_out);
-        else
-            PyErr_Clear();
+        Py_ssize_t size_seq = PySequence_Size(seq);
+        if (size_seq != (Py_ssize_t) size) {
+            if (size_seq == -1)
+                PyErr_Clear();
+        } else {
+            temp = PySequence_Tuple(seq);
+            if (temp)
+                result = seq_get_with_size(temp, size, temp_out);
+            else
+                PyErr_Clear();
+        }
     }
 #else
     /* There isn't a nice way to get a PyObject** in Py_LIMITED_API. This
diff --git a/extern/nanobind/src/error.cpp b/extern/nanobind/src/error.cpp
index 5d1d6666e..c45a39dd1 100644
--- a/extern/nanobind/src/error.cpp
+++ b/extern/nanobind/src/error.cpp
@@ -167,22 +167,13 @@ const char *python_error::what() const noexcept {
 
         while (frame) {
             frames.push_back(frame);
-#if PY_VERSION_HEX >= 0x03090000
             frame = PyFrame_GetBack(frame);
-#else
-            frame = frame->f_back;
-            Py_XINCREF(frame);
-#endif
         }
 
         buf.put("Traceback (most recent call last):\n");
         for (auto it = frames.rbegin(); it != frames.rend(); ++it) {
             frame = *it;
-#if PY_VERSION_HEX >= 0x03090000
             PyCodeObject *f_code = PyFrame_GetCode(frame);
-#else
-            PyCodeObject *f_code = frame->f_code;
-#endif
             buf.put("  File \"");
             buf.put_dstr(borrow<str>(f_code->co_filename).c_str());
             buf.put("\", line ");
@@ -190,9 +181,7 @@ const char *python_error::what() const noexcept {
             buf.put(", in ");
             buf.put_dstr(borrow<str>(f_code->co_name).c_str());
             buf.put('\n');
-#if PY_VERSION_HEX >= 0x03090000
             Py_DECREF(f_code);
-#endif
             Py_DECREF(frame);
         }
     }
diff --git a/extern/nanobind/src/implicit.cpp b/extern/nanobind/src/implicit.cpp
index dff87bddb..10702a06a 100644
--- a/extern/nanobind/src/implicit.cpp
+++ b/extern/nanobind/src/implicit.cpp
@@ -15,13 +15,13 @@ NAMESPACE_BEGIN(detail)
 
 void implicitly_convertible(const std::type_info *src,
                             const std::type_info *dst) noexcept {
-    nb_internals *internals_ {internals};
-    type_data *t {nb_type_c2p(internals_, dst)};
+    nb_internals *internals_ = internals;
+    type_data *t = nb_type_c2p(internals_, dst);
     check(t, "nanobind::detail::implicitly_convertible(src=%s, dst=%s): "
              "destination type unknown!", type_name(src), type_name(dst));
 
     lock_internals guard(internals_);
-    size_t size {0};
+    size_t size = 0;
 
     if (t->flags & (uint32_t) type_flags::has_implicit_conversions) {
         while (t->implicit.cpp && t->implicit.cpp[size])
@@ -45,13 +45,13 @@ void implicitly_convertible(const std::type_info *src,
 void implicitly_convertible(bool (*predicate)(PyTypeObject *, PyObject *,
                                               cleanup_list *),
                             const std::type_info *dst) noexcept {
-    nb_internals *internals_ {internals};
-    type_data *t {nb_type_c2p(internals_, dst)};
+    nb_internals *internals_ = internals;
+    type_data *t = nb_type_c2p(internals_, dst);
     check(t, "nanobind::detail::implicitly_convertible(src=<predicate>, dst=%s): "
              "destination type unknown!", type_name(dst));
 
     lock_internals guard(internals_);
-    size_t size {0};
+    size_t size = 0;
 
     if (t->flags & (uint32_t) type_flags::has_implicit_conversions) {
         while (t->implicit.py && t->implicit.py[size])
diff --git a/extern/nanobind/src/nb_abi.h b/extern/nanobind/src/nb_abi.h
index da704d99f..8eccefa59 100644
--- a/extern/nanobind/src/nb_abi.h
+++ b/extern/nanobind/src/nb_abi.h
@@ -14,7 +14,7 @@
 
 /// Tracks the version of nanobind's internal data structures
 #ifndef NB_INTERNALS_VERSION
-#  define NB_INTERNALS_VERSION 16
+#  define NB_INTERNALS_VERSION 18
 #endif
 
 #if defined(__MINGW32__)
diff --git a/extern/nanobind/src/nb_combined.cpp b/extern/nanobind/src/nb_combined.cpp
index f565ce09f..6abb636a7 100644
--- a/extern/nanobind/src/nb_combined.cpp
+++ b/extern/nanobind/src/nb_combined.cpp
@@ -78,7 +78,9 @@
 #include "nb_enum.cpp"
 #include "nb_ndarray.cpp"
 #include "nb_static_property.cpp"
-#include "nb_ft.cpp"
+#if defined(Py_GIL_DISABLED)
+#  include "nb_ft.cpp"
+#endif
 #include "error.cpp"
 #include "common.cpp"
 #include "implicit.cpp"
diff --git a/extern/nanobind/src/nb_enum.cpp b/extern/nanobind/src/nb_enum.cpp
index 427c0d85d..92e26e4f6 100644
--- a/extern/nanobind/src/nb_enum.cpp
+++ b/extern/nanobind/src/nb_enum.cpp
@@ -92,7 +92,7 @@ PyObject *enum_create(enum_init_data *ed) noexcept {
         internals_->type_c2p_slow[ed->type] = t;
 
         #if !defined(NB_FREE_THREADED)
-            internals_->type_c2p_fast[ed->type] = t;
+            internals_->type_c2p_fast[(void *) ed->type] = t;
         #endif
     }
 
@@ -190,7 +190,8 @@ bool enum_from_python(const std::type_info *tp, PyObject *o, int64_t *out, uint8
         return false;
 
     if ((t->flags & (uint32_t) enum_flags::is_flag) != 0 && Py_TYPE(o) == t->type_py) {
-        PyObject *value_o = PyObject_GetAttrString(o, "value");
+        PyObject *value_o =
+                PyObject_GetAttr(o, static_pyobjects[pyobj_name::value_str]);
         if (value_o == nullptr) {
             PyErr_Clear();
             return false;
diff --git a/extern/nanobind/src/nb_func.cpp b/extern/nanobind/src/nb_func.cpp
index 915b2fca8..4d39f3ef5 100644
--- a/extern/nanobind/src/nb_func.cpp
+++ b/extern/nanobind/src/nb_func.cpp
@@ -197,15 +197,12 @@ char *strdup_check(const char *s) {
  *
  * This is an implementation detail of nanobind::cpp_function.
  */
-PyObject *nb_func_new(const void *in_) noexcept {
-    func_data_prelim<0> *f = (func_data_prelim<0> *) in_;
-    arg_data *args_in = std::launder((arg_data *) f->args);
-
+PyObject *nb_func_new(const func_data_prelim_base *f) noexcept {
     bool has_scope       = f->flags & (uint32_t) func_flags::has_scope,
          has_name        = f->flags & (uint32_t) func_flags::has_name,
          has_args        = f->flags & (uint32_t) func_flags::has_args,
-         has_var_args    = f->flags & (uint32_t) func_flags::has_var_kwargs,
-         has_var_kwargs  = f->flags & (uint32_t) func_flags::has_var_args,
+         has_var_args    = f->flags & (uint32_t) func_flags::has_var_args,
+         has_var_kwargs  = f->flags & (uint32_t) func_flags::has_var_kwargs,
          can_mutate_args = f->flags & (uint32_t) func_flags::can_mutate_args,
          has_doc         = f->flags & (uint32_t) func_flags::has_doc,
          has_signature   = f->flags & (uint32_t) func_flags::has_signature,
@@ -217,6 +214,10 @@ PyObject *nb_func_new(const void *in_) noexcept {
          is_new          = false,
          is_setstate     = false;
 
+    arg_data *args_in = nullptr;
+    if (has_args)
+        args_in = std::launder((arg_data*) ((func_data_prelim<1>*) f)->args);
+
     PyObject *name = nullptr;
     PyObject *func_prev = nullptr;
 
@@ -274,12 +275,12 @@ PyObject *nb_func_new(const void *in_) noexcept {
         //   f->nargs = C++ argument count.
         //   f->descr_types = zero-terminated array of bound types among them.
         //     Hence of size >= 2 for constructors, where f->descr_types[1] my be null.
-        //   f->args = array of Python arguments (nb::arg). Non-empty if has_args.
+        //   args_in = array of Python arguments (nb::arg). Non-empty if has_args.
         //   By contrast, fc->args below has size f->nargs.
         if (is_constructor && f->nargs == 2 && f->descr_types[0] &&
             f->descr_types[0] == f->descr_types[1]) {
             if (has_args) {
-                f->args[0].flag &= ~(uint8_t) cast_flags::convert;
+                args_in[0].flag &= ~(uint8_t) cast_flags::convert;
             } else {
                 args_in = method_args + 1;
                 has_args = true;
@@ -298,7 +299,7 @@ PyObject *nb_func_new(const void *in_) noexcept {
 
     // Check if the complex dispatch loop is needed
     bool complex_call = can_mutate_args || has_var_kwargs || has_var_args ||
-                        f->nargs >= NB_MAXARGS_SIMPLE;
+                        f->nargs > NB_MAXARGS_SIMPLE;
 
     if (has_args) {
         for (size_t i = is_method; i < f->nargs; ++i) {
@@ -362,7 +363,7 @@ PyObject *nb_func_new(const void *in_) noexcept {
 #endif
 
     func_data *fc = nb_func_data(func) + prev_overloads;
-    memcpy(fc, f, sizeof(func_data_prelim<0>));
+    memcpy(fc, f, sizeof(func_data_prelim_base));
     if (has_doc) {
         if (fc->doc[0] == '\n')
             fc->doc++;
@@ -605,7 +606,7 @@ static PyObject *nb_func_vectorcall_complex(PyObject *self,
                                             size_t nargsf,
                                             PyObject *kwargs_in) noexcept {
     const size_t count      = (size_t) Py_SIZE(self),
-                 nargs_in   = (size_t) NB_VECTORCALL_NARGS(nargsf),
+                 nargs_in   = (size_t) PyVectorcall_NARGS(nargsf),
                  nkwargs_in = kwargs_in ? (size_t) NB_TUPLE_GET_SIZE(kwargs_in) : 0;
 
     func_data *fr = nb_func_data(self);
@@ -689,16 +690,16 @@ static PyObject *nb_func_vectorcall_complex(PyObject *self,
            entries using keyword arguments or default argument values provided
            in the bindings, if available.
 
-        3. Ensure that either all keyword arguments were "consumed", or that
+        2. Ensure that either all keyword arguments were "consumed", or that
            the function takes a kwargs argument to accept unconsumed kwargs.
 
-        4. Any positional arguments still left get put into a tuple (for args),
+        3. Any positional arguments still left get put into a tuple (for args),
            and any leftover kwargs get put into a dict.
 
-        5. Pack everything into a vector; if we have nb::args or nb::kwargs, they are an
-           extra tuple or dict at the end of the positional arguments.
+        4. Pack everything into a vector; if we have nb::args or nb::kwargs,
+           they become a tuple or dict at the end of the positional arguments.
 
-        6. Call the function call dispatcher (func_data::impl)
+        5. Call the function call dispatcher (func_data::impl)
 
         If one of these fail, move on to the next overload and keep trying
         until we get a result other than NB_NEXT_OVERLOAD.
@@ -877,7 +878,8 @@ static PyObject *nb_func_vectorcall_complex(PyObject *self,
     return result;
 }
 
-/// Simplified nb_func_vectorcall variant for functions w/o keyword arguments
+/// Simplified nb_func_vectorcall variant for functions w/o keyword arguments,
+/// w/o default arguments, with no more than NB_MAXARGS_SIMPLE arguments, etc.
 static PyObject *nb_func_vectorcall_simple(PyObject *self,
                                            PyObject *const *args_in,
                                            size_t nargsf,
@@ -886,7 +888,7 @@ static PyObject *nb_func_vectorcall_simple(PyObject *self,
     func_data *fr = nb_func_data(self);
 
     const size_t count         = (size_t) Py_SIZE(self),
-                 nargs_in      = (size_t) NB_VECTORCALL_NARGS(nargsf);
+                 nargs_in      = (size_t) PyVectorcall_NARGS(nargsf);
 
     const bool is_method      = fr->flags & (uint32_t) func_flags::is_method,
                is_constructor = fr->flags & (uint32_t) func_flags::is_constructor;
@@ -976,7 +978,7 @@ static PyObject *nb_func_vectorcall_simple_0(PyObject *self,
                                              size_t nargsf,
                                              PyObject *kwargs_in) noexcept {
     func_data *fr = nb_func_data(self);
-    const size_t nargs_in = (size_t) NB_VECTORCALL_NARGS(nargsf);
+    const size_t nargs_in = (size_t) PyVectorcall_NARGS(nargsf);
 
     // Handler routine that will be invoked in case of an error condition
     PyObject *(*error_handler)(PyObject *, PyObject *const *, size_t,
@@ -1016,7 +1018,7 @@ static PyObject *nb_func_vectorcall_simple_1(PyObject *self,
                                              size_t nargsf,
                                              PyObject *kwargs_in) noexcept {
     func_data *fr = nb_func_data(self);
-    const size_t nargs_in = (size_t) NB_VECTORCALL_NARGS(nargsf);
+    const size_t nargs_in = (size_t) PyVectorcall_NARGS(nargsf);
     bool is_constructor = fr->flags & (uint32_t) func_flags::is_constructor;
 
     // Handler routine that will be invoked in case of an error condition
@@ -1073,12 +1075,12 @@ static PyObject *nb_bound_method_vectorcall(PyObject *self,
                                             size_t nargsf,
                                             PyObject *kwargs_in) noexcept {
     nb_bound_method *mb = (nb_bound_method *) self;
-    size_t nargs = (size_t) NB_VECTORCALL_NARGS(nargsf);
+    size_t nargs = (size_t) PyVectorcall_NARGS(nargsf);
     const size_t buf_size = 5;
     PyObject **args, *args_buf[buf_size], *temp = nullptr, *result;
     bool alloc = false;
 
-    if (NB_LIKELY(nargsf & NB_VECTORCALL_ARGUMENTS_OFFSET)) {
+    if (NB_LIKELY(nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
         args = (PyObject **) (args_in - 1);
         temp = args[0];
     } else {
@@ -1095,7 +1097,8 @@ static PyObject *nb_bound_method_vectorcall(PyObject *self,
             alloc = true;
         }
 
-        memcpy(args + 1, args_in, sizeof(PyObject *) * (size - 1));
+        if (size > 1)
+            memcpy(args + 1, args_in, sizeof(PyObject *) * (size - 1));
     }
 
     args[0] = mb->self;
@@ -1511,7 +1514,6 @@ PyObject *nb_func_get_doc(PyObject *self, void *) {
     return PyUnicode_FromString(buf.get());
 }
 
-// PyGetSetDef entry for __module__ is ignored in Python 3.8
 PyObject *nb_func_getattro(PyObject *self, PyObject *name_) {
     const char *name = PyUnicode_AsUTF8AndSize(name_, nullptr);
 
diff --git a/extern/nanobind/src/nb_internals.cpp b/extern/nanobind/src/nb_internals.cpp
index 4adf53004..497d4410f 100644
--- a/extern/nanobind/src/nb_internals.cpp
+++ b/extern/nanobind/src/nb_internals.cpp
@@ -32,11 +32,6 @@ extern int nb_bound_method_clear(PyObject *);
 extern void nb_bound_method_dealloc(PyObject *);
 extern PyObject *nb_method_descr_get(PyObject *, PyObject *, PyObject *);
 
-#if PY_VERSION_HEX >= 0x03090000
-#  define NB_HAVE_VECTORCALL_PY39_OR_NEWER NB_HAVE_VECTORCALL
-#else
-#  define NB_HAVE_VECTORCALL_PY39_OR_NEWER 0
-#endif
 
 static PyType_Slot nb_meta_slots[] = {
     { Py_tp_base, nullptr },
@@ -47,7 +42,8 @@ static PyType_Spec nb_meta_spec = {
     /* .name = */ "nanobind.nb_meta",
     /* .basicsize = */ 0,
     /* .itemsize = */ 0,
-    /* .flags = */ Py_TPFLAGS_DEFAULT,
+    /* .flags = */ Py_TPFLAGS_DEFAULT |
+                   NB_TPFLAGS_IMMUTABLETYPE,
     /* .slots = */ nb_meta_slots
 };
 
@@ -70,7 +66,6 @@ static PyType_Slot nb_func_slots[] = {
     { Py_tp_traverse, (void *) nb_func_traverse },
     { Py_tp_clear, (void *) nb_func_clear },
     { Py_tp_dealloc, (void *) nb_func_dealloc },
-    { Py_tp_traverse, (void *) nb_func_traverse },
     { Py_tp_new, (void *) PyType_GenericNew },
     { Py_tp_call, (void *) PyVectorcall_Call },
     { 0, nullptr }
@@ -80,8 +75,10 @@ static PyType_Spec nb_func_spec = {
     /* .name = */ "nanobind.nb_func",
     /* .basicsize = */ (int) sizeof(nb_func),
     /* .itemsize = */ (int) sizeof(func_data),
-    /* .flags = */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
-                   NB_HAVE_VECTORCALL_PY39_OR_NEWER,
+    /* .flags = */ Py_TPFLAGS_DEFAULT |
+                   Py_TPFLAGS_HAVE_GC |
+                   Py_TPFLAGS_HAVE_VECTORCALL |
+                   NB_TPFLAGS_IMMUTABLETYPE,
     /* .slots = */ nb_func_slots
 };
 
@@ -102,9 +99,11 @@ static PyType_Spec nb_method_spec = {
     /*.name = */ "nanobind.nb_method",
     /*.basicsize = */ (int) sizeof(nb_func),
     /*.itemsize = */ (int) sizeof(func_data),
-    /*.flags = */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
+    /*.flags = */ Py_TPFLAGS_DEFAULT |
+                  Py_TPFLAGS_HAVE_GC |
                   Py_TPFLAGS_METHOD_DESCRIPTOR |
-                  NB_HAVE_VECTORCALL_PY39_OR_NEWER,
+                  Py_TPFLAGS_HAVE_VECTORCALL |
+                  NB_TPFLAGS_IMMUTABLETYPE,
     /*.slots = */ nb_method_slots
 };
 
@@ -124,7 +123,6 @@ static PyType_Slot nb_bound_method_slots[] = {
     { Py_tp_traverse, (void *) nb_bound_method_traverse },
     { Py_tp_clear, (void *) nb_bound_method_clear },
     { Py_tp_dealloc, (void *) nb_bound_method_dealloc },
-    { Py_tp_traverse, (void *) nb_bound_method_traverse },
     { Py_tp_call, (void *) PyVectorcall_Call },
     { 0, nullptr }
 };
@@ -133,8 +131,10 @@ static PyType_Spec nb_bound_method_spec = {
     /* .name = */ "nanobind.nb_bound_method",
     /* .basicsize = */ (int) sizeof(nb_bound_method),
     /* .itemsize = */ 0,
-    /* .flags = */ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
-                   NB_HAVE_VECTORCALL_PY39_OR_NEWER,
+    /* .flags = */ Py_TPFLAGS_DEFAULT |
+                   Py_TPFLAGS_HAVE_GC |
+                   Py_TPFLAGS_HAVE_VECTORCALL |
+                   NB_TPFLAGS_IMMUTABLETYPE,
     /* .slots = */ nb_bound_method_slots
 };
 
@@ -164,6 +164,73 @@ void default_exception_translator(const std::exception_ptr &p, void *) {
 nb_internals *internals = nullptr;
 PyTypeObject *nb_meta_cache = nullptr;
 
+
+static const char* interned_c_strs[pyobj_name::string_count] {
+    "value",
+    "copy",
+    "clone",
+    "array",
+    "from_dlpack",
+    "__dlpack__",
+    "max_version",
+    "dl_device",
+};
+
+PyObject **static_pyobjects = nullptr;
+
+static bool init_pyobjects(PyObject* m) {
+    PyObject** pyobjects = (PyObject**) PyModule_GetState(m);
+    if (!pyobjects)
+        return false;
+    // Fill the state array of module 'm': interned strings first, then tuples.
+    NB_NOUNROLL
+    for (int i = 0; i < pyobj_name::string_count; ++i)
+        pyobjects[i] = PyUnicode_InternFromString(interned_c_strs[i]);
+    // One-element tuples wrapping the interned "copy" / "max_version" strings.
+    pyobjects[pyobj_name::copy_tpl] =
+            PyTuple_Pack(1, pyobjects[pyobj_name::copy_str]);
+    pyobjects[pyobj_name::max_version_tpl] =
+            PyTuple_Pack(1, pyobjects[pyobj_name::max_version_str]);
+    // Tuple (1, 0); the temporaries are released once the tuple holds them.
+    PyObject* one = PyLong_FromLong(1);
+    PyObject* zero = PyLong_FromLong(0);
+    pyobjects[pyobj_name::dl_cpu_tpl] = PyTuple_Pack(2, one, zero);
+    Py_DECREF(zero);
+    Py_DECREF(one);
+    // Tuple (dlpack::major_version, dlpack::minor_version), built the same way.
+    PyObject* major = PyLong_FromLong(dlpack::major_version);
+    PyObject* minor = PyLong_FromLong(dlpack::minor_version);
+    pyobjects[pyobj_name::dl_version_tpl] = PyTuple_Pack(2, major, minor);
+    Py_DECREF(minor);
+    Py_DECREF(major);
+    // Expose the array through the global 'static_pyobjects' pointer.
+    static_pyobjects = pyobjects;
+    // NOTE(review): the CPython calls above are not checked for NULL results.
+    return true;
+}
+
+NB_NOINLINE int nb_module_traverse(PyObject *m, visitproc visit, void *arg) {
+    PyObject** pyobjects = (PyObject**) PyModule_GetState(m);
+    NB_NOUNROLL
+    for (int i = 0; i < pyobj_name::total_count; ++i)
+        Py_VISIT(pyobjects[i]);
+    return 0;
+}
+
+NB_NOINLINE int nb_module_clear(PyObject *m) {
+    PyObject** pyobjects = (PyObject**) PyModule_GetState(m);
+    NB_NOUNROLL
+    for (int i = 0; i < pyobj_name::total_count; ++i)
+        Py_CLEAR(pyobjects[i]);
+    return 0;
+}
+
+void nb_module_free(void *m) {
+    // Allow nanobind_##name##_exec to omit calling nb_module_clear on error.
+    (void) nb_module_clear((PyObject *) m);
+}
+
+
 static bool is_alive_value = false;
 static bool *is_alive_ptr = &is_alive_value;
 bool is_alive() noexcept { return *is_alive_ptr; }
@@ -273,8 +340,8 @@ static void internals_cleanup() {
             for (auto [f, p2] : p->funcs) {
                 fprintf(stderr, " - leaked function \"%s\"\n",
                         nb_func_data(f)->name);
+                INC_CTR;
                 if (ctr == 10) {
-                    INC_CTR;
                     fprintf(stderr, " - ... skipped remainder\n");
                     break;
                 }
@@ -317,29 +384,32 @@ static void internals_cleanup() {
 #endif
 }
 
-NB_NOINLINE void init(const char *name) {
+NB_NOINLINE void nb_module_exec(const char *name, PyObject *m) {
     if (internals)
         return;
 
+    check(init_pyobjects(m), "nanobind::detail::nb_module_exec(): "
+                             "could not initialize module state!");
+
 #if defined(PYPY_VERSION)
     PyObject *dict = PyEval_GetBuiltins();
-#elif PY_VERSION_HEX < 0x03090000
-    PyObject *dict = PyInterpreterState_GetDict(_PyInterpreterState_Get());
 #else
     PyObject *dict = PyInterpreterState_GetDict(PyInterpreterState_Get());
 #endif
-    check(dict, "nanobind::detail::init(): could not access internals dictionary!");
+    check(dict, "nanobind::detail::nb_module_exec(): "
+                "could not access internals dictionary!");
 
     PyObject *key = PyUnicode_FromFormat("__nb_internals_%s_%s__",
                                          abi_tag(), name ? name : "");
-    check(key, "nanobind::detail::init(): could not create dictionary key!");
+    check(key, "nanobind::detail::nb_module_exec(): "
+               "could not create dictionary key!");
 
     PyObject *capsule = dict_get_item_ref_or_fail(dict, key);
     if (capsule) {
         Py_DECREF(key);
         internals = (nb_internals *) PyCapsule_GetPointer(capsule, "nb_internals");
-        check(internals,
-              "nanobind::detail::internals_fetch(): capsule pointer is NULL!");
+        check(internals, "nanobind::detail::nb_module_exec(): "
+                         "capsule pointer is NULL!");
         nb_meta_cache = internals->nb_meta;
         is_alive_ptr = internals->is_alive_ptr;
         Py_DECREF(capsule);
@@ -374,23 +444,9 @@ NB_NOINLINE void init(const char *name) {
     PyThread_tss_create(p->nb_static_property_disabled);
 #endif
 
-    for (size_t i = 0; i < shard_count; ++i) {
-        p->shards[i].keep_alive.min_load_factor(.1f);
-        p->shards[i].inst_c2p.min_load_factor(.1f);
-    }
-
     check(p->nb_module && p->nb_meta && p->nb_type_dict && p->nb_func &&
               p->nb_method && p->nb_bound_method,
-          "nanobind::detail::init(): initialization failed!");
-
-#if PY_VERSION_HEX < 0x03090000
-    p->nb_func->tp_flags |= NB_HAVE_VECTORCALL;
-    p->nb_func->tp_vectorcall_offset = offsetof(nb_func, vectorcall);
-    p->nb_method->tp_flags |= NB_HAVE_VECTORCALL;
-    p->nb_method->tp_vectorcall_offset = offsetof(nb_func, vectorcall);
-    p->nb_bound_method->tp_flags |= NB_HAVE_VECTORCALL;
-    p->nb_bound_method->tp_vectorcall_offset = offsetof(nb_bound_method, vectorcall);
-#endif
+          "nanobind::detail::nb_module_exec(): initialization failed!");
 
 #if defined(Py_LIMITED_API)
     // Cache important functions from PyType_Type and PyProperty_Type
@@ -427,6 +483,7 @@ NB_NOINLINE void init(const char *name) {
 #endif
 
     p->translators = { default_exception_translator, nullptr, nullptr };
+
     is_alive_value = true;
     is_alive_ptr = &is_alive_value;
     p->is_alive_ptr = is_alive_ptr;
@@ -476,7 +533,7 @@ NB_NOINLINE void init(const char *name) {
     capsule = PyCapsule_New(p, "nb_internals", nullptr);
     int rv = PyDict_SetItem(dict, key, capsule);
     check(!rv && capsule,
-          "nanobind::detail::init(): capsule creation failed!");
+          "nanobind::detail::nb_module_exec(): capsule creation failed!");
     Py_DECREF(capsule);
     Py_DECREF(key);
     internals = p;
diff --git a/extern/nanobind/src/nb_internals.h b/extern/nanobind/src/nb_internals.h
index ca79920dd..a0f45f630 100644
--- a/extern/nanobind/src/nb_internals.h
+++ b/extern/nanobind/src/nb_internals.h
@@ -31,6 +31,12 @@
 #  define NB_THREAD_LOCAL __thread
 #endif
 
+#if PY_VERSION_HEX >= 0x030A0000
+#  define NB_TPFLAGS_IMMUTABLETYPE Py_TPFLAGS_IMMUTABLETYPE
+#else
+#  define NB_TPFLAGS_IMMUTABLETYPE 0
+#endif
+
 NAMESPACE_BEGIN(NB_NAMESPACE)
 NAMESPACE_BEGIN(detail)
 
@@ -42,7 +48,7 @@ NAMESPACE_BEGIN(detail)
 #endif
 
 /// Nanobind function metadata (overloads, etc.)
-struct func_data : func_data_prelim<0> {
+struct func_data : func_data_prelim_base {
     arg_data *args;
     char *signature;
 };
@@ -56,7 +62,7 @@ struct nb_inst { // usually: 24 bytes
 
     /// State of the C++ object this instance points to: is it constructed?
     /// can we use it?
-    uint32_t state : 2;
+    uint8_t state : 2;
 
     // Values for `state`. Note that the numeric values of these are relied upon
     // for an optimization in `nb_type_get()`.
@@ -70,25 +76,27 @@ struct nb_inst { // usually: 24 bytes
      * relative offset to a pointer that must be dereferenced to get to the
      * instance data. 'direct' is 'true' in the former case.
      */
-    uint32_t direct : 1;
+    uint8_t direct : 1;
 
     /// Is the instance data co-located with the Python object?
-    uint32_t internal : 1;
+    uint8_t internal : 1;
 
     /// Should the destructor be called when this instance is GCed?
-    uint32_t destruct : 1;
+    uint8_t destruct : 1;
 
     /// Should nanobind call 'operator delete' when this instance is GCed?
-    uint32_t cpp_delete : 1;
-
-    /// Does this instance hold references to others? (via internals.keep_alive)
-    uint32_t clear_keep_alive : 1;
+    uint8_t cpp_delete : 1;
 
     /// Does this instance use intrusive reference counting?
-    uint32_t intrusive : 1;
+    uint8_t intrusive : 1;
+
+    /// Does this instance hold references to others? (via internals.keep_alive)
+    /// This may be accessed concurrently to 'state', so it must not be in
+    /// the same bitfield as 'state'.
+    uint8_t clear_keep_alive;
 
     // That's a lot of unused space. I wonder if there is a good use for it..
-    uint32_t unused : 24;
+    uint16_t unused;
 };
 
 static_assert(sizeof(nb_inst) == sizeof(PyObject) + sizeof(uint32_t) * 2);
@@ -181,14 +189,14 @@ struct std_typeinfo_eq {
     }
 };
 
-using nb_type_map_fast = tsl::robin_map<const std::type_info *, type_data *, ptr_hash>;
-using nb_type_map_slow = tsl::robin_map<const std::type_info *, type_data *,
-                                        std_typeinfo_hash, std_typeinfo_eq>;
-
 /// A simple pointer-to-pointer map that is reused a few times below (even if
 /// not 100% ideal) to avoid template code generation bloat.
 using nb_ptr_map  = tsl::robin_map<void *, void*, ptr_hash>;
 
+using nb_type_map_fast = nb_ptr_map;
+using nb_type_map_slow = tsl::robin_map<const std::type_info *, type_data *,
+                                        std_typeinfo_hash, std_typeinfo_eq>;
+
 /// Convenience functions to deal with the pointer encoding in 'internals.inst_c2p'
 
 /// Does this entry store a linked list of instances?
@@ -420,6 +428,32 @@ struct nb_internals {
     size_t shard_count = 1;
 };
 
+// Names for the PyObject* entries in the per-module state array.
+// These names are scoped, but will implicitly convert to int.
+struct pyobj_name {
+    enum : int {
+        value_str = 0,      // string "value"
+        copy_str,           // string "copy"
+        clone_str,          // string "clone"
+        array_str,          // string "array"
+        from_dlpack_str,    // string "from_dlpack"
+        dunder_dlpack_str,  // string "__dlpack__"
+        max_version_str,    // string "max_version"
+        dl_device_str,      // string "dl_device"
+        string_count,       // number of string entries above (not an entry)
+        // Tuple entries follow; the first one reuses the value 'string_count'.
+        copy_tpl = string_count,  // one-element tuple ("copy",)
+        max_version_tpl, // one-element tuple ("max_version",)
+        dl_cpu_tpl,      // tuple (1, 0), which corresponds to nb::device::cpu
+        dl_version_tpl,  // tuple (dlpack::major_version, dlpack::minor_version)
+        total_count      // number of entries overall (strings + tuples)
+    };
+};
+
+static_assert(pyobj_name::total_count * sizeof(PyObject*) == NB_MOD_STATE_SIZE);
+
+extern PyObject **static_pyobjects;
+
 /// Convenience macro to potentially access cached functions
 #if defined(Py_LIMITED_API)
 #  define NB_SLOT(type, name) internals->type##_##name
@@ -466,10 +500,12 @@ inline void *inst_ptr(nb_inst *self) {
 }
 
 template <typename T> struct scoped_pymalloc {
-    scoped_pymalloc(size_t size = 1) {
-        ptr = (T *) PyMem_Malloc(size * sizeof(T));
+    scoped_pymalloc(size_t size = 1, size_t extra_bytes = 0) {
+        // Tip: construct objects in the extra bytes using placement new.
+        ptr = (T *) PyMem_Malloc(size * sizeof(T) + extra_bytes);
         if (!ptr)
-            fail("scoped_pymalloc(): could not allocate %zu bytes of memory!", size);
+            fail("scoped_pymalloc(): could not allocate %llu bytes of memory!",
+                 (unsigned long long) (size * sizeof(T) + extra_bytes));
     }
     ~scoped_pymalloc() { PyMem_Free(ptr); }
     T *release() {
diff --git a/extern/nanobind/src/nb_ndarray.cpp b/extern/nanobind/src/nb_ndarray.cpp
index d84177a8b..2f6d93a55 100644
--- a/extern/nanobind/src/nb_ndarray.cpp
+++ b/extern/nanobind/src/nb_ndarray.cpp
@@ -1,39 +1,139 @@
 #include <nanobind/ndarray.h>
 #include <atomic>
+#include <memory>
 #include "nb_internals.h"
 
 NAMESPACE_BEGIN(NB_NAMESPACE)
-NAMESPACE_BEGIN(detail)
+
+NAMESPACE_BEGIN(dlpack)
+
+/// Indicates the managed_dltensor_versioned is read only.
+static constexpr uint64_t flag_bitmask_read_only = 1UL << 0;
+
+struct version {
+    uint32_t major;
+    uint32_t minor;
+};
+
+NAMESPACE_END(dlpack)
 
 // ========================================================================
 
+NAMESPACE_BEGIN(detail)
+
+// DLPack version 0, deprecated Feb 2024, obsoleted March 2025
 struct managed_dltensor {
     dlpack::dltensor dltensor;
     void *manager_ctx;
     void (*deleter)(managed_dltensor *);
 };
 
-struct ndarray_handle {
-    managed_dltensor *ndarray;
-    std::atomic<size_t> refcount;
-    PyObject *owner, *self;
-    bool free_shape;
-    bool free_strides;
-    bool call_deleter;
-    bool ro;
+// DLPack version 1, pre-release Feb 2024, release Sep 2024
+struct managed_dltensor_versioned {
+    dlpack::version version;
+    void *manager_ctx;
+    void (*deleter)(managed_dltensor_versioned *);
+    uint64_t flags = 0UL;
+    dlpack::dltensor dltensor;
 };
 
-static void ndarray_capsule_destructor(PyObject *o) {
-    error_scope scope; // temporarily save any existing errors
-    managed_dltensor *mt =
-        (managed_dltensor *) PyCapsule_GetPointer(o, "dltensor");
+static void mt_from_buffer_delete(managed_dltensor_versioned* self) {
+    gil_scoped_acquire guard;
+    Py_buffer *buf = (Py_buffer *) self->manager_ctx;
+    PyBuffer_Release(buf);
+    PyMem_Free(buf);
+    PyMem_Free(self);  // This also frees shape and strides arrays.
+}
+
+// Forward declaration
+struct ndarray_handle;
+
+template<typename MT>
+static void mt_from_handle_delete(MT* self) {
+    gil_scoped_acquire guard;
+    ndarray_handle* th = (ndarray_handle *) self->manager_ctx;
+    PyMem_Free(self);
+    ndarray_dec_ref(th);
+}
+
+template<bool versioned>
+static void capsule_delete(PyObject *capsule) {
+    const char* capsule_name;
+    if constexpr (versioned)
+        capsule_name = "dltensor_versioned";
+    else
+        capsule_name = "dltensor";
 
+    using MT = std::conditional_t<versioned, managed_dltensor_versioned,
+                                             managed_dltensor>;
+    error_scope scope; // temporarily save any existing errors
+    MT* mt = (MT*) PyCapsule_GetPointer(capsule, capsule_name);
     if (mt)
-        ndarray_dec_ref((ndarray_handle *) mt->manager_ctx);
+        mt->deleter(mt);
     else
         PyErr_Clear();
 }
 
+// Reference-counted wrapper for versioned or unversioned managed tensors
+struct ndarray_handle {
+    union {
+        managed_dltensor           *mt_unversioned;
+        managed_dltensor_versioned *mt_versioned;
+    };
+    std::atomic<size_t> refcount;
+    PyObject *owner, *self;
+    bool versioned;     // This tags which union member is active.
+    bool free_strides;  // True if we added strides to an imported tensor.
+    bool call_deleter;  // True if tensor was imported, else PyMem_Free(mt).
+    bool ro;            // Whether tensor is read-only.
+
+    PyObject* make_capsule_unversioned() {
+        PyObject* capsule;
+        if (!versioned && mt_unversioned->manager_ctx == this) {
+            capsule = PyCapsule_New(mt_unversioned, "dltensor",
+                                    capsule_delete</*versioned=*/false>);
+        } else {
+            scoped_pymalloc<managed_dltensor> mt;
+            memcpy(&mt->dltensor,
+                   (versioned) ? &mt_versioned->dltensor
+                               : &mt_unversioned->dltensor,
+                   sizeof(dlpack::dltensor));
+            mt->manager_ctx = this;
+            mt->deleter = mt_from_handle_delete<managed_dltensor>;
+            capsule = PyCapsule_New(mt.release(), "dltensor",
+                                    capsule_delete</*versioned=*/false>);
+        }
+        check(capsule, "Could not make unversioned capsule");
+        refcount++;
+        return capsule;
+    }
+
+    PyObject* make_capsule_versioned() {
+        PyObject* capsule;
+        if (versioned && mt_versioned->manager_ctx == this) {
+            capsule = PyCapsule_New(mt_versioned, "dltensor_versioned",
+                                    capsule_delete</*versioned=*/true>);
+        } else {
+            scoped_pymalloc<managed_dltensor_versioned> mt;
+            mt->version = {dlpack::major_version, dlpack::minor_version};
+            mt->manager_ctx = this;
+            mt->deleter = mt_from_handle_delete<managed_dltensor_versioned>;
+            mt->flags = (ro) ? dlpack::flag_bitmask_read_only : 0;
+            memcpy(&mt->dltensor,
+                   (versioned) ? &mt_versioned->dltensor
+                               : &mt_unversioned->dltensor,
+                   sizeof(dlpack::dltensor));
+            capsule = PyCapsule_New(mt.release(), "dltensor_versioned",
+                                    capsule_delete</*versioned=*/true>);
+        }
+        check(capsule, "Could not make versioned capsule");
+        refcount++;
+        return capsule;
+    }
+};
+
+// ========================================================================
+
 static void nb_ndarray_dealloc(PyObject *self) {
     PyTypeObject *tp = Py_TYPE(self);
     ndarray_dec_ref(((nb_ndarray *) self)->th);
@@ -41,10 +141,10 @@ static void nb_ndarray_dealloc(PyObject *self) {
     Py_DECREF(tp);
 }
 
-static int nd_ndarray_tpbuffer(PyObject *exporter, Py_buffer *view, int) {
-    nb_ndarray *self = (nb_ndarray *) exporter;
-
-    dlpack::dltensor &t = self->th->ndarray->dltensor;
+static int nb_ndarray_getbuffer(PyObject *self, Py_buffer *view, int) {
+    ndarray_handle *th = ((nb_ndarray *) self)->th;
+    dlpack::dltensor &t = (th->versioned) ? th->mt_versioned->dltensor
+                                          : th->mt_unversioned->dltensor;
 
     if (t.device.device_type != device::cpu::value) {
         PyErr_SetString(PyExc_BufferError, "Only CPU-allocated ndarrays can be "
@@ -96,84 +196,123 @@ static int nd_ndarray_tpbuffer(PyObject *exporter, Py_buffer *view, int) {
     }
 
     if (!format || t.dtype.lanes != 1) {
-        PyErr_SetString(
-            PyExc_BufferError,
-            "Don't know how to convert DLPack dtype into buffer protocol format!");
+        PyErr_SetString(PyExc_BufferError,
+            "Cannot convert DLPack dtype into buffer protocol format!");
         return -1;
     }
 
-    view->format = (char *) format;
-    view->itemsize = t.dtype.bits / 8;
     view->buf = (void *) ((uintptr_t) t.data + t.byte_offset);
-    view->obj = exporter;
-    Py_INCREF(exporter);
+    view->obj = self;
+    Py_INCREF(self);
 
-    Py_ssize_t len = view->itemsize;
-    scoped_pymalloc<Py_ssize_t> strides((size_t) t.ndim),
-                                  shape((size_t) t.ndim);
+    scoped_pymalloc<Py_ssize_t> shape_and_strides(2 * (size_t) t.ndim);
+    Py_ssize_t* shape = shape_and_strides.get();
+    Py_ssize_t* strides = shape + t.ndim;
 
+    const Py_ssize_t itemsize = t.dtype.bits / 8;
+    Py_ssize_t len = itemsize;
     for (size_t i = 0; i < (size_t) t.ndim; ++i) {
         len *= (Py_ssize_t) t.shape[i];
-        strides[i] = (Py_ssize_t) t.strides[i] * view->itemsize;
         shape[i] = (Py_ssize_t) t.shape[i];
+        strides[i] = (Py_ssize_t) t.strides[i] * itemsize;
     }
 
-    view->ndim = t.ndim;
     view->len = len;
-    view->readonly = self->th->ro;
+    view->itemsize = itemsize;
+    view->readonly = th->ro;
+    view->ndim = t.ndim;
+    view->format = (char *) format;
+    view->shape = shape;
+    view->strides = strides;
     view->suboffsets = nullptr;
-    view->internal = nullptr;
-    view->strides = strides.release();
-    view->shape = shape.release();
+    view->internal = shape_and_strides.release();
 
     return 0;
 }
 
 static void nb_ndarray_releasebuffer(PyObject *, Py_buffer *view) {
-    PyMem_Free(view->shape);
-    PyMem_Free(view->strides);
+    PyMem_Free(view->internal);
 }
 
+// This function implements __dlpack__() for a nanobind.nb_ndarray.
+static PyObject *nb_ndarray_dlpack(PyObject *self, PyObject *const *args,
+                                   Py_ssize_t nargsf, PyObject *kwnames) {
+    if (PyVectorcall_NARGS(nargsf) != 0) {
+        PyErr_SetString(PyExc_TypeError,
+                "__dlpack__() does not accept positional arguments");
+        return nullptr;
+    }
+    Py_ssize_t nkwargs = (kwnames) ? NB_TUPLE_GET_SIZE(kwnames) : 0;
+
+    long max_major_version = 0;
+    for (Py_ssize_t i = 0; i < nkwargs; ++i) {
+        PyObject* key = NB_TUPLE_GET_ITEM(kwnames, i);
+        if (key == static_pyobjects[pyobj_name::dl_device_str] ||
+            key == static_pyobjects[pyobj_name::copy_str])
+            // These keyword arguments are ignored.  This branch of the code
+            // is here to avoid a Python call to RichCompare if these kwargs
+            // are provided by the caller.
+            continue;
+        if (key == static_pyobjects[pyobj_name::max_version_str] ||
+            PyObject_RichCompareBool(key,
+                static_pyobjects[pyobj_name::max_version_str], Py_EQ) == 1) {
+            PyObject* value = args[i];
+            if (value == Py_None)
+                break;
+            if (!PyTuple_Check(value) || NB_TUPLE_GET_SIZE(value) != 2) {
+                PyErr_SetString(PyExc_TypeError,
+                        "max_version must be None or tuple[int, int]");
+                return nullptr;
+            }
+            max_major_version = PyLong_AsLong(NB_TUPLE_GET_ITEM(value, 0));
+            break;
+        }
+    }
 
-static PyObject *nb_ndarray_dlpack(PyObject *self, PyTypeObject *,
-                            PyObject *const *, Py_ssize_t ,
-                            PyObject *) {
-    nb_ndarray *self_nd = (nb_ndarray *) self;
-    ndarray_handle *th = self_nd->th;
+    ndarray_handle *th = ((nb_ndarray *) self)->th;
+    PyObject *capsule;
+    if (max_major_version >= dlpack::major_version)
+        capsule = th->make_capsule_versioned();
+    else
+        capsule = th->make_capsule_unversioned();
 
-    PyObject *r =
-        PyCapsule_New(th->ndarray, "dltensor", ndarray_capsule_destructor);
-    if (r)
-        ndarray_inc_ref(th);
-    return r;
+    return capsule;
 }
 
-static PyObject *nb_ndarray_dlpack_device(PyObject *self, PyTypeObject *,
-                            PyObject *const *, Py_ssize_t ,
-                            PyObject *) {
-    nb_ndarray *self_nd = (nb_ndarray *) self;
-    dlpack::dltensor &t = self_nd->th->ndarray->dltensor;
-    PyObject *r = PyTuple_New(2);
-    PyObject *r0 = PyLong_FromLong(t.device.device_type);
-    PyObject *r1 = PyLong_FromLong(t.device.device_id);
-    if (!r || !r0 || !r1) {
-        Py_XDECREF(r);
-        Py_XDECREF(r0);
-        Py_XDECREF(r1);
-        return nullptr;
+// This function implements __dlpack_device__() for a nanobind.nb_ndarray.
+static PyObject *nb_ndarray_dlpack_device(PyObject *self, PyObject *) {
+    ndarray_handle *th = ((nb_ndarray *) self)->th;
+    dlpack::dltensor& t = (th->versioned)
+                              ? th->mt_versioned->dltensor
+                              : th->mt_unversioned->dltensor;
+    PyObject *r;
+    if (t.device.device_type == 1 && t.device.device_id == 0) {
+        r = static_pyobjects[pyobj_name::dl_cpu_tpl];
+        Py_INCREF(r);
+    } else {
+        r = PyTuple_New(2);
+        PyObject *r0 = PyLong_FromLong(t.device.device_type);
+        PyObject *r1 = PyLong_FromLong(t.device.device_id);
+        if (!r || !r0 || !r1) {
+            Py_XDECREF(r);
+            Py_XDECREF(r0);
+            Py_XDECREF(r1);
+            return nullptr;
+        }
+        NB_TUPLE_SET_ITEM(r, 0, r0);
+        NB_TUPLE_SET_ITEM(r, 1, r1);
     }
-    NB_TUPLE_SET_ITEM(r, 0, r0);
-    NB_TUPLE_SET_ITEM(r, 1, r1);
     return r;
 }
 
-static PyMethodDef nb_ndarray_members[] = {
-   { "__dlpack__", (PyCFunction) (void *) nb_ndarray_dlpack, METH_FASTCALL | METH_KEYWORDS, nullptr },
-   { "__dlpack_device__", (PyCFunction) (void *) nb_ndarray_dlpack_device, METH_FASTCALL | METH_KEYWORDS, nullptr },
+static PyMethodDef nb_ndarray_methods[] = {
+   { "__dlpack__", (PyCFunction) (void *) nb_ndarray_dlpack,
+                   METH_FASTCALL | METH_KEYWORDS, nullptr },
+   { "__dlpack_device__", nb_ndarray_dlpack_device, METH_NOARGS, nullptr },
    { nullptr, nullptr, 0, nullptr }
 };
 
-static PyTypeObject *nd_ndarray_tp() noexcept {
+static PyTypeObject *nb_ndarray_tp() noexcept {
     nb_internals *internals_ = internals;
     PyTypeObject *tp = internals_->nb_ndarray.load_acquire();
 
@@ -185,11 +324,9 @@ static PyTypeObject *nd_ndarray_tp() noexcept {
 
         PyType_Slot slots[] = {
             { Py_tp_dealloc, (void *) nb_ndarray_dealloc },
-            { Py_tp_methods, (void *) nb_ndarray_members },
-#if PY_VERSION_HEX >= 0x03090000
-            { Py_bf_getbuffer, (void *) nd_ndarray_tpbuffer },
+            { Py_tp_methods, (void *) nb_ndarray_methods },
+            { Py_bf_getbuffer, (void *) nb_ndarray_getbuffer },
             { Py_bf_releasebuffer, (void *) nb_ndarray_releasebuffer },
-#endif
             { 0, nullptr }
         };
 
@@ -204,25 +341,24 @@ static PyTypeObject *nd_ndarray_tp() noexcept {
         tp = (PyTypeObject *) PyType_FromSpec(&spec);
         check(tp, "nb_ndarray type creation failed!");
 
-#if PY_VERSION_HEX < 0x03090000
-        tp->tp_as_buffer->bf_getbuffer = nd_ndarray_tpbuffer;
-        tp->tp_as_buffer->bf_releasebuffer = nb_ndarray_releasebuffer;
-#endif
-
         internals_->nb_ndarray.store_release(tp);
     }
 
     return tp;
 }
 
-static PyObject *dlpack_from_buffer_protocol(PyObject *o, bool ro) {
-    scoped_pymalloc<Py_buffer> view;
-    scoped_pymalloc<managed_dltensor> mt;
+// ========================================================================
+
+using mt_unique_ptr_t = std::unique_ptr<managed_dltensor_versioned,
+                                        decltype(&mt_from_buffer_delete)>;
 
+static mt_unique_ptr_t make_mt_from_buffer_protocol(PyObject *o, bool ro) {
+    mt_unique_ptr_t mt_unique_ptr(nullptr, &mt_from_buffer_delete);
+    scoped_pymalloc<Py_buffer> view;
     if (PyObject_GetBuffer(o, view.get(),
                            ro ? PyBUF_RECORDS_RO : PyBUF_RECORDS)) {
         PyErr_Clear();
-        return nullptr;
+        return mt_unique_ptr;
     }
 
     char format_c = 'B';
@@ -233,7 +369,7 @@ static PyObject *dlpack_from_buffer_protocol(PyObject *o, bool ro) {
     bool skip_first = format_c == '@' || format_c == '=';
 
     int32_t num = 1;
-    if(*(uint8_t *) &num == 1) {
+    if (*(uint8_t *) &num == 1) {
         if (format_c == '<')
             skip_first = true;
     } else {
@@ -274,8 +410,7 @@ static PyObject *dlpack_from_buffer_protocol(PyObject *o, bool ro) {
 
             case '?': dt.code = (uint8_t) dlpack::dtype_code::Bool; break;
 
-            default:
-                fail = true;
+            default: fail = true;
         }
 
         if (is_complex) {
@@ -289,71 +424,64 @@ static PyObject *dlpack_from_buffer_protocol(PyObject *o, bool ro) {
 
     if (fail) {
         PyBuffer_Release(view.get());
-        return nullptr;
+        return mt_unique_ptr;
     }
 
-    mt->deleter = [](managed_dltensor *mt2) {
-        gil_scoped_acquire guard;
-        Py_buffer *buf = (Py_buffer *) mt2->manager_ctx;
-        PyBuffer_Release(buf);
-        PyMem_Free(mt2->manager_ctx);
-        PyMem_Free(mt2->dltensor.shape);
-        PyMem_Free(mt2->dltensor.strides);
-        PyMem_Free(mt2);
-    };
+    int32_t ndim = view->ndim;
+
+    static_assert(alignof(managed_dltensor_versioned) >= alignof(int64_t));
+    scoped_pymalloc<managed_dltensor_versioned> mt(1, 2 * sizeof(int64_t)*ndim);
+    int64_t* shape = nullptr;
+    int64_t* strides = nullptr;
+    if (ndim > 0) {
+        shape = new ((void*) (mt.get() + 1)) int64_t[2 * ndim];
+        strides = shape + ndim;
+    }
 
-    /* DLPack mandates 256-byte alignment of the 'DLTensor::data' field, but
-       PyTorch unfortunately ignores the 'byte_offset' value.. :-( */
+    /* See comments in function ndarray_create(). */
 #if 0
-    uintptr_t value_int = (uintptr_t) view->buf,
-              value_rounded = (value_int / 256) * 256;
+    uintptr_t data_uint = (uintptr_t) view->buf;
+    void* data_ptr = (void *) (data_uint & ~uintptr_t{255});
+    uint64_t data_offset = data_uint & uintptr_t{255};
 #else
-    uintptr_t value_int = (uintptr_t) view->buf,
-              value_rounded = value_int;
+    void* data_ptr = view->buf;
+    constexpr uint64_t data_offset = 0UL;
 #endif
 
-    mt->dltensor.data = (void *) value_rounded;
+    mt->dltensor.data = data_ptr;
     mt->dltensor.device = { device::cpu::value, 0 };
-    mt->dltensor.ndim = view->ndim;
+    mt->dltensor.ndim = ndim;
     mt->dltensor.dtype = dt;
-    mt->dltensor.byte_offset = value_int - value_rounded;
+    mt->dltensor.shape = shape;
+    mt->dltensor.strides = strides;
+    mt->dltensor.byte_offset = data_offset;
 
-    scoped_pymalloc<int64_t> strides((size_t) view->ndim);
-    scoped_pymalloc<int64_t> shape((size_t) view->ndim);
-    const int64_t itemsize = static_cast<int64_t>(view->itemsize);
-    for (size_t i = 0; i < (size_t) view->ndim; ++i) {
+    const int64_t itemsize = (int64_t) view->itemsize;
+    for (int32_t i = 0; i < ndim; ++i) {
         int64_t stride = view->strides[i] / itemsize;
         if (stride * itemsize != view->strides[i]) {
             PyBuffer_Release(view.get());
-            return nullptr;
+            return mt_unique_ptr;
         }
         strides[i] = stride;
         shape[i] = (int64_t) view->shape[i];
     }
 
+    mt->version = {dlpack::major_version, dlpack::minor_version};
     mt->manager_ctx = view.release();
-    mt->dltensor.shape = shape.release();
-    mt->dltensor.strides = strides.release();
-
-    return PyCapsule_New(mt.release(), "dltensor", [](PyObject *o) {
-        error_scope scope; // temporarily save any existing errors
-        managed_dltensor *mt =
-            (managed_dltensor *) PyCapsule_GetPointer(o, "dltensor");
-        if (mt) {
-            if (mt->deleter)
-                mt->deleter(mt);
-        } else {
-            PyErr_Clear();
-        }
-    });
+    mt->deleter = mt_from_buffer_delete;
+    mt->flags = (ro) ? dlpack::flag_bitmask_read_only : 0;
+
+    mt_unique_ptr.reset(mt.release());
+    return mt_unique_ptr;
 }
 
 bool ndarray_check(PyObject *o) noexcept {
-    if (PyObject_HasAttrString(o, "__dlpack__") || PyObject_CheckBuffer(o))
+    if (PyObject_HasAttr(o, static_pyobjects[pyobj_name::dunder_dlpack_str]) ||
+        PyObject_CheckBuffer(o))
         return true;
 
     PyTypeObject *tp = Py_TYPE(o);
-
     if (tp == &PyCapsule_Type)
         return true;
 
@@ -378,19 +506,41 @@ bool ndarray_check(PyObject *o) noexcept {
 }
 
 
-ndarray_handle *ndarray_import(PyObject *o, const ndarray_config *c,
+ndarray_handle *ndarray_import(PyObject *src, const ndarray_config *c,
                                bool convert, cleanup_list *cleanup) noexcept {
     object capsule;
-    bool is_pycapsule = PyCapsule_CheckExact(o);
+    const bool src_is_pycapsule = PyCapsule_CheckExact(src);
+    mt_unique_ptr_t mt_unique_ptr(nullptr, &mt_from_buffer_delete);
 
-    // If this is not a capsule, try calling o.__dlpack__()
-    if (!is_pycapsule) {
-        capsule = steal(PyObject_CallMethod(o, "__dlpack__", nullptr));
+    if (src_is_pycapsule) {
+        capsule = borrow(src);
+    } else {
+        // Try calling src.__dlpack__()
+        PyObject* args[] = {src, static_pyobjects[pyobj_name::dl_version_tpl]};
+        Py_ssize_t nargsf = 1 | PY_VECTORCALL_ARGUMENTS_OFFSET;
+        capsule = steal(PyObject_VectorcallMethod(
+                          static_pyobjects[pyobj_name::dunder_dlpack_str],
+                          args, nargsf,
+                          static_pyobjects[pyobj_name::max_version_tpl]));
+
+        // Python array API standard v2023 introduced max_version.
+        // Try calling src.__dlpack__() without any kwargs.
+        if (!capsule.is_valid() && PyErr_ExceptionMatches(PyExc_TypeError)) {
+            PyErr_Clear();
+            capsule = steal(PyObject_VectorcallMethod(
+                              static_pyobjects[pyobj_name::dunder_dlpack_str],
+                              args, nargsf, nullptr));
+        }
 
+        // Try creating an ndarray via the buffer protocol
         if (!capsule.is_valid()) {
             PyErr_Clear();
-            PyTypeObject *tp = Py_TYPE(o);
+            mt_unique_ptr = make_mt_from_buffer_protocol(src, c->ro);
+        }
 
+        // Try the function to_dlpack(), already obsolete in array API v2021
+        if (!mt_unique_ptr && !capsule.is_valid()) {
+            PyTypeObject *tp = Py_TYPE(src);
             try {
                 const char *module_name =
                     borrow<str>(handle(tp).attr("__module__")).c_str();
@@ -398,59 +548,68 @@ ndarray_handle *ndarray_import(PyObject *o, const ndarray_config *c,
                 object package;
                 if (strncmp(module_name, "tensorflow.", 11) == 0)
                     package = module_::import_("tensorflow.experimental.dlpack");
-                else if (strcmp(module_name, "torch") == 0)
+                else if (strncmp(module_name, "torch", 5) == 0)
                     package = module_::import_("torch.utils.dlpack");
                 else if (strncmp(module_name, "jaxlib", 6) == 0)
                     package = module_::import_("jax.dlpack");
 
                 if (package.is_valid())
-                    capsule = package.attr("to_dlpack")(handle(o));
+                    capsule = package.attr("to_dlpack")(handle(src));
             } catch (...) {
                 capsule.reset();
             }
+            if (!capsule.is_valid())
+                return nullptr;
         }
+    }
 
-        // Try creating an ndarray via the buffer protocol
-        if (!capsule.is_valid())
-            capsule = steal(dlpack_from_buffer_protocol(o, c->ro));
-
-        if (!capsule.is_valid())
-            return nullptr;
+    void* mt;  // can be versioned or unversioned
+    bool versioned = true;
+    if (mt_unique_ptr) {
+        mt = mt_unique_ptr.get();
     } else {
-        capsule = borrow(o);
+        // Extract the managed_dltensor{_versioned} pointer from the capsule.
+        mt = PyCapsule_GetPointer(capsule.ptr(), "dltensor_versioned");
+        if (!mt) {
+            PyErr_Clear();
+            versioned = false;
+            mt = PyCapsule_GetPointer(capsule.ptr(), "dltensor");
+            if (!mt) {
+                PyErr_Clear();
+                return nullptr;
+            }
+        }
     }
 
-    // Extract the pointer underlying the capsule
-    void *ptr = PyCapsule_GetPointer(capsule.ptr(), "dltensor");
-    if (!ptr) {
-        PyErr_Clear();
+    dlpack::dltensor& t = (versioned)
+                              ? ((managed_dltensor_versioned *) mt)->dltensor
+                              : ((managed_dltensor *) mt)->dltensor;
+
+    uint64_t flags = (versioned) ? ((managed_dltensor_versioned *) mt)->flags
+                                 : 0UL;
+
+    // Reject a read-only ndarray if a writable one is required, and
+    // reject an ndarray not on the required device.
+    if ((!c->ro && (flags & dlpack::flag_bitmask_read_only))
+        || (c->device_type != 0 && t.device.device_type != c->device_type)) {
         return nullptr;
     }
 
-    // Check if the ndarray satisfies the requirements
-    dlpack::dltensor &t = ((managed_dltensor *) ptr)->dltensor;
-
+    // Check if the ndarray satisfies the remaining requirements.
     bool has_dtype = c->dtype != dlpack::dtype(),
-         has_device_type = c->device_type != 0,
          has_shape = c->ndim != -1,
          has_order = c->order != '\0';
 
-    bool pass_dtype = true, pass_device = true,
-         pass_shape = true, pass_order = true;
+    bool pass_dtype = true, pass_shape = true, pass_order = true;
 
     if (has_dtype)
         pass_dtype = t.dtype == c->dtype;
 
-    if (has_device_type)
-        pass_device = t.device.device_type == c->device_type;
-
     if (has_shape) {
-        pass_shape &= c->ndim == t.ndim;
-
+        pass_shape = t.ndim == c->ndim;
         if (pass_shape) {
             for (int32_t i = 0; i < c->ndim; ++i) {
-                if (c->shape[i] != t.shape[i] &&
-                    c->shape[i] != -1) {
+                if (c->shape[i] != -1 && t.shape[i] != c->shape[i]) {
                     pass_shape = false;
                     break;
                 }
@@ -499,14 +658,15 @@ ndarray_handle *ndarray_import(PyObject *o, const ndarray_config *c,
         }
     }
 
-    bool refused_conversion = t.dtype.code == (uint8_t) dlpack::dtype_code::Complex &&
-                              has_dtype &&
-                              c->dtype.code != (uint8_t) dlpack::dtype_code::Complex;
+    // Do not convert shape and do not convert complex numbers to non-complex.
+    convert &= pass_shape &
+               !(t.dtype.code == (uint8_t) dlpack::dtype_code::Complex
+                 && has_dtype
+                 && c->dtype.code != (uint8_t) dlpack::dtype_code::Complex);
 
-    // Support implicit conversion of 'dtype' and order
-    if (pass_device && pass_shape && (!pass_dtype || !pass_order) && convert &&
-        capsule.ptr() != o && !refused_conversion) {
-        PyTypeObject *tp = Py_TYPE(o);
+    // Support implicit conversion of dtype and order.
+    if (convert && (!pass_dtype || !pass_order) && !src_is_pycapsule) {
+        PyTypeObject *tp = Py_TYPE(src);
         str module_name_o = borrow<str>(handle(tp).attr("__module__"));
         const char *module_name = module_name_o.c_str();
 
@@ -518,16 +678,27 @@ ndarray_handle *ndarray_import(PyObject *o, const ndarray_config *c,
         if (dt.lanes != 1)
             return nullptr;
 
-        const char *prefix = nullptr;
         char dtype[11];
         if (dt.code == (uint8_t) dlpack::dtype_code::Bool) {
             std::strcpy(dtype, "bool");
         } else {
+            const char *prefix = nullptr;
             switch (dt.code) {
-                case (uint8_t) dlpack::dtype_code::Int: prefix = "int"; break;
-                case (uint8_t) dlpack::dtype_code::UInt: prefix = "uint"; break;
-                case (uint8_t) dlpack::dtype_code::Float: prefix = "float"; break;
-                case (uint8_t) dlpack::dtype_code::Complex: prefix = "complex"; break;
+                case (uint8_t) dlpack::dtype_code::Int:
+                    prefix = "int";
+                    break;
+                case (uint8_t) dlpack::dtype_code::UInt:
+                    prefix = "uint";
+                    break;
+                case (uint8_t) dlpack::dtype_code::Float:
+                    prefix = "float";
+                    break;
+                case (uint8_t) dlpack::dtype_code::Bfloat:
+                    prefix = "bfloat";
+                    break;
+                case (uint8_t) dlpack::dtype_code::Complex:
+                    prefix = "complex";
+                    break;
                 default:
                     return nullptr;
             }
@@ -536,25 +707,24 @@ ndarray_handle *ndarray_import(PyObject *o, const ndarray_config *c,
 
         object converted;
         try {
-            if (strcmp(module_name, "numpy") == 0 || strcmp(module_name, "cupy") == 0) {
-                converted = handle(o).attr("astype")(dtype, order);
-            } else if (strcmp(module_name, "torch") == 0) {
-                converted = handle(o).attr("to")(
-                    arg("dtype") = module_::import_("torch").attr(dtype));
+            if (strncmp(module_name, "numpy", 5) == 0
+                || strncmp(module_name, "cupy", 4) == 0) {
+                converted = handle(src).attr("astype")(dtype, order);
+            } else if (strncmp(module_name, "torch", 5) == 0) {
+                module_ torch = module_::import_("torch");
+                converted = handle(src).attr("to")(torch.attr(dtype));
                 if (c->order == 'C')
                     converted = converted.attr("contiguous")();
             } else if (strncmp(module_name, "tensorflow.", 11) == 0) {
-                converted = module_::import_("tensorflow")
-                                .attr("cast")(handle(o), dtype);
+                module_ tensorflow = module_::import_("tensorflow");
+                converted = tensorflow.attr("cast")(handle(src), dtype);
             } else if (strncmp(module_name, "jaxlib", 6) == 0) {
-                converted = handle(o).attr("astype")(dtype);
+                converted = handle(src).attr("astype")(dtype);
             }
         } catch (...) { converted.reset(); }
 
-        // Potentially try again recursively
-        if (!converted.is_valid()) {
-            return nullptr;
-        } else {
+        // Potentially try once again, recursively
+        if (converted.is_valid()) {
             ndarray_handle *h =
                 ndarray_import(converted.ptr(), c, false, nullptr);
             if (h && cleanup)
@@ -563,27 +733,31 @@ ndarray_handle *ndarray_import(PyObject *o, const ndarray_config *c,
         }
     }
 
-    if (!pass_dtype || !pass_device || !pass_shape || !pass_order)
+    if (!pass_dtype || !pass_shape || !pass_order)
         return nullptr;
 
     // Create a reference-counted wrapper
     scoped_pymalloc<ndarray_handle> result;
-    result->ndarray = (managed_dltensor *) ptr;
+    if (versioned)
+        result->mt_versioned = (managed_dltensor_versioned *) mt;
+    else
+        result->mt_unversioned = (managed_dltensor *) mt;
+
     result->refcount = 0;
     result->owner = nullptr;
-    result->free_shape = false;
+    result->versioned = versioned;
     result->call_deleter = true;
     result->ro = c->ro;
 
-    if (is_pycapsule) {
+    if (src_is_pycapsule) {
         result->self = nullptr;
     } else {
-        result->self = o;
-        Py_INCREF(o);
+        result->self = src;
+        Py_INCREF(src);
     }
 
-    // Ensure that the strides member is always initialized
-    if (t.strides) {
+    // If ndim > 0, ensure that the strides member is initialized.
+    if (t.strides || t.ndim == 0) {
         result->free_strides = false;
     } else {
         result->free_strides = true;
@@ -593,16 +767,19 @@ ndarray_handle *ndarray_import(PyObject *o, const ndarray_config *c,
             strides[i] = accum;
             accum *= t.shape[i];
         }
-
         t.strides = strides.release();
     }
 
-    // Mark the dltensor capsule as "consumed"
-    if (PyCapsule_SetName(capsule.ptr(), "used_dltensor") ||
-        PyCapsule_SetDestructor(capsule.ptr(), nullptr))
-        check(false, "nanobind::detail::ndarray_import(): could not mark "
-                     "dltensor capsule as consumed!");
+    if (capsule.is_valid()) {
+        // Mark the dltensor capsule as used, i.e., "consumed".
+        const char* used_name = (versioned) ? "used_dltensor_versioned"
+                                            : "used_dltensor";
+        if (PyCapsule_SetName(capsule.ptr(), used_name) ||
+            PyCapsule_SetDestructor(capsule.ptr(), nullptr))
+            check(false, "ndarray_import(): could not mark capsule as used");
+    }
 
+    mt_unique_ptr.release();
     return result.release();
 }
 
@@ -610,7 +787,8 @@ dlpack::dltensor *ndarray_inc_ref(ndarray_handle *th) noexcept {
     if (!th)
         return nullptr;
     ++th->refcount;
-    return &th->ndarray->dltensor;
+    return (th->versioned) ? &th->mt_versioned->dltensor
+                           : &th->mt_unversioned->dltensor;
 }
 
 void ndarray_dec_ref(ndarray_handle *th) noexcept {
@@ -625,50 +803,64 @@ void ndarray_dec_ref(ndarray_handle *th) noexcept {
 
         Py_XDECREF(th->owner);
         Py_XDECREF(th->self);
-        managed_dltensor *mt = th->ndarray;
-        if (th->free_shape) {
-            PyMem_Free(mt->dltensor.shape);
-            mt->dltensor.shape = nullptr;
-        }
-        if (th->free_strides) {
-            PyMem_Free(mt->dltensor.strides);
-            mt->dltensor.strides = nullptr;
-        }
-        if (th->call_deleter) {
+        if (th->versioned) {
+            managed_dltensor_versioned *mt = th->mt_versioned;
+            if (th->free_strides) {
+                PyMem_Free(mt->dltensor.strides);
+                mt->dltensor.strides = nullptr;
+            }
+            if (th->call_deleter) {
+                if (mt->deleter)
+                    mt->deleter(mt);
+            } else {
+                PyMem_Free(mt);  // This also frees shape and strides arrays.
+            }
+        } else {
+            managed_dltensor *mt = th->mt_unversioned;
+            if (th->free_strides) {
+                PyMem_Free(mt->dltensor.strides);
+                mt->dltensor.strides = nullptr;
+            }
+            assert(th->call_deleter);
             if (mt->deleter)
                 mt->deleter(mt);
-        } else {
-            PyMem_Free(mt);
         }
         PyMem_Free(th);
     }
 }
 
-ndarray_handle *ndarray_create(void *value, size_t ndim, const size_t *shape_in,
+ndarray_handle *ndarray_create(void *data, size_t ndim, const size_t *shape_in,
                                PyObject *owner, const int64_t *strides_in,
                                dlpack::dtype dtype, bool ro, int device_type,
                                int device_id, char order) {
-    /* DLPack mandates 256-byte alignment of the 'DLTensor::data' field, but
-       PyTorch unfortunately ignores the 'byte_offset' value.. :-( */
+    /* DLPack mandates 256-byte alignment of the 'DLTensor::data' field,
+       but this requirement is generally ignored.  Also, PyTorch has/had
+       a bug in ignoring byte_offset and assuming it's zero.
+       It would be wrong to split the 64-bit raw pointer into two pieces,
+       as disabled below, since the pointer dltensor.data must point to
+       allocated memory (i.e., memory that can be accessed).
+       A byte_offset can be used to support array slicing when data is an
+       opaque device pointer or handle, on which arithmetic is impossible.
+       However, this function is not slicing the data.
+       See also: https://github.com/data-apis/array-api/discussions/779  */
 #if 0
-    uintptr_t value_int = (uintptr_t) value,
-              value_rounded = (value_int / 256) * 256;
+    uintptr_t data_uint = (uintptr_t) data;
+    data = (void *) (data_uint & ~uintptr_t{255});      // upper bits
+    uint64_t data_offset = data_uint & uintptr_t{255};  // lowest 8 bits
 #else
-    uintptr_t value_int = (uintptr_t) value,
-              value_rounded = value_int;
+    constexpr uint64_t data_offset = 0UL;
 #endif
     if (device_type == 0)
         device_type = device::cpu::value;
 
-    scoped_pymalloc<managed_dltensor> ndarray;
-    scoped_pymalloc<ndarray_handle> result;
-    scoped_pymalloc<int64_t> shape(ndim), strides(ndim);
-
-    auto deleter = [](managed_dltensor *mt) {
-        gil_scoped_acquire guard;
-        ndarray_handle *th = (ndarray_handle *) mt->manager_ctx;
-        ndarray_dec_ref(th);
-    };
+    static_assert(alignof(managed_dltensor_versioned) >= alignof(int64_t));
+    scoped_pymalloc<managed_dltensor_versioned> mt(1, 2 * sizeof(int64_t)*ndim);
+    int64_t* shape = nullptr;
+    int64_t* strides = nullptr;
+    if (ndim > 0) {
+        shape = new ((void*) (mt.get() + 1)) int64_t[2 * ndim];
+        strides = shape + ndim;
+    }
 
     for (size_t i = 0; i < ndim; ++i)
         shape[i] = (int64_t) shape_in[i];
@@ -689,27 +881,32 @@ ndarray_handle *ndarray_create(void *value, size_t ndim, const size_t *shape_in,
                 prod *= (int64_t) shape_in[i];
             }
         } else {
-            check(false, "nanobind::detail::ndarray_create(): unknown "
-                         "memory order requested!");
+            check(false, "ndarray_create(): unknown memory order requested!");
         }
     }
 
-    ndarray->dltensor.data = (void *) value_rounded;
-    ndarray->dltensor.device.device_type = (int32_t) device_type;
-    ndarray->dltensor.device.device_id = (int32_t) device_id;
-    ndarray->dltensor.ndim = (int32_t) ndim;
-    ndarray->dltensor.dtype = dtype;
-    ndarray->dltensor.byte_offset = value_int - value_rounded;
-    ndarray->dltensor.shape = shape.release();
-    ndarray->dltensor.strides = strides.release();
-    ndarray->manager_ctx = result.get();
-    ndarray->deleter = deleter;
-    result->ndarray = (managed_dltensor *) ndarray.release();
+    scoped_pymalloc<ndarray_handle> result;
+
+    mt->version = {dlpack::major_version, dlpack::minor_version};
+    mt->manager_ctx = result.get();
+    mt->deleter = [](managed_dltensor_versioned *self) {
+                      ndarray_dec_ref((ndarray_handle *) self->manager_ctx);
+                  };
+    mt->flags = (ro) ? dlpack::flag_bitmask_read_only : 0;
+    mt->dltensor.data = data;
+    mt->dltensor.device.device_type = (int32_t) device_type;
+    mt->dltensor.device.device_id = (int32_t) device_id;
+    mt->dltensor.ndim = (int32_t) ndim;
+    mt->dltensor.dtype = dtype;
+    mt->dltensor.shape = shape;
+    mt->dltensor.strides = strides;
+    mt->dltensor.byte_offset = data_offset;
+    result->mt_versioned = mt.release();
     result->refcount = 0;
     result->owner = owner;
     result->self = nullptr;
-    result->free_shape = true;
-    result->free_strides = true;
+    result->versioned = true;
+    result->free_strides = false;
     result->call_deleter = false;
     result->ro = ro;
     Py_XINCREF(owner);
@@ -717,7 +914,7 @@ ndarray_handle *ndarray_create(void *value, size_t ndim, const size_t *shape_in,
 }
 
 PyObject *ndarray_export(ndarray_handle *th, int framework,
-                        rv_policy policy, cleanup_list *cleanup) noexcept {
+                         rv_policy policy, cleanup_list *cleanup) noexcept {
     if (!th)
         return none().release().ptr();
 
@@ -765,57 +962,83 @@ PyObject *ndarray_export(ndarray_handle *th, int framework,
     object o;
     if (copy && framework == no_framework::value && th->self) {
         o = borrow(th->self);
-    } else if (framework == numpy::value || framework == jax::value || framework == memview::value) {
-        nb_ndarray *h = PyObject_New(nb_ndarray, nd_ndarray_tp());
+    } else if (framework == no_framework::value ||
+               framework == tensorflow::value) {
+        // Make a new capsule wrapping an unversioned managed_dltensor.
+        o = steal(th->make_capsule_unversioned());
+    } else {
+        // Make a Python object providing the buffer interface and having
+        // the two DLPack methods __dlpack__() and __dlpack_device__().
+        nb_ndarray *h = PyObject_New(nb_ndarray, nb_ndarray_tp());
         if (!h)
             return nullptr;
         h->th = th;
         ndarray_inc_ref(th);
         o = steal((PyObject *) h);
-    } else {
-        o = steal(PyCapsule_New(th->ndarray, "dltensor",
-                                ndarray_capsule_destructor));
-        ndarray_inc_ref(th);
     }
 
-    try {
-        if (framework == numpy::value) {
-            return module_::import_("numpy")
-                .attr("array")(o, arg("copy") = copy)
-                .release()
-                .ptr();
-        } else if (framework == memview::value) {
-            return PyMemoryView_FromObject(o.ptr());
-        } else {
-            const char *pkg_name;
-            switch (framework) {
-                case pytorch::value: pkg_name = "torch.utils.dlpack"; break;
-                case tensorflow::value: pkg_name = "tensorflow.experimental.dlpack"; break;
-                case jax::value: pkg_name = "jax.dlpack"; break;
-                case cupy::value: pkg_name = "cupy"; break;
-                default: pkg_name = nullptr;
-            }
+    if (framework == numpy::value) {
+        try {
+            PyObject* pkg_mod = module_import("numpy");
+            PyObject* args[] = {pkg_mod, o.ptr(),
+                                (copy) ? Py_True : Py_False};
+            Py_ssize_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET;
+            return PyObject_VectorcallMethod(
+                        static_pyobjects[pyobj_name::array_str], args, nargsf,
+                        static_pyobjects[pyobj_name::copy_tpl]);
+        } catch (const std::exception &e) {
+            PyErr_Format(PyExc_TypeError,
+                         "could not export nanobind::ndarray: %s",
+                         e.what());
+            return nullptr;
+        }
+    }
 
-            if (pkg_name)
-                o = module_::import_(pkg_name).attr("from_dlpack")(o);
+    try {
+        const char* pkg_name;
+        switch (framework) {
+            case pytorch::value:
+                pkg_name = "torch.utils.dlpack";
+                break;
+            case tensorflow::value:
+                pkg_name = "tensorflow.experimental.dlpack";
+                break;
+            case jax::value:
+                pkg_name = "jax.dlpack";
+                break;
+            case cupy::value:
+                pkg_name = "cupy";
+                break;
+            case memview::value:
+                return PyMemoryView_FromObject(o.ptr());
+            default:
+                pkg_name = nullptr;
+        }
+        if (pkg_name) {
+            PyObject* pkg_mod = module_import(pkg_name);
+            PyObject* args[] = {pkg_mod, o.ptr()};
+            Py_ssize_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET;
+            o = steal(PyObject_VectorcallMethod(
+                          static_pyobjects[pyobj_name::from_dlpack_str],
+                          args, nargsf, nullptr));
         }
     } catch (const std::exception &e) {
         PyErr_Format(PyExc_TypeError,
-                     "could not export nb::ndarray: %s",
+                     "could not export nanobind::ndarray: %s",
                      e.what());
         return nullptr;
     }
 
     if (copy) {
-        const char* copy_str = "copy";
+        PyObject* copy_function_name = static_pyobjects[pyobj_name::copy_str];
         if (framework == pytorch::value)
-            copy_str = "clone";
+            copy_function_name = static_pyobjects[pyobj_name::clone_str];
 
         try {
-            o = o.attr(copy_str)();
+            o = o.attr(copy_function_name)();
         } catch (std::exception &e) {
             PyErr_Format(PyExc_RuntimeError,
-                         "nanobind::detail::ndarray_export(): copy failed: %s",
+                         "copying nanobind::ndarray failed: %s",
                          e.what());
             return nullptr;
         }
diff --git a/extern/nanobind/src/nb_type.cpp b/extern/nanobind/src/nb_type.cpp
index f60083d77..cf1c8a056 100644
--- a/extern/nanobind/src/nb_type.cpp
+++ b/extern/nanobind/src/nb_type.cpp
@@ -21,23 +21,13 @@
 NAMESPACE_BEGIN(NB_NAMESPACE)
 NAMESPACE_BEGIN(detail)
 
-static PyObject **nb_dict_ptr(PyObject *self) {
-    PyTypeObject *tp = Py_TYPE(self);
-#if defined(Py_LIMITED_API)
+static PyObject **nb_dict_ptr(PyObject *self, PyTypeObject *tp) {
     Py_ssize_t dictoffset = nb_type_data(tp)->dictoffset;
-#else
-    Py_ssize_t dictoffset = tp->tp_dictoffset;
-#endif
     return dictoffset ? (PyObject **) ((uint8_t *) self + dictoffset) : nullptr;
 }
 
-static PyObject **nb_weaklist_ptr(PyObject *self) {
-    PyTypeObject *tp = Py_TYPE(self);
-#if defined(Py_LIMITED_API)
+static PyObject **nb_weaklist_ptr(PyObject *self, PyTypeObject *tp) {
     Py_ssize_t weaklistoffset = nb_type_data(tp)->weaklistoffset;
-#else
-    Py_ssize_t weaklistoffset = tp->tp_weaklistoffset;
-#endif
     return weaklistoffset ? (PyObject **) ((uint8_t *) self + weaklistoffset) : nullptr;
 }
 
@@ -47,19 +37,19 @@ static PyGetSetDef inst_getset[] = {
 };
 
 static int inst_clear(PyObject *self) {
-    PyObject **dict = nb_dict_ptr(self);
+    PyTypeObject *tp = Py_TYPE(self);
+    PyObject **dict = nb_dict_ptr(self, tp);
     if (dict)
         Py_CLEAR(*dict);
     return 0;
 }
 
 static int inst_traverse(PyObject *self, visitproc visit, void *arg) {
-    PyObject **dict = nb_dict_ptr(self);
+    PyTypeObject *tp = Py_TYPE(self);
+    PyObject **dict = nb_dict_ptr(self, tp);
     if (dict)
         Py_VISIT(*dict);
-#if PY_VERSION_HEX >= 0x03090000
-    Py_VISIT(Py_TYPE(self));
-#endif
+    Py_VISIT(tp);
     return 0;
 }
 
@@ -88,7 +78,7 @@ PyObject *inst_new_int(PyTypeObject *tp, PyObject * /* args */,
         uintptr_t payload = (uintptr_t) (self + 1);
 
         if (NB_UNLIKELY(align > sizeof(void *)))
-            payload = (payload + align - 1) / align * align;
+            payload = (payload + align - 1) & ~(uintptr_t(align) - 1);
 
         self->offset = (int32_t) ((intptr_t) payload - (intptr_t) self);
         self->direct = 1;
@@ -227,16 +217,16 @@ static void inst_dealloc(PyObject *self) {
         PyObject_GC_UnTrack(self);
 
         if (t->flags & (uint32_t) type_flags::has_dynamic_attr) {
-            PyObject **dict = nb_dict_ptr(self);
+            PyObject **dict = nb_dict_ptr(self, tp);
             if (dict)
                 Py_CLEAR(*dict);
         }
     }
 
     if (t->flags & (uint32_t) type_flags::is_weak_referenceable &&
-        nb_weaklist_ptr(self) != nullptr) {
+        nb_weaklist_ptr(self, tp) != nullptr) {
 #if defined(PYPY_VERSION)
-        PyObject **weaklist = nb_weaklist_ptr(self);
+        PyObject **weaklist = nb_weaklist_ptr(self, tp);
         if (weaklist)
             Py_CLEAR(*weaklist);
 #else
@@ -355,9 +345,9 @@ type_data *nb_type_c2p(nb_internals *internals_,
     nb_type_map_fast &type_c2p_fast = internals_->type_c2p_fast;
 #endif
 
-    nb_type_map_fast::iterator it_fast = type_c2p_fast.find(type);
+    nb_type_map_fast::iterator it_fast = type_c2p_fast.find((void *) type);
     if (it_fast != type_c2p_fast.end())
-        return it_fast->second;
+        return (type_data *) it_fast->second;
 
     lock_internals guard(internals_);
     nb_type_map_slow &type_c2p_slow = internals_->type_c2p_slow;
@@ -377,7 +367,7 @@ type_data *nb_type_c2p(nb_internals *internals_,
         d->alias_chain = chain;
 #endif
 
-        type_c2p_fast[type] = d;
+        type_c2p_fast[(void *) type] = d;
         return d;
     }
 
@@ -413,14 +403,14 @@ void nb_type_unregister(type_data *t) noexcept {
     bool fail = n_del_slow != 1;
 #else
     nb_type_map_fast &type_c2p_fast = internals_->type_c2p_fast;
-    size_t n_del_fast = type_c2p_fast.erase(t->type);
+    size_t n_del_fast = type_c2p_fast.erase((void *) t->type);
 
     bool fail = n_del_fast != 1 || n_del_slow != 1;
     if (!fail) {
         nb_alias_chain *cur = t->alias_chain;
         while (cur) {
             nb_alias_chain *next = cur->next;
-            n_del_fast = type_c2p_fast.erase(cur->value);
+            n_del_fast = type_c2p_fast.erase((void *) cur->value);
             if (n_del_fast != 1) {
                 fail = true;
                 break;
@@ -599,11 +589,6 @@ template <size_t I1, size_t I2, size_t Offset1, size_t Offset2> nb_slot constexp
        offsetof(PyHeapTypeObject, p1),                 \
        offsetof(PyHeapTypeObject, p1.p2##_##name)>()
 
-#if PY_VERSION_HEX < 0x03090000
-#  define Py_bf_getbuffer 1
-#  define Py_bf_releasebuffer 2
-#endif
-
 static constexpr nb_slot type_slots[] {
     E(1,  as_buffer, bf, getbuffer),
     E(2,  as_buffer, bf, releasebuffer),
@@ -718,23 +703,32 @@ static PyObject *nb_type_from_metaclass(PyTypeObject *meta, PyObject *mod,
        is why nanobind can only target the stable ABI on version 3.12+. */
 
     const char *name = strrchr(spec->name, '.');
-    if (name)
+    PyObject *modname_o = nullptr;
+    if (name) {
+        modname_o = PyUnicode_FromStringAndSize(spec->name, name - spec->name);
+        if (!modname_o)
+            return nullptr;
         name++;
-    else
+    } else {
         name = spec->name;
+    }
 
     PyObject *name_o = PyUnicode_InternFromString(name);
-    if (!name_o)
+    if (!name_o) {
+        Py_XDECREF(modname_o);
         return nullptr;
+    }
 
     const char *name_cstr = PyUnicode_AsUTF8AndSize(name_o, nullptr);
     if (!name_cstr) {
+        Py_XDECREF(modname_o);
         Py_DECREF(name_o);
         return nullptr;
     }
 
     PyHeapTypeObject *ht = (PyHeapTypeObject *) PyType_GenericAlloc(meta, 0);
     if (!ht) {
+        Py_XDECREF(modname_o);
         Py_DECREF(name_o);
         return nullptr;
     }
@@ -743,14 +737,10 @@ static PyObject *nb_type_from_metaclass(PyTypeObject *meta, PyObject *mod,
     ht->ht_qualname = name_o;
     Py_INCREF(name_o);
 
-#if PY_VERSION_HEX >= 0x03090000
     if (mod) {
         Py_INCREF(mod);
         ht->ht_module = mod;
     }
-#else
-    (void) mod;
-#endif
 
     PyTypeObject *tp = &ht->ht_type;
     tp->tp_name = name_cstr;
@@ -828,6 +818,14 @@ static PyObject *nb_type_from_metaclass(PyTypeObject *meta, PyObject *mod,
         }
     }
 
+    if (modname_o && !fail) {
+        tp->tp_dict = PyDict_New();
+        if (!tp->tp_dict ||
+            PyDict_SetItemString(tp->tp_dict, "__module__", modname_o) < 0)
+            fail = true;
+    }
+    Py_XDECREF(modname_o);
+
     if (fail || PyType_Ready(tp) != 0) {
         Py_DECREF(tp);
         return nullptr;
@@ -839,6 +837,92 @@ static PyObject *nb_type_from_metaclass(PyTypeObject *meta, PyObject *mod,
 
 extern int nb_type_setattro(PyObject* obj, PyObject* name, PyObject* value);
 
+// Implements the vector call protocol directly on type objects to construct
+// instances more efficiently.
+static PyObject *nb_type_vectorcall(PyObject *self, PyObject *const *args_in,
+                                    size_t nargsf,
+                                    PyObject *kwargs_in) noexcept {
+    PyTypeObject *tp = (PyTypeObject *) self;
+    type_data *td = nb_type_data(tp);
+    nb_func *func = (nb_func *) td->init;
+    bool is_init = (td->flags & (uint32_t) type_flags::has_new) == 0;
+    Py_ssize_t nargs = PyVectorcall_NARGS(nargsf);
+
+    if (NB_UNLIKELY(!func)) {
+        PyErr_Format(PyExc_TypeError, "%s: no constructor defined!", td->name);
+        return nullptr;
+    }
+
+    if (NB_LIKELY(is_init)) {
+        self = inst_new_int(tp, nullptr, nullptr);
+        if (!self)
+            return nullptr;
+    } else if (nargs == 0 && !kwargs_in &&
+               !(td->flags & (uint32_t) type_flags::has_nullary_new)) {
+        // When the bindings define a custom __new__ operator, nanobind always
+        // provides a no-argument dummy __new__ constructor to handle unpickling
+        // via __setstate__. This is an implementation detail that should not be
+        // exposed. Therefore, only allow argument-less calls if there is an
+        // actual __new__ overload with a compatible signature. This is
+        // detected in nb_func.cpp based on whether any __init__ overload can
+        // accept no arguments.
+
+        return func->vectorcall((PyObject *) func, nullptr, 0, nullptr);
+    }
+
+    const size_t buf_size = 5;
+    PyObject **args, *buf[buf_size], *temp = nullptr;
+    bool alloc = false;
+
+    if (NB_LIKELY(nargsf & PY_VECTORCALL_ARGUMENTS_OFFSET)) {
+        args = (PyObject **) (args_in - 1);
+        temp = args[0];
+    } else {
+        size_t size = nargs + 1;
+        if (kwargs_in)
+            size += NB_TUPLE_GET_SIZE(kwargs_in);
+
+        if (size < buf_size) {
+            args = buf;
+        } else {
+            args = (PyObject **) PyMem_Malloc(size * sizeof(PyObject *));
+            if (!args) {
+                if (is_init)
+                    Py_DECREF(self);
+                return PyErr_NoMemory();
+            }
+            alloc = true;
+        }
+
+        memcpy(args + 1, args_in, sizeof(PyObject *) * (size - 1));
+    }
+
+    args[0] = self;
+
+    PyObject *rv =
+        func->vectorcall((PyObject *) func, args, nargs + 1, kwargs_in);
+
+    args[0] = temp;
+
+    if (NB_UNLIKELY(alloc))
+        PyMem_Free(args);
+
+    if (NB_LIKELY(is_init)) {
+        if (!rv) {
+            Py_DECREF(self);
+            return nullptr;
+        }
+
+        // __init__ constructor: 'rv' is None
+        Py_DECREF(rv);
+        return self;
+    } else {
+        // __new__ constructor
+        return rv;
+    }
+}
+
+
 static PyTypeObject *nb_type_tp(size_t supplement) noexcept {
     object key = steal(PyLong_FromSize_t(supplement));
     nb_internals *internals_ = internals;
@@ -854,27 +938,6 @@ static PyTypeObject *nb_type_tp(size_t supplement) noexcept {
         if (tp)
             return tp;
 
-#if defined(Py_LIMITED_API)
-        PyMemberDef members[] = {
-            { "__vectorcalloffset__", Py_T_PYSSIZET, 0, Py_READONLY, nullptr },
-            { nullptr, 0, 0, 0, nullptr }
-        };
-
-        // Workaround because __vectorcalloffset__ does not support Py_RELATIVE_OFFSET
-        members[0].offset = internals_->type_data_offset + offsetof(type_data, vectorcall);
-#endif
-
-        PyType_Slot slots[] = {
-            { Py_tp_base, &PyType_Type },
-            { Py_tp_dealloc, (void *) nb_type_dealloc },
-            { Py_tp_setattro, (void *) nb_type_setattro },
-            { Py_tp_init, (void *) nb_type_init },
-#if defined(Py_LIMITED_API)
-            { Py_tp_members, (void *) members },
-#endif
-            { 0, nullptr }
-        };
-
 #if PY_VERSION_HEX >= 0x030C0000
         int basicsize = -(int) (sizeof(type_data) + supplement),
             itemsize = 0;
@@ -886,16 +949,36 @@ static PyTypeObject *nb_type_tp(size_t supplement) noexcept {
         char name[17 + 20 + 1];
         snprintf(name, sizeof(name), "nanobind.nb_type_%zu", supplement);
 
+        PyType_Slot slots[] = {
+            { Py_tp_base, &PyType_Type },
+            { Py_tp_dealloc, (void *) nb_type_dealloc },
+            { Py_tp_setattro, (void *) nb_type_setattro },
+            { Py_tp_init, (void *) nb_type_init },
+            { 0, nullptr },
+            { 0, nullptr }
+        };
+
         PyType_Spec spec = {
             /* .name = */ name,
             /* .basicsize = */ basicsize,
             /* .itemsize = */ itemsize,
-            /* .flags = */ Py_TPFLAGS_DEFAULT,
+            /* .flags = */ Py_TPFLAGS_DEFAULT | NB_TPFLAGS_IMMUTABLETYPE,
             /* .slots = */ slots
         };
 
 #if defined(Py_LIMITED_API)
-        spec.flags |= Py_TPFLAGS_HAVE_VECTORCALL;
+        PyMemberDef members[] = {
+            { "__vectorcalloffset__", Py_T_PYSSIZET, 0, Py_READONLY, nullptr },
+            { nullptr, 0, 0, 0, nullptr }
+        };
+
+        // Workaround because __vectorcalloffset__ does not support Py_RELATIVE_OFFSET
+        members[0].offset = internals_->type_data_offset + offsetof(type_data, vectorcall);
+
+        if (NB_DYNAMIC_VERSION < 0x030E0000) {
+            slots[4] = { Py_tp_members, (void *) members };
+            spec.flags |= Py_TPFLAGS_HAVE_VECTORCALL;
+        }
 #endif
 
         tp = (PyTypeObject *) nb_type_from_metaclass(
@@ -903,8 +986,6 @@ static PyTypeObject *nb_type_tp(size_t supplement) noexcept {
 
         make_immortal((PyObject *) tp);
 
-        handle(tp).attr("__module__") = "nanobind";
-
         int rv = 1;
         if (tp)
             rv = PyDict_SetItem(internals_->nb_type_dict, key.ptr(), (PyObject *) tp);
@@ -957,97 +1038,10 @@ NB_NOINLINE char *extract_name(const char *cmd, const char *prefix, const char *
     return result;
 }
 
-#if PY_VERSION_HEX >= 0x03090000
 static PyMethodDef class_getitem_method[] = {
     { "__class_getitem__", Py_GenericAlias, METH_O | METH_CLASS, nullptr },
     { nullptr }
 };
-#endif
-
-// Implements the vector call protocol directly on type objects to construct
-// instances more efficiently.
-static PyObject *nb_type_vectorcall(PyObject *self, PyObject *const *args_in,
-                                    size_t nargsf,
-                                    PyObject *kwargs_in) noexcept {
-    PyTypeObject *tp = (PyTypeObject *) self;
-    type_data *td = nb_type_data(tp);
-    nb_func *func = (nb_func *) td->init;
-    bool is_init = (td->flags & (uint32_t) type_flags::has_new) == 0;
-    Py_ssize_t nargs = NB_VECTORCALL_NARGS(nargsf);
-
-    if (NB_UNLIKELY(!func)) {
-        PyErr_Format(PyExc_TypeError, "%s: no constructor defined!", td->name);
-        return nullptr;
-    }
-
-    if (NB_LIKELY(is_init)) {
-        self = inst_new_int(tp, nullptr, nullptr);
-        if (!self)
-            return nullptr;
-    } else if (nargs == 0 && !kwargs_in &&
-               !(td->flags & (uint32_t) type_flags::has_nullary_new)) {
-        // When the bindings define a custom __new__ operator, nanobind always
-        // provides a no-argument dummy __new__ constructor to handle unpickling
-        // via __setstate__. This is an implementation detail that should not be
-        // exposed. Therefore, only allow argument-less calls if there is an
-        // actual __new__ overload with a compatible signature. This is
-        // detected in nb_func.cpp based on whether any __init__ overload can
-        // accept no arguments.
-
-        return func->vectorcall((PyObject *) func, nullptr, 0, nullptr);
-    }
-
-    const size_t buf_size = 5;
-    PyObject **args, *buf[buf_size], *temp = nullptr;
-    bool alloc = false;
-
-    if (NB_LIKELY(nargsf & NB_VECTORCALL_ARGUMENTS_OFFSET)) {
-        args = (PyObject **) (args_in - 1);
-        temp = args[0];
-    } else {
-        size_t size = nargs + 1;
-        if (kwargs_in)
-            size += NB_TUPLE_GET_SIZE(kwargs_in);
-
-        if (size < buf_size) {
-            args = buf;
-        } else {
-            args = (PyObject **) PyMem_Malloc(size * sizeof(PyObject *));
-            if (!args) {
-                if (is_init)
-                    Py_DECREF(self);
-                return PyErr_NoMemory();
-            }
-            alloc = true;
-        }
-
-        memcpy(args + 1, args_in, sizeof(PyObject *) * (size - 1));
-    }
-
-    args[0] = self;
-
-    PyObject *rv =
-        func->vectorcall((PyObject *) func, args, nargs + 1, kwargs_in);
-
-    args[0] = temp;
-
-    if (NB_UNLIKELY(alloc))
-        PyMem_Free(args);
-
-    if (NB_LIKELY(is_init)) {
-        if (!rv) {
-            Py_DECREF(self);
-            return nullptr;
-        }
-
-        // __init__ constructor: 'rv' is None
-        Py_DECREF(rv);
-        return self;
-    } else {
-        // __new__ constructor
-        return rv;
-    }
-}
 
 /// Called when a C++ type is bound via nb::class_<>
 PyObject *nb_type_new(const type_init_data *t) noexcept {
@@ -1118,17 +1112,16 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
 
     PyObject *base = nullptr;
 
-#if PY_VERSION_HEX >= 0x03090000
+#if !defined(PYPY_VERSION) // see https://github.com/pypy/pypy/issues/4914
     bool generic_base = false;
 #endif
-
     if (has_base_py) {
         check(!has_base,
               "nanobind::detail::nb_type_new(\"%s\"): multiple base types "
               "specified!", t_name);
         base = (PyObject *) t->base_py;
 
-        #if PY_VERSION_HEX >= 0x03090000 && !defined(PYPY_VERSION) // see https://github.com/pypy/pypy/issues/4914
+#if !defined(PYPY_VERSION) // see https://github.com/pypy/pypy/issues/4914
         if (Py_TYPE(base) == &Py_GenericAliasType) {
             base = PyObject_GetAttrString(base, "__origin__");
             check(base != nullptr,
@@ -1136,7 +1129,7 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
             Py_DECREF(base);
             generic_base = true;
         }
-        #endif
+#endif
 
         check(nb_type_check(base),
               "nanobind::detail::nb_type_new(\"%s\"): base type is not a "
@@ -1194,7 +1187,7 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
 
     char *name_copy = strdup_check(name.c_str());
 
-    constexpr size_t nb_type_max_slots = 11,
+    constexpr size_t nb_type_max_slots = 12,
                      nb_extra_slots = 80,
                      nb_total_slots = nb_type_max_slots +
                                       nb_extra_slots + 1;
@@ -1302,14 +1295,11 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
     if (num_members > 0)
         *s++ = { Py_tp_members, (void*) members };
 
-#if PY_VERSION_HEX < 0x03090000
-    // Features that are unsupported in Python 3.8
-    (void) is_generic;
-    type_vectorcall = nullptr;
-#else
     if (is_generic)
         *s++ = { Py_tp_methods, (void*) class_getitem_method };
-#endif
+
+    if (NB_DYNAMIC_VERSION >= 0x030E0000 && type_vectorcall)
+        *s++ = { Py_tp_vectorcall, (void *) type_vectorcall };
 
     if (has_traverse)
         spec.flags |= Py_TPFLAGS_HAVE_GC;
@@ -1346,11 +1336,17 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
         to->keep_shared_from_this_alive = tb->keep_shared_from_this_alive;
     }
 
-    #if defined(Py_LIMITED_API)
-        to->vectorcall = type_vectorcall;
-    #else
-        ((PyTypeObject *) result)->tp_vectorcall = type_vectorcall;
-    #endif
+    if (NB_DYNAMIC_VERSION < 0x030E0000) {
+        // On Python 3.14+, use Py_tp_vectorcall to set the type vectorcall
+        // slot. Otherwise, assign tp_vectorcall or use a workaround (via
+        // tp_vectorcall_offset) for stable ABI builds.
+
+        #if defined(Py_LIMITED_API)
+            to->vectorcall = type_vectorcall;
+        #else
+            ((PyTypeObject *) result)->tp_vectorcall = type_vectorcall;
+        #endif
+    }
 
     to->name = name_copy;
     to->type_py = (PyTypeObject *) result;
@@ -1362,13 +1358,10 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
     if (is_weak_referenceable)
         to->flags |= (uint32_t) type_flags::is_weak_referenceable;
 
-    #if defined(Py_LIMITED_API)
-        /* These must be set unconditionally so that nb_dict_ptr() /
-           nb_weaklist_ptr() return null (rather than garbage) on
-           objects whose types don't use the corresponding feature. */
-        to->dictoffset = (uint32_t) dictoffset;
-        to->weaklistoffset = (uint32_t) weaklistoffset;
-    #endif
+    // Always cache dictoffset/weaklistoffset so nb_dict_ptr()/nb_weaklist_ptr()
+    // only access dicts/weaklists created by nanobind, not those added by Python
+    to->dictoffset = (uint32_t) dictoffset;
+    to->weaklistoffset = (uint32_t) weaklistoffset;
 
     if (t->scope != nullptr)
         setattr(t->scope, t_name, result);
@@ -1383,7 +1376,7 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
         internals_->type_c2p_slow[t->type] = to;
 
         #if !defined(NB_FREE_THREADED)
-            internals_->type_c2p_fast[t->type] = to;
+            internals_->type_c2p_fast[(void *) t->type] = to;
         #endif
     }
 
@@ -1392,7 +1385,7 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
         free((char *) t_name);
     }
 
-#if PY_VERSION_HEX >= 0x03090000
+#if !defined(PYPY_VERSION)
     if (generic_base)
         setattr(result, "__orig_bases__", make_tuple(handle(t->base_py)));
 #endif
@@ -1402,21 +1395,9 @@ PyObject *nb_type_new(const type_init_data *t) noexcept {
 
 
 PyObject *call_one_arg(PyObject *fn, PyObject *arg) noexcept {
-    PyObject *result;
-#if PY_VERSION_HEX < 0x03090000
-    PyObject *args = PyTuple_New(1);
-    if (!args)
-        return nullptr;
-    Py_INCREF(arg);
-    NB_TUPLE_SET_ITEM(args, 0, arg);
-    result = PyObject_CallObject(fn, args);
-    Py_DECREF(args);
-#else
-    PyObject *args[2] = { nullptr, arg };
-    result = PyObject_Vectorcall(fn, args + 1,
-                                 NB_VECTORCALL_ARGUMENTS_OFFSET + 1, nullptr);
-#endif
-    return result;
+    PyObject *argv[2] = { nullptr, arg };
+    return PyObject_Vectorcall(fn, argv + 1,
+                               PY_VECTORCALL_ARGUMENTS_OFFSET + 1, nullptr);
 }
 
 /// Encapsulates the implicit conversion part of nb_type_get()
diff --git a/extern/nanobind/src/stubgen.py b/extern/nanobind/src/stubgen.py
index 9098c50e6..6bf8eef65 100755
--- a/extern/nanobind/src/stubgen.py
+++ b/extern/nanobind/src/stubgen.py
@@ -57,6 +57,7 @@ class and repeatedly call ``.put()`` to register modules or contents within the
 import builtins
 import enum
 from inspect import Signature, Parameter, signature, ismodule
+import io
 import textwrap
 import importlib
 import importlib.machinery
@@ -69,10 +70,7 @@ class and repeatedly call ``.put()`` to register modules or contents within the
 import re
 import sys
 
-if sys.version_info < (3, 9):
-    from typing import Match, Pattern
-else:
-    from re import Match, Pattern
+from re import Match, Pattern
 
 if sys.version_info < (3, 11):
     try:
@@ -84,15 +82,31 @@ class and repeatedly call ``.put()`` to register modules or contents within the
 else:
     typing_extensions = None
 
-# Exclude various standard elements found in modules, classes, etc.
 SKIP_LIST = [
+    # Various standard attributes found in modules, classes, etc.
     "__doc__", "__module__", "__name__", "__new__", "__builtins__",
     "__cached__", "__path__", "__version__", "__spec__", "__loader__",
     "__package__", "__nb_signature__", "__class_getitem__", "__orig_bases__",
     "__file__", "__dict__", "__weakref__", "__format__", "__nb_enum__",
     "__firstlineno__", "__static_attributes__", "__annotations__", "__annotate__",
-    "__annotate_func__"
+    "__annotate_func__",
+
+    # Auto-generated enum attributes. Type checkers synthesize these, so they
+    # shouldn't appear in the stubs.
+    "_new_member_", "_use_args_", "_member_names_", "_member_map_",
+    "_value2member_map_", "_hashable_values_", "_unhashable_values_",
+    "_unhashable_values_map_", "_value_repr_",
 ]
+
+# Interpreter-internal types, mapped to the name under which ``types`` exposes them.
+TYPES_TYPES = {
+    getattr(types, name): name for name in [
+        "MethodDescriptorType",
+        "MemberDescriptorType",
+        "ModuleType",
+    ]
+}
+
 # fmt: on
 
 # This type is used to track per-module imports (``import name as desired_name``)
@@ -235,8 +249,8 @@ def __init__(
         # Current depth / indentation level
         self.depth = 0
 
-        # Output will be appended to this string
-        self.output = ""
+        # Output buffer
+        self._output = io.StringIO()
 
         # A stack to avoid infinite recursion
         self.stack: List[object] = []
@@ -283,22 +297,29 @@ def __init__(
             'MutableSequence|MutableSet|Sequence|ValuesView)'
         )
 
+    @property
+    def output(self) -> str:
+        """Get the current output as a string."""
+        return self._output.getvalue()
+
     def write(self, s: str) -> None:
         """Append raw characters to the output"""
-        self.output += s
+        self._output.write(s)
 
     def write_ln(self, line: str) -> None:
         """Append an indented line"""
         if len(line) != 0 and not line.isspace():
-            self.output += "    " * self.depth + line
-        self.output += "\n"
+            self._output.write("    " * self.depth + line)
+        self._output.write("\n")
 
-    def write_par(self, line: str) -> None:
-        """Append an indented paragraph"""
-        self.output += textwrap.indent(line, "    " * self.depth)
+    def _replace_tail(self, num_chars: int, replacement: str) -> None:
+        """Remove the last ``num_chars`` characters of the output and append ``replacement``."""
+        self._output.seek(self._output.tell() - num_chars)
+        self._output.truncate()
+        self._output.write(replacement)
 
-    def put_docstr(self, docstr: str) -> None:
-        """Append an indented single or multi-line docstring"""
+    def format_docstr(self, docstr: str, depth: int) -> str:
+        """Format a single or multi-line docstring with given indentation"""
         docstr = textwrap.dedent(docstr).strip()
         raw_str = ""
         if "''" in docstr or "\\" in docstr:
@@ -308,7 +329,11 @@ def put_docstr(self, docstr: str) -> None:
         if len(docstr) > 70 or "\n" in docstr:
             docstr = "\n" + docstr + "\n"
         docstr = f'{raw_str}"""{docstr}"""\n'
-        self.write_par(docstr)
+        return textwrap.indent(docstr, "    " * depth)
+
+    def put_docstr(self, docstr: str) -> None:
+        """Append an indented single or multi-line docstring"""
+        self.write(self.format_docstr(docstr, self.depth))
 
     def put_nb_overload(self, fn: NbFunction, sig: NbFunctionSignature, name: Optional[str] = None) -> None:
         """
@@ -370,12 +395,12 @@ def put_nb_overload(self, fn: NbFunction, sig: NbFunctionSignature, name: Option
         if not docstr or not self.include_docstrings:
             for s in sig_str.split("\n"):
                 self.write_ln(s)
-            self.output = self.output[:-1] + ": ...\n"
+            self._replace_tail(1, ": ...\n")
         else:
             docstr = textwrap.dedent(docstr)
             for s in sig_str.split("\n"):
                 self.write_ln(s)
-            self.output = self.output[:-1] + ":\n"
+            self._replace_tail(1, ":\n")
             self.depth += 1
             self.put_docstr(docstr)
             self.depth -= 1
@@ -529,7 +554,7 @@ def put_type(self, tp: NbType, name: Optional[str]):
                 # Types with a custom signature override
                 for s in tp.__nb_signature__.split("\n"):
                     self.write_ln(self.simplify_types(s))
-                self.output = self.output[:-1] + ":\n"
+                self._replace_tail(1, ":\n")
             else:
                 self.write_ln(f"class {tp_name}:")
                 if tp_bases is None:
@@ -539,7 +564,7 @@ def put_type(self, tp: NbType, name: Optional[str]):
                     tp_bases = [self.type_str(base) for base in tp_bases]
 
                 if tp_bases != ["object"]:
-                    self.output = self.output[:-2] + "("
+                    self._replace_tail(2, "(")
                     for i, base in enumerate(tp_bases):
                         if i:
                             self.write(", ")
@@ -547,14 +572,16 @@ def put_type(self, tp: NbType, name: Optional[str]):
                     self.write("):\n")
 
             self.depth += 1
-            output_len = len(self.output)
+            output_pos = self._output.tell()
             if docstr and self.include_docstrings:
                 self.put_docstr(docstr)
                 if len(tp_dict):
                     self.write("\n")
+            self.apply_pattern(self.prefix + ".__prefix__", None)
             for k, v in tp_dict.items():
                 self.put(v, k, tp)
-            if output_len == len(self.output):
+            self.apply_pattern(self.prefix + ".__suffix__", None)
+            if output_pos == self._output.tell():
                 self.write_ln("pass\n")
             self.depth -= 1
 
@@ -596,7 +623,7 @@ def put_value(self, value: object, name: str, parent: Optional[object] = None, a
 
         if isinstance(parent, type) and issubclass(tp, parent):
             # This is an entry of an enumeration
-            self.write_ln(f"{name} = {typing.cast(enum.Enum, value).value}")
+            self.write_ln(f"{name} = {typing.cast(enum.Enum, value)._value_}")
             if value.__doc__ and self.include_docstrings:
                 self.put_docstr(value.__doc__)
             self.write("\n")
@@ -627,12 +654,24 @@ def put_value(self, value: object, name: str, parent: Optional[object] = None, a
             self.write_ln(f"{name}{types} = {value_str}\n")
 
     def is_type_var(self, tp: type) -> bool:
-        return (issubclass(tp, typing.TypeVar)
-            or (sys.version_info >= (3, 11) and issubclass(tp, typing.TypeVarTuple))
-            or (typing_extensions is not None
-            and (
-                issubclass(tp, typing_extensions.TypeVar)
-                or issubclass(tp, typing_extensions.TypeVarTuple))))
+        if issubclass(tp, typing.TypeVar):
+            return True
+        if sys.version_info >= (3, 10) and issubclass(tp, typing.ParamSpec):
+            return True
+        if sys.version_info >= (3, 11) and issubclass(tp, typing.TypeVarTuple):
+            return True
+        if typing_extensions is not None:
+            if issubclass(
+                tp,
+                (
+                    typing_extensions.TypeVar,
+                    typing_extensions.ParamSpec,
+                    typing_extensions.TypeVarTuple
+                )
+            ):
+                return True
+        return False
+
 
     def simplify_types(self, s: str) -> str:
         """
@@ -661,8 +700,7 @@ def simplify_types(self, s: str) -> str:
         # Process nd-array type annotations so that MyPy accepts them
         s = self.ndarray_re.sub(lambda m: self._format_ndarray(m.group(2)), s)
 
-        if sys.version_info >= (3, 9, 0):
-            s = self.abc_re.sub(r'collections.abc.\1', s)
+        s = self.abc_re.sub(r'collections.abc.\1', s)
 
         # Process other type names and add suitable import statements
         def process_general(m: Match[str]) -> str:
@@ -718,6 +756,7 @@ def _format_ndarray(self, annotation: str) -> str:
 
         if m:
             dtype = "numpy."+ m.group(1)
+            dtype = dtype.replace('bool', 'bool_')
             annotation = re.sub(r"dtype=\w+,?\s*", "", annotation).rstrip(", ")
 
         # Turn shape notation into a valid Python type expression
@@ -987,21 +1026,26 @@ def expr_str(self, e: Any, abbrev: bool = True) -> Optional[str]:
         complicated.
         """
         tp = type(e)
-        for t in [bool, int, type(None), type(builtins.Ellipsis)]:
-            if issubclass(tp, t):
-                return repr(e)
-        if issubclass(tp, float):
+        if issubclass(tp, (bool, int, type(None), type(builtins.Ellipsis))):
+            s = repr(e)
+            if len(s) < self.max_expr_length or not abbrev:
+                return s
+        elif issubclass(tp, float):
             s = repr(e)
             if "inf" in s or "nan" in s:
-                return f"float('{s}')"
-            else:
+                s = f"float('{s}')"
+            if len(s) < self.max_expr_length or not abbrev:
                 return s
         elif issubclass(tp, type) or typing.get_origin(e):
             return self.type_str(e)
         elif issubclass(tp, typing.ForwardRef):
             return f'"{e.__forward_arg__}"'
         elif issubclass(tp, enum.Enum):
-            return self.type_str(tp) + '.' + e.name
+            return self.type_str(tp) + '.' + e._name_
+        elif (sys.version_info >= (3, 10) and issubclass(tp, typing.ParamSpec)) \
+            or (typing_extensions is not None and issubclass(tp, typing_extensions.ParamSpec)):
+            tv = self.import_object(tp.__module__, "ParamSpec")
+            return f'{tv}("{e.__name__}")'
         elif (sys.version_info >= (3, 11) and issubclass(tp, typing.TypeVarTuple)) \
             or (typing_extensions is not None and issubclass(tp, typing_extensions.TypeVarTuple)):
             tv = self.import_object(tp.__module__, "TypeVarTuple")
@@ -1010,13 +1054,17 @@ def expr_str(self, e: Any, abbrev: bool = True) -> Optional[str]:
             tv = self.import_object("typing", "TypeVar")
             s = f'{tv}("{e.__name__}"'
             for v in getattr(e, "__constraints__", ()):
-                v = self.expr_str(v)
+                v = self.type_str(v)
                 assert v
                 s += ", " + v
-            for k in ["contravariant", "covariant", "bound", "infer_variance"]:
+            if v := getattr(e, "__bound__", None):
+                v = self.type_str(v)
+                assert v
+                s += ", bound=" + v
+            for k in ["contravariant", "covariant", "infer_variance"]:
                 v = getattr(e, f"__{k}__", None)
                 if v:
-                    v = self.expr_str(v)
+                    v = self.expr_str(v, abbrev=False)
                     if v is None:
                         return None
                     s += f", {k}=" + v
@@ -1139,8 +1187,10 @@ def type_str(self, tp: Union[List[Any], Tuple[Any, ...], Dict[Any, Any], Any]) -
                 + ", ".join(args_gen)
                 + "]"
             )
-        elif tp is types.ModuleType:
-            result = "types.ModuleType"
+        elif tp in TYPES_TYPES:
+            result = f"types.{TYPES_TYPES[tp]}"
+        elif tp is Ellipsis:
+            result = "..."
         elif isinstance(tp, type):
             result = tp.__module__ + "." + tp.__qualname__
         else:
@@ -1173,6 +1223,12 @@ def check_party(self, module: str) -> Literal[0, 1, 2]:
     def get(self) -> str:
         """Generate the final stub output"""
         s = ""
+
+        # Potentially add a module docstring
+        doc = getattr(self.module, '__doc__', None)
+        if self.include_docstrings and doc:
+            s += self.format_docstr(doc, 0) + "\n"
+
         last_party = None
 
         for module in sorted(self.imports, key=lambda i: str(self.check_party(i)) + i):
@@ -1302,6 +1358,14 @@ def parse_options(args: List[str]) -> argparse.Namespace:
         help="exclude docstrings from the generated stub",
     )
 
+    parser.add_argument(
+        "--exclude-values",
+        dest="exclude_values",
+        default=False,
+        action="store_true",
+        help="force the use of ... for values",
+    )
+
     parser.add_argument(
         "-q",
         "--quiet",
@@ -1446,6 +1510,7 @@ def main(args: Optional[List[str]] = None) -> None:
             recursive=opt.recursive,
             include_docstrings=opt.include_docstrings,
             include_private=opt.include_private,
+            max_expr_length=0 if opt.exclude_values else 50,
             patterns=patterns,
             output_file=file
         )
diff --git a/extern/nanobind/src/version.py b/extern/nanobind/src/version.py
index 55e1895f5..c35af656b 100755
--- a/extern/nanobind/src/version.py
+++ b/extern/nanobind/src/version.py
@@ -31,7 +31,8 @@ def get_version(root):
     else:
         print(version_core)
 
-# Write the semantic version to nanobind.h, pyproject.toml, and __init__.py.
+# Write the semantic version to nanobind.h, pyproject.toml, __init__.py,
+# and docs/bazel.rst.
 # The semver string must be either 'X.Y.Z' or 'X.Y.Z-devN', where X, Y, Z are
 # non-negative integers and N is a positive integer.
 def write_version(root, semver):
@@ -92,6 +93,27 @@ def write_version(root, semver):
         f.truncate()
         f.write(contents)
 
+    # Write to docs/bazel.rst, but only if `semver` is not a dev release,
+    # because the documentation is scoped to the latest stable release only.
+    if "dev" not in semver:
+        with open(os.path.join(root, "docs/bazel.rst"), "r+") as f:
+            contents = f.read()
+            contents = re.sub(
+                r"nanobind\s+v\d+(\.\d+)+",
+                r"nanobind v" + semver,
+                contents,
+                count=1,
+            )
+            contents = re.sub(
+                r'"nanobind_bazel", version = "\d+(\.\d+)+"',
+                r'"nanobind_bazel", version = "' + semver + '"',
+                contents,
+                count=1,
+            )
+            f.seek(0)
+            f.truncate()
+            f.write(contents)
+
 
 def main():
     root = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
@@ -107,4 +129,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-
diff --git a/extern/nanobind/tests/CMakeLists.txt b/extern/nanobind/tests/CMakeLists.txt
index 441206001..b37297e8d 100644
--- a/extern/nanobind/tests/CMakeLists.txt
+++ b/extern/nanobind/tests/CMakeLists.txt
@@ -23,8 +23,8 @@ if (MSVC)
   elseif (NOT NB_TEST_CUDA)
     add_compile_options(/W4)
   endif()
-elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU")
-  add_compile_options(-Wall -Wextra -Wno-unused-local-typedefs)
+elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang|GNU|IntelLLVM")
+  add_compile_options(-Wall -Wextra)
 endif()
 
 if (UNIX AND (CMAKE_SIZEOF_VOID_P EQUAL 4) AND (CMAKE_SYSTEM_PROCESSOR STREQUAL i686))
@@ -84,7 +84,11 @@ set(TEST_NAMES
 )
 
 foreach (NAME ${TEST_NAMES})
-  nanobind_add_module(test_${NAME}_ext test_${NAME}.cpp ${NB_EXTRA_ARGS})
+  if (NAME STREQUAL classes)
+    nanobind_add_module(test_${NAME}_ext test_${NAME}.cpp test_${NAME}_extra.cpp ${NB_EXTRA_ARGS})
+  else()
+    nanobind_add_module(test_${NAME}_ext test_${NAME}.cpp ${NB_EXTRA_ARGS})
+  endif()
 
   if (NB_TEST_CUDA)
     set_property(SOURCE test_${NAME}.cpp PROPERTY LANGUAGE CUDA)
@@ -104,6 +108,7 @@ foreach (NAME functions classes ndarray jax tensorflow stl enum typing make_iter
     set(EXTRA
       MARKER_FILE py.typed
       PATTERN_FILE "${CMAKE_CURRENT_SOURCE_DIR}/pattern_file.nb"
+      EXCLUDE_VALUES
     )
     set(EXTRA_DEPENDS "${OUT_DIR}/py_stub_test.py")
   else()
@@ -172,6 +177,7 @@ set(TEST_FILES
   test_stubs.py
   test_typing.py
   test_thread.py
+  test_specialization.py
 
   # Stub reference files
   test_classes_ext.pyi.ref
diff --git a/extern/nanobind/tests/conftest.py b/extern/nanobind/tests/conftest.py
index a4e9ccfd9..7f11b5acd 100644
--- a/extern/nanobind/tests/conftest.py
+++ b/extern/nanobind/tests/conftest.py
@@ -3,4 +3,4 @@ def pytest_addoption(parser):
                      action='store_true',
                      dest="enable-slow-tests",
                      default=False,
-                     help="enable longrundecorated tests")
+                     help="enable long-running tests")
diff --git a/extern/nanobind/tests/inter_module.cpp b/extern/nanobind/tests/inter_module.cpp
index 20a1c685f..2ed9a9b40 100644
--- a/extern/nanobind/tests/inter_module.cpp
+++ b/extern/nanobind/tests/inter_module.cpp
@@ -4,6 +4,10 @@ Shared create_shared() {
     return { 123 };
 }
 
-bool check_shared(const Shared &shared) {
-    return shared.value == 123;
+bool check_shared(const Shared &shared, int expected) {
+    return shared.value == expected;
+}
+
+void increment_shared(Shared &shared) {
+    ++shared.value;
 }
diff --git a/extern/nanobind/tests/inter_module.h b/extern/nanobind/tests/inter_module.h
index c78498a6a..e13eeb7d1 100644
--- a/extern/nanobind/tests/inter_module.h
+++ b/extern/nanobind/tests/inter_module.h
@@ -11,4 +11,5 @@ struct EXPORT_SHARED Shared {
 };
 
 extern EXPORT_SHARED Shared create_shared();
-extern EXPORT_SHARED bool check_shared(const Shared &shared);
+extern EXPORT_SHARED bool check_shared(const Shared &shared, int expected);
+extern EXPORT_SHARED void increment_shared(Shared &shared);
diff --git a/extern/nanobind/tests/pattern_file.nb b/extern/nanobind/tests/pattern_file.nb
index 8b9939f58..b3bad981d 100644
--- a/extern/nanobind/tests/pattern_file.nb
+++ b/extern/nanobind/tests/pattern_file.nb
@@ -17,3 +17,9 @@ test_typing_ext.__prefix__:
 
 test_typing_ext.__suffix__:
     # a suffix
+
+test_typing_ext.Foo.__prefix__:
+    # a class prefix
+
+test_typing_ext.Foo.__suffix__:
+    # a class suffix
diff --git a/extern/nanobind/tests/py_stub_test.py b/extern/nanobind/tests/py_stub_test.py
index c0251baca..094535b7b 100644
--- a/extern/nanobind/tests/py_stub_test.py
+++ b/extern/nanobind/tests/py_stub_test.py
@@ -1,3 +1,4 @@
+"""Example module docstring."""
 import sys
 
 if sys.version_info < (3, 11, 0):
diff --git a/extern/nanobind/tests/py_stub_test.pyi.ref b/extern/nanobind/tests/py_stub_test.pyi.ref
index a520499cd..98e25141e 100644
--- a/extern/nanobind/tests/py_stub_test.pyi.ref
+++ b/extern/nanobind/tests/py_stub_test.pyi.ref
@@ -1,3 +1,5 @@
+"""Example module docstring."""
+
 from collections.abc import Callable
 from typing import TypeVar, overload
 
diff --git a/extern/nanobind/tests/test_classes.cpp b/extern/nanobind/tests/test_classes.cpp
index caaa9ec1e..51986cf53 100644
--- a/extern/nanobind/tests/test_classes.cpp
+++ b/extern/nanobind/tests/test_classes.cpp
@@ -13,6 +13,7 @@
 #include <vector>
 #include <nanobind/stl/detail/traits.h>
 #include "inter_module.h"
+#include "test_classes.h"
 
 namespace nb = nanobind;
 using namespace nb::literals;
@@ -37,6 +38,9 @@ struct Struct {
     ~Struct() { destructed++; if (nb::is_alive()) struct_destructed.push_back(i); }
 
     int value() const { return i; }
+    int value_plus(int j, int k, int l, int m, int n, int o, int p) const {
+        return i + j + k + l + m + n + o + p;
+    }
     int getstate() const { ++pickled; return i; }
     void set_value(int value) { i = value; }
     void setstate(int value) { unpickled++; i = value; }
@@ -120,11 +124,8 @@ struct UniqueInt {
 std::map<int, std::weak_ptr<UniqueInt>> UniqueInt::instances;
 
 int wrapper_tp_traverse(PyObject *self, visitproc visit, void *arg) {
-    // On Python 3.9+, we must traverse the implicit dependency
-    // of an object on its associated type object.
-    #if PY_VERSION_HEX >= 0x03090000
-        Py_VISIT(Py_TYPE(self));
-    #endif
+    // We must traverse the implicit dependency of an object on its associated type object.
+    Py_VISIT(Py_TYPE(self));
 
     // The tp_traverse method may be called after __new__ but before or during
     // __init__, before the C++ constructor has been called. We must not inspect
@@ -163,6 +164,7 @@ NB_MODULE(test_classes_ext, m) {
         .def(nb::init<>())
         .def(nb::init<int>())
         .def("value", &Struct::value)
+        .def("value_plus", &Struct::value_plus)
         .def("set_value", &Struct::set_value, "value"_a)
         .def("self", &Struct::self, nb::rv_policy::none)
         .def("none", [](Struct &) -> const Struct * { return nullptr; })
@@ -555,6 +557,25 @@ NB_MODULE(test_classes_ext, m) {
     using NonCopyableVec = std::vector<NonCopyable>;
     nb::class_<NonCopyableVec>(m, "NonCopyableVec");
 
+    struct PrivateNonCopyable {
+        static PrivateNonCopyable &get_instance() {
+            static PrivateNonCopyable i;
+            return i;
+        }
+
+        int get_int() { return 42; }
+    private:
+        PrivateNonCopyable() {}
+        PrivateNonCopyable(const PrivateNonCopyable&) = delete;
+        PrivateNonCopyable &operator=(const PrivateNonCopyable&) = delete;
+    };
+
+    // #1249 this didn't compile previously
+    struct my_call_guard {};
+    nb::class_<PrivateNonCopyable>(m, "PrivateNonCopyable")
+        .def_static("get_instance", &PrivateNonCopyable::get_instance, nb::call_guard<my_call_guard>(), nb::rv_policy::reference)
+        .def("get_int", &PrivateNonCopyable::get_int);
+
     m.def("is_int_1", [](nb::handle h) { return nb::isinstance<int>(h); });
     m.def("is_int_2", [](nb::handle h) { return nb::isinstance<nb::int_>(h); });
     m.def("is_struct", [](nb::handle h) { return nb::isinstance<Struct>(h); });
@@ -644,6 +665,11 @@ NB_MODULE(test_classes_ext, m) {
                nb::is_weak_referenceable(), nb::dynamic_attr())
         .def(nb::init<int>());
 
+    // test50_weakref_with_slots_subclass
+    struct StructWithWeakrefsOnly : Struct { };
+    nb::class_<StructWithWeakrefsOnly, Struct>(m, "StructWithWeakrefsOnly", nb::is_weak_referenceable())
+        .def(nb::init<int>());
+
     union Union {
         int i;
         float f;
@@ -680,6 +706,7 @@ NB_MODULE(test_classes_ext, m) {
     // issue #786
     struct NewNone {};
     struct NewDflt { int value; };
+    struct NewStarPosOnly { size_t value; };
     struct NewStar { size_t value; };
     nb::class_<NewNone>(m, "NewNone")
         .def(nb::new_([]() { return NewNone(); }));
@@ -687,6 +714,12 @@ NB_MODULE(test_classes_ext, m) {
         .def(nb::new_([](int value) { return NewDflt{value}; }),
              "value"_a = 42)
         .def_ro("value", &NewDflt::value);
+    nb::class_<NewStarPosOnly>(m, "NewStarPosOnly")
+        .def(nb::new_([](nb::args a, int value) {
+            return NewStarPosOnly{nb::len(a) + value};
+        }),
+            "args"_a, "value"_a = 42)
+        .def_ro("value", &NewStarPosOnly::value);
     nb::class_<NewStar>(m, "NewStar")
         .def(nb::new_([](nb::args a, int value, nb::kwargs k) {
             return NewStar{nb::len(a) + value + 10 * nb::len(k)};
@@ -731,4 +764,39 @@ NB_MODULE(test_classes_ext, m) {
         .def_prop_ro_static("x", [](nb::handle /*unused*/) { return 42; });
     nb::class_<StaticPropertyOverride2, StaticPropertyOverride>(m, "StaticPropertyOverride2")
         .def_prop_ro_static("x", [](nb::handle /*unused*/) { return 43; });
+
+
+    // nanobind::detail::trampoline's constructor must be constexpr otherwise
+    // the trampoline will not compile under MSVC
+    struct ConstexprClass {
+        constexpr ConstexprClass(int i) : something(i) {}
+        virtual ~ConstexprClass() = default;
+
+        virtual int getInt() const {
+            return 1;
+        };
+
+        int something;
+    };
+
+    struct PyConstexprClass : ConstexprClass {
+        NB_TRAMPOLINE(ConstexprClass, 1);
+
+        int getInt() const override {
+            NB_OVERRIDE(getInt);
+        }
+    };
+
+    auto constexpr_class = nb::class_<ConstexprClass, PyConstexprClass>(m, "ConstexprClass")
+        .def(nb::init<int>())
+        .def("getInt", &ConstexprClass::getInt);
+
+    m.def("constexpr_call_getInt", [](ConstexprClass *c) {
+        return c->getInt();
+    });
+
+    auto never_destruct_class = nb::class_<NeverDestruct>(m, "NeverDestruct", nb::never_destruct())
+        .def_static("make_ref", &NeverDestruct::make, nb::rv_policy::reference)
+        .def("var", &NeverDestruct::var)
+        .def("set_var", &NeverDestruct::set_var);
 }
diff --git a/extern/nanobind/tests/test_classes.h b/extern/nanobind/tests/test_classes.h
new file mode 100644
index 000000000..c42652d02
--- /dev/null
+++ b/extern/nanobind/tests/test_classes.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include <memory>
+
+class NeverDestruct {
+  public:
+    static NeverDestruct& make();
+
+    NeverDestruct(const NeverDestruct&) = delete;
+    NeverDestruct& operator=(const NeverDestruct&) = delete;
+
+    int var() const;
+    void set_var(int i);
+
+  private:
+    NeverDestruct();
+
+    // NDImpl is incomplete here: compile error if nanobind tries to instantiate the destructor
+    struct NDImpl;
+    std::unique_ptr<NDImpl> impl;
+};
diff --git a/extern/nanobind/tests/test_classes.py b/extern/nanobind/tests/test_classes.py
index a34fa65a0..78c6c9f0d 100644
--- a/extern/nanobind/tests/test_classes.py
+++ b/extern/nanobind/tests/test_classes.py
@@ -3,11 +3,6 @@
 import pytest
 from common import skip_on_pypy, collect
 
-# Some helper declaration to check types across different Python versions
-if sys.version_info < (3, 9):
-    TYPING_TYPE = "typing.Type"
-else:
-    TYPING_TYPE = "type"
 
 
 def optional(arg: str, /) -> str:
@@ -59,6 +54,7 @@ def test02_static_overload():
 def test03_instantiate(clean):
     s1: t.Struct = t.Struct()
     assert s1.value() == 5
+    assert s1.value_plus(1, 2, 3, 4, 5, 6, 7) == 33
     s2 = t.Struct(10)
     assert s2.value() == 10
     del s1
@@ -527,7 +523,7 @@ def test23_handle_t(clean):
 def test24_type_object_t(clean):
     assert (
         t.test_type_object_t.__doc__
-        == f"test_type_object_t(arg: {TYPING_TYPE}[test_classes_ext.Struct], /) -> object"
+        == "test_type_object_t(arg: type[test_classes_ext.Struct], /) -> object"
     )
 
     assert t.test_type_object_t(t.Struct) is t.Struct
@@ -895,6 +891,10 @@ def test46_custom_new():
     t.NewNone()
     assert t.NewDflt().value == 42
     assert t.NewDflt(10).value == 10
+    assert t.NewStarPosOnly().value == 42
+    assert t.NewStarPosOnly("hi").value == 43
+    assert t.NewStarPosOnly(value=10).value == 10
+    assert t.NewStarPosOnly("hi", "lo", value=10).value == 12
     assert t.NewStar().value == 42
     assert t.NewStar("hi").value == 43
     assert t.NewStar(value=10).value == 10
@@ -941,3 +941,54 @@ def my_init(self):
 def test49_static_property_override():
     assert t.StaticPropertyOverride.x == 42
     assert t.StaticPropertyOverride2.x == 43
+
+def test50_weakref_with_slots_subclass():
+    """
+    Test that Python subclasses work correctly with nb::is_weak_referenceable()
+    base classes. The nb::is_weak_referenceable() flag causes nanobind to
+    install tp_traverse/tp_clear callbacks. When Python subclasses add their
+    own instance dictionaries (e.g., via managed dicts on Python 3.12+),
+    subtype_traverse calls our tp_traverse. We must only traverse dicts/weaklists
+    created by nanobind, not those added by Python.
+
+    Regression test for issue #1201.
+    """
+    import gc
+
+    # Create a Python subclass with __slots__
+    class SubClass(t.StructWithWeakrefsOnly):
+        __slots__ = 'hello',
+
+    # Create a sub-subclass without __slots__ (which should get a __dict__)
+    class SubSubClass(SubClass):
+        pass
+
+    # This should not crash
+    x = SubSubClass(42)
+    x.bye = 'blah'
+    assert x.value() == 42
+    assert x.bye == 'blah'
+
+    # Trigger GC to ensure inst_traverse doesn't crash
+    gc.collect()
+    gc.collect()
+
+    # Clean up
+    del x
+    gc.collect()
+
+def test51_constexpr_trampoline():
+    class PyConstexprClass(t.ConstexprClass):
+        def getInt(self):
+            return 42
+
+    c = PyConstexprClass(4)
+    assert t.constexpr_call_getInt(c) == 42
+
+def test52_noncopyable():
+    assert t.PrivateNonCopyable.get_instance().get_int() == 42
+
+def test53_never_destruct():
+    r = t.NeverDestruct.make_ref()
+    r.set_var(5)
+    assert r.var() == 5
diff --git a/extern/nanobind/tests/test_classes_ext.pyi.ref b/extern/nanobind/tests/test_classes_ext.pyi.ref
index d9b859353..e816904fc 100644
--- a/extern/nanobind/tests/test_classes_ext.pyi.ref
+++ b/extern/nanobind/tests/test_classes_ext.pyi.ref
@@ -12,6 +12,8 @@ class Struct:
 
     def value(self) -> int: ...
 
+    def value_plus(self, arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, /) -> int: ...
+
     def set_value(self, value: int) -> None: ...
 
     def self(self) -> Struct: ...
@@ -230,6 +232,12 @@ class Wrapper:
 class NonCopyableVec:
     pass
 
+class PrivateNonCopyable:
+    @staticmethod
+    def get_instance() -> PrivateNonCopyable: ...
+
+    def get_int(self) -> int: ...
+
 def is_int_1(arg: object, /) -> bool: ...
 
 def is_int_2(arg: object, /) -> bool: ...
@@ -281,6 +289,9 @@ class StructWithWeakrefs(Struct):
 class StructWithWeakrefsAndDynamicAttrs(Struct):
     def __init__(self, arg: int, /) -> None: ...
 
+class StructWithWeakrefsOnly(Struct):
+    def __init__(self, arg: int, /) -> None: ...
+
 class Union:
     def __init__(self) -> None: ...
 
@@ -335,6 +346,12 @@ class NewDflt:
     @property
     def value(self) -> int: ...
 
+class NewStarPosOnly:
+    def __init__(self, *args, value: int = 42) -> None: ...
+
+    @property
+    def value(self) -> int: ...
+
 class NewStar:
     def __init__(self, *args, value: int = 42, **kwargs) -> None: ...
 
@@ -360,3 +377,18 @@ class StaticPropertyOverride:
 class StaticPropertyOverride2(StaticPropertyOverride):
     x: int = ...
     """(arg: object, /) -> int"""
+
+class ConstexprClass:
+    def __init__(self, arg: int, /) -> None: ...
+
+    def getInt(self) -> int: ...
+
+def constexpr_call_getInt(arg: ConstexprClass, /) -> int: ...
+
+class NeverDestruct:
+    @staticmethod
+    def make_ref() -> NeverDestruct: ...
+
+    def var(self) -> int: ...
+
+    def set_var(self, arg: int, /) -> None: ...
diff --git a/extern/nanobind/tests/test_classes_extra.cpp b/extern/nanobind/tests/test_classes_extra.cpp
new file mode 100644
index 000000000..ff6add900
--- /dev/null
+++ b/extern/nanobind/tests/test_classes_extra.cpp
@@ -0,0 +1,22 @@
+#include "test_classes.h"
+
+struct NeverDestruct::NDImpl{
+    int var = 0;
+};
+
+NeverDestruct::NeverDestruct() {
+    impl = std::make_unique<NeverDestruct::NDImpl>();
+}
+
+int NeverDestruct::var() const {
+    return impl->var;
+}
+
+void NeverDestruct::set_var(int i) {
+    impl->var = i;
+}
+
+NeverDestruct& NeverDestruct::make() {
+    static NeverDestruct nd;
+    return nd;
+}
diff --git a/extern/nanobind/tests/test_eigen.py b/extern/nanobind/tests/test_eigen.py
index f8d5ac049..cd5a2a5d6 100644
--- a/extern/nanobind/tests/test_eigen.py
+++ b/extern/nanobind/tests/test_eigen.py
@@ -466,3 +466,20 @@ def test17_sparse_map_complex():
     c1 = scipy.sparse.csc_matrix([[1j+2, 0], [-3j, 1]], dtype=np.complex128)
     c2 = t.sparse_complex_map_c(c1)
     assert np.array_equal(c1.todense(), c2.todense())
+
+
+@needs_numpy_and_eigen
+def test18_zero_size_vec():
+    # Test for stride issues after numpy 2.4, when using zero-size arrays
+    a = np.ones((0, 2), dtype=np.uint32, order='C')
+    b = np.ones((0, 2), dtype=np.uint32, order='C')
+    print(a.strides)
+    print(b.strides)
+    assert_array_equal(t.addRefCnstMXuCC(a, b), a + b)
+    assert_array_equal(t.addRefCnstMXuCC_nc(a, b), a + b)
+    assert_array_equal(t.addMapCnstMXuCC(a, b), a + b)
+
+    c = np.zeros(0, dtype=np.int32)
+    assert_array_equal(t.castToRefVXi(c), c)
+    assert_array_equal(t.castToMapCnstVXi(c), c)
+
diff --git a/extern/nanobind/tests/test_enum.cpp b/extern/nanobind/tests/test_enum.cpp
index 08698a785..71badf3e2 100644
--- a/extern/nanobind/tests/test_enum.cpp
+++ b/extern/nanobind/tests/test_enum.cpp
@@ -19,6 +19,9 @@ struct EnumProperty { Enum get_enum() { return Enum::A; } };
 enum class OpaqueEnum { X, Y };
 NB_MAKE_OPAQUE(OpaqueEnum)
 
+// Enum with members named 'name' and 'value' to test stubgen (issue #1246)
+enum class Item { name, value, extra };
+
 NB_MODULE(test_enum_ext, m) {
     nb::enum_<Enum>(m, "Enum", "enum-level docstring")
         .value("A", Enum::A, "Value A")
@@ -85,4 +88,12 @@ NB_MODULE(test_enum_ext, m) {
         })
         .def(nb::self == nb::self);
     nb::implicitly_convertible<std::string, OpaqueEnum>();
+
+    // Enum with members named 'name' and 'value' (issue #1246)
+    nb::enum_<Item>(m, "Item")
+        .value("name", Item::name)
+        .value("value", Item::value)
+        .value("extra", Item::extra);
+
+    m.def("item_to_int", [](Item i) { return (int) i; }, nb::arg("item") = Item::name);
 }
diff --git a/extern/nanobind/tests/test_enum.py b/extern/nanobind/tests/test_enum.py
index 560578051..38f165a96 100644
--- a/extern/nanobind/tests/test_enum.py
+++ b/extern/nanobind/tests/test_enum.py
@@ -187,3 +187,18 @@ def test09_enum_methods():
 
 def test10_enum_opaque():
     assert t.OpaqueEnum.X == t.OpaqueEnum("X") and t.OpaqueEnum.Y == t.OpaqueEnum("Y")
+
+def test11_enum_name_value_members():
+    # Test for issue #1246: enums with members named 'name' or 'value'
+    # When an enum has members named 'name' or 'value', accessing .name/.value
+    # returns the enum member instead of the attribute. Use _name_/_value_.
+    assert t.Item.name._value_ == 0
+    assert t.Item.value._value_ == 1
+    assert t.Item.extra._value_ == 2
+    assert t.Item.name._name_ == 'name'
+    assert t.Item.value._name_ == 'value'
+    assert t.Item.extra._name_ == 'extra'
+    assert t.item_to_int(t.Item.name) == 0
+    assert t.item_to_int(t.Item.value) == 1
+    assert t.item_to_int(t.Item.extra) == 2
+    assert t.item_to_int() == 0  # default is Item.name
diff --git a/extern/nanobind/tests/test_enum_ext.pyi.ref b/extern/nanobind/tests/test_enum_ext.pyi.ref
index 3e6286e37..3e0c06a5e 100644
--- a/extern/nanobind/tests/test_enum_ext.pyi.ref
+++ b/extern/nanobind/tests/test_enum_ext.pyi.ref
@@ -120,3 +120,12 @@ class OpaqueEnum:
     """(arg: object, /) -> test_enum_ext.OpaqueEnum"""
 
     def __eq__(self, arg: OpaqueEnum, /) -> bool: ...
+
+class Item(enum.Enum):
+    name = 0
+
+    value = 1
+
+    extra = 2
+
+def item_to_int(item: Item = Item.name) -> int: ...
diff --git a/extern/nanobind/tests/test_functions.cpp b/extern/nanobind/tests/test_functions.cpp
index 8104a6d8d..3c2010dfc 100644
--- a/extern/nanobind/tests/test_functions.cpp
+++ b/extern/nanobind/tests/test_functions.cpp
@@ -138,6 +138,12 @@ NB_MODULE(test_functions_ext, m) {
         return std::make_pair(args.size(), kwargs.size());
     }, "a"_a, "b"_a, "myargs"_a, "mykwargs"_a);
 
+    /// Function with eight arguments
+    m.def("test_simple",
+        [](int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7) {
+            return i0 + i1 + i2 + i3 + i4 + i5 + i6 - i7;
+        });
+
     /// Test successful/unsuccessful tuple conversion, with rich output types
     m.def("test_tuple", []() -> nb::typed<nb::tuple, std::string, int> {
         return nb::make_tuple("Hello", 123); });
diff --git a/extern/nanobind/tests/test_functions.py b/extern/nanobind/tests/test_functions.py
index eeae614f4..aa779b892 100644
--- a/extern/nanobind/tests/test_functions.py
+++ b/extern/nanobind/tests/test_functions.py
@@ -3,11 +3,6 @@
 import sys
 import re
 
-# Some helper declaration to check types across different Python versions
-if sys.version_info < (3, 9):
-    TYPING_TUPLE = "typing.Tuple"
-else:
-    TYPING_TUPLE = "tuple"
 
 # Reference counting behavior changed on 3.14a7+
 py_3_14a7_or_newer = sys.version_info >= (3, 14, 0, 'alpha', 7)
@@ -22,6 +17,7 @@ def test01_capture():
     assert t.test_02(5, 3) == 2
     assert t.test_03(5, 3) == 44
     assert t.test_04() == 60
+    assert t.test_simple(0, 1, 2, 3, 4, 5, 6, 7) == 14
 
 
 def test02_default_args():
@@ -91,8 +87,8 @@ def test05_signature():
     )
 
     assert t.test_07.__doc__ == (
-        f"test_07(arg0: int, arg1: int, /, *args, **kwargs) -> {TYPING_TUPLE}[int, int]\n"
-        f"test_07(a: int, b: int, *myargs, **mykwargs) -> {TYPING_TUPLE}[int, int]"
+        "test_07(arg0: int, arg1: int, /, *args, **kwargs) -> tuple[int, int]\n"
+        "test_07(a: int, b: int, *myargs, **mykwargs) -> tuple[int, int]"
     )
 
 
@@ -283,7 +279,6 @@ def test23_byte_return():
     assert t.test_18("hello world", 5) == b"hello"
 
 
-@pytest.mark.skipif(sys.version_info < (3, 9), reason="requires python3.9 or higher")
 def test24_pydoc():
     import pydoc
 
@@ -480,19 +475,18 @@ def test40_nb_signature():
         (r"def test_05(arg: int, /) -> int", "doc_1", None),
         (r"def test_05(arg: float, /) -> int", "doc_2", None),
     )
-    if sys.version_info >= (3, 9):
-        assert t.test_07.__nb_signature__ == (
-            (
-                r"def test_07(arg0: int, arg1: int, /, *args, **kwargs) -> tuple[int, int]",
-                None,
-                None,
-            ),
-            (
-                r"def test_07(a: int, b: int, *myargs, **mykwargs) -> tuple[int, int]",
-                None,
-                None,
-            ),
-        )
+    assert t.test_07.__nb_signature__ == (
+        (
+            r"def test_07(arg0: int, arg1: int, /, *args, **kwargs) -> tuple[int, int]",
+            None,
+            None,
+        ),
+        (
+            r"def test_07(a: int, b: int, *myargs, **mykwargs) -> tuple[int, int]",
+            None,
+            None,
+        ),
+    )
 
 
 def test41_kw_only():
diff --git a/extern/nanobind/tests/test_functions_ext.pyi.ref b/extern/nanobind/tests/test_functions_ext.pyi.ref
index 64ba9af57..8809c5f6c 100644
--- a/extern/nanobind/tests/test_functions_ext.pyi.ref
+++ b/extern/nanobind/tests/test_functions_ext.pyi.ref
@@ -1,3 +1,5 @@
+"""function testcase"""
+
 from collections.abc import Callable
 import types
 from typing import Annotated, Any, overload
@@ -41,6 +43,8 @@ def test_07(arg0: int, arg1: int, /, *args, **kwargs) -> tuple[int, int]: ...
 @overload
 def test_07(a: int, b: int, *myargs, **mykwargs) -> tuple[int, int]: ...
 
+def test_simple(arg0: int, arg1: int, arg2: int, arg3: int, arg4: int, arg5: int, arg6: int, arg7: int, /) -> int: ...
+
 @overload
 def test_tuple() -> tuple[str, int]: ...
 
diff --git a/extern/nanobind/tests/test_inter_module.py b/extern/nanobind/tests/test_inter_module.py
index 97f618710..8a6f44321 100644
--- a/extern/nanobind/tests/test_inter_module.py
+++ b/extern/nanobind/tests/test_inter_module.py
@@ -4,10 +4,86 @@
 import pytest
 from common import xfail_on_pypy_darwin
 
+try:
+    from concurrent import interpreters  # Added in Python 3.14
+    def needs_interpreters(x):  # module present: leave the test unchanged
+        return x
+except ImportError:  # module unavailable (e.g. Python < 3.14): skip the test
+    needs_interpreters = pytest.mark.skip(reason="interpreters required")
+
+
 @xfail_on_pypy_darwin
 def test01_inter_module():
     s = t1.create_shared()
-    assert t2.check_shared(s)
+    assert t2.check_shared(s, 123)
+    t2.increment_shared(s)
+    assert t2.check_shared(s, 124)
     with pytest.raises(TypeError) as excinfo:
         assert t3.check_shared(s)
     assert 'incompatible function arguments' in str(excinfo.value)
+
+
+@xfail_on_pypy_darwin
+def test02_reload_module():
+    s1 = t1.create_shared()
+    s2 = t1.create_shared()
+    assert s2 is not s1
+    assert type(s2) is type(s1)
+    t2.increment_shared(s2)
+    import importlib
+    new_t1 = importlib.reload(t1)
+    assert new_t1 is t1
+    s3 = new_t1.create_shared()
+    assert type(s3) is type(s1)
+    new_t2 = importlib.reload(t2)
+    assert new_t2 is t2
+    s4 = new_t1.create_shared()
+    assert type(s4) is type(s1)
+    assert new_t2.check_shared(s2, 124)
+
+
+@xfail_on_pypy_darwin
+def test03_reimport_module():
+    s1 = t1.create_shared()
+    s2 = t1.create_shared()
+    t2.increment_shared(s2)
+    import sys
+    del sys.modules['test_inter_module_1_ext']
+    import test_inter_module_1_ext as new_t1
+    assert new_t1 is not t1
+    s3 = new_t1.create_shared()
+    assert type(s3) is type(s1)
+    del sys.modules['test_inter_module_2_ext']
+    with pytest.warns(RuntimeWarning, match="'Shared' was already registered"):
+        import test_inter_module_2_ext as new_t2
+    assert new_t2 is not t2
+    s4 = new_t1.create_shared()
+    assert type(s4) is type(s1)
+    assert new_t2.check_shared(s2, 124)
+
+
+def run():
+    import sys
+    if 'tests' not in sys.path[0]:
+        import os
+        builddir = sys.path[0]
+        sys.path.insert(0, os.path.join(builddir, 'tests', 'Release'))
+        sys.path.insert(0, os.path.join(builddir, 'tests', 'Debug'))
+        sys.path.insert(0, os.path.join(builddir, 'tests'))
+    import test_inter_module_1_ext as new_t1
+    import test_inter_module_2_ext as new_t2
+    success = True
+    s = new_t1.create_shared()
+    success &= new_t2.check_shared(s, 123)
+    new_t2.increment_shared(s)
+    success &= new_t2.check_shared(s, 124)
+    return success
+
+@needs_interpreters
+def test04_subinterpreters():
+    assert run()
+    interp = interpreters.create()
+    with pytest.raises(interpreters.ExecutionFailed) as excinfo:
+        assert interp.call(run)
+    assert 'does not support loading in subinterpreters' in str(excinfo.value)
+    interp.close()
diff --git a/extern/nanobind/tests/test_inter_module_2.cpp b/extern/nanobind/tests/test_inter_module_2.cpp
index 3e0a7785b..5bea70b8b 100644
--- a/extern/nanobind/tests/test_inter_module_2.cpp
+++ b/extern/nanobind/tests/test_inter_module_2.cpp
@@ -6,4 +6,5 @@ namespace nb = nanobind;
 NB_MODULE(test_inter_module_2_ext, m) {
     nb::class_<Shared>(m, "Shared");
     m.def("check_shared", &check_shared);
+    m.def("increment_shared", &increment_shared);
 }
diff --git a/extern/nanobind/tests/test_jax.cpp b/extern/nanobind/tests/test_jax.cpp
index 0729d1ddd..9f9f597c3 100644
--- a/extern/nanobind/tests/test_jax.cpp
+++ b/extern/nanobind/tests/test_jax.cpp
@@ -8,15 +8,18 @@ int destruct_count = 0;
 NB_MODULE(test_jax_ext, m) {
     m.def("destruct_count", []() { return destruct_count; });
     m.def("ret_jax", []() {
-        float *f = new float[8] { 1, 2, 3, 4, 5, 6, 7, 8 };
+        struct alignas(64) Buf {
+            float f[8];
+        };
+        Buf *buf = new Buf({ 1, 2, 3, 4, 5, 6, 7, 8 });
         size_t shape[2] = { 2, 4 };
 
-        nb::capsule deleter(f, [](void *data) noexcept {
+        nb::capsule deleter(buf, [](void *p) noexcept {
            destruct_count++;
-           delete[] (float *) data;
+           delete (Buf *) p;
         });
 
-        return nb::ndarray<nb::jax, float, nb::shape<2, 4>>(f, 2, shape,
+        return nb::ndarray<nb::jax, float, nb::shape<2, 4>>(buf->f, 2, shape,
                                                             deleter);
     });
 }
diff --git a/extern/nanobind/tests/test_jax.py b/extern/nanobind/tests/test_jax.py
index e69de29bb..6802e6df4 100644
--- a/extern/nanobind/tests/test_jax.py
+++ b/extern/nanobind/tests/test_jax.py
@@ -0,0 +1,97 @@
+import test_ndarray_ext as t
+import test_jax_ext as tj
+import pytest
+import warnings
+import importlib
+from common import collect
+
+try:
+    import jax.numpy as jnp
+    def needs_jax(x):
+        return x
+except:
+    needs_jax = pytest.mark.skip(reason="JAX is required")
+
+
+@needs_jax
+def test01_constrain_order():
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        try:
+            c = jnp.zeros((3, 5))
+        except:
+            pytest.skip('jax is missing')
+
+    z = jnp.zeros((3, 5, 4, 6))
+    assert t.check_order(z) == 'C'
+
+
+@needs_jax
+def test02_implicit_conversion():
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        try:
+            c = jnp.zeros((3, 5))
+        except:
+            pytest.skip('jax is missing')
+
+    t.implicit(jnp.zeros((2, 2), dtype=jnp.int32))
+    t.implicit(jnp.zeros((2, 2, 10), dtype=jnp.float32)[:, :, 4])
+    t.implicit(jnp.zeros((2, 2, 10), dtype=jnp.int32)[:, :, 4])
+    t.implicit(jnp.zeros((2, 2, 10), dtype=jnp.bool_)[:, :, 4])
+
+    with pytest.raises(TypeError) as excinfo:
+        t.noimplicit(jnp.zeros((2, 2), dtype=jnp.int32))
+
+    with pytest.raises(TypeError) as excinfo:
+        t.noimplicit(jnp.zeros((2, 2), dtype=jnp.uint8))
+
+
+@needs_jax
+def test03_return_jax():
+    collect()
+    dc = tj.destruct_count()
+    x = tj.ret_jax()
+    assert x.shape == (2, 4)
+    assert jnp.all(x == jnp.array([[1,2,3,4], [5,6,7,8]], dtype=jnp.float32))
+    del x
+    collect()
+    assert tj.destruct_count() - dc == 1
+
+
+@needs_jax
+def test04_check():
+    assert t.check(jnp.zeros((1)))
+
+
+@needs_jax
+def test05_passthrough():
+    a = tj.ret_jax()
+    b = t.passthrough(a)
+    assert a is b
+
+    a = jnp.array([1, 2, 3])
+    b = t.passthrough(a)
+    assert a is b
+
+    a = None
+    with pytest.raises(TypeError) as excinfo:
+        b = t.passthrough(a)
+    assert 'incompatible function arguments' in str(excinfo.value)
+    b = t.passthrough_arg_none(a)
+    assert a is b
+
+
+@needs_jax
+def test06_ro_array():
+    if (not hasattr(jnp, '__array_api_version__')
+        or jnp.__array_api_version__ < '2024'):
+        pytest.skip('jax version is too old')
+    a = jnp.array([1, 2], dtype=jnp.float32)  # JAX arrays are immutable.
+    assert t.accept_ro(a) == 1
+    # If the next line fails, delete it, update the array_api_version above,
+    # and uncomment the three lines below.
+    assert t.accept_rw(a) == 1
+    # with pytest.raises(TypeError) as excinfo:
+    #     t.accept_rw(a)
+    # assert 'incompatible function arguments' in str(excinfo.value)
diff --git a/extern/nanobind/tests/test_ndarray.cpp b/extern/nanobind/tests/test_ndarray.cpp
index eda0374ed..df46dfed3 100644
--- a/extern/nanobind/tests/test_ndarray.cpp
+++ b/extern/nanobind/tests/test_ndarray.cpp
@@ -1,5 +1,6 @@
 #include <nanobind/nanobind.h>
 #include <nanobind/ndarray.h>
+#include <nanobind/stl/pair.h>
 #include <algorithm>
 #include <complex>
 #include <vector>
@@ -12,9 +13,9 @@ int destruct_count = 0;
 static float f_global[] { 1, 2, 3, 4, 5, 6, 7, 8 };
 static int i_global[] { 1, 2, 3, 4, 5, 6, 7, 8 };
 
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(__AVX512FP16__)
 namespace nanobind::detail {
-    template <> struct dtype_traits<__fp16> {
+    template <> struct dtype_traits<_Float16> {
         static constexpr dlpack::dtype value {
             (uint8_t) dlpack::dtype_code::Float, // type code
             16, // size in bits
@@ -188,6 +189,12 @@ NB_MODULE(test_ndarray_ext, m) {
     m.def("check_device", [](nb::ndarray<nb::device::cpu>) -> const char * { return "cpu"; });
     m.def("check_device", [](nb::ndarray<nb::device::cuda>) -> const char * { return "cuda"; });
 
+    m.def("initialize",
+          [](nb::ndarray<unsigned char, nb::shape<10>, nb::device::cpu> &t) {
+              for (size_t i = 0; i < 10; ++i)
+                t(i) = (unsigned char) i;
+          });
+
     m.def("initialize",
           [](nb::ndarray<float, nb::shape<10>, nb::device::cpu> &t) {
               for (size_t i = 0; i < 10; ++i)
@@ -240,7 +247,8 @@ NB_MODULE(test_ndarray_ext, m) {
     });
 
     m.def("destruct_count", []() { return destruct_count; });
-    m.def("return_dlpack", []() {
+
+    m.def("return_no_framework", []() {
         float *f = new float[8] { 1, 2, 3, 4, 5, 6, 7, 8 };
         size_t shape[2] = { 2, 4 };
 
@@ -299,16 +307,41 @@ NB_MODULE(test_ndarray_ext, m) {
                                                                 deleter);
     });
 
+    m.def("ret_memview", []() {
+        double *d = new double[8] { 1, 2, 3, 4, 5, 6, 7, 8 };
+        size_t shape[2] = { 2, 4 };
+
+        nb::capsule deleter(d, [](void *data) noexcept {
+           destruct_count++;
+           delete[] (double *) data;
+        });
+
+        return nb::ndarray<nb::memview, double, nb::shape<2, 4>>(d, 2, shape,
+                                                                 deleter);
+    });
+
+    m.def("ret_array_api", []() {
+        double *d = new double[8] { 1, 2, 3, 4, 5, 6, 7, 8 };
+        size_t shape[2] = { 2, 4 };
+
+        nb::capsule deleter(d, [](void *data) noexcept {
+           destruct_count++;
+           delete[] (double *) data;
+        });
+
+        return nb::ndarray<nb::array_api, double, nb::shape<2, 4>>(d, 2, shape,
+                                                                   deleter);
+    });
+
     m.def("ret_array_scalar", []() {
-            float* f = new float[1] { 1 };
-            size_t shape[1] = {};
+            float* f = new float{ 1.0f };
 
             nb::capsule deleter(f, [](void* data) noexcept {
                 destruct_count++;
-                delete[] (float *) data;
+                delete (float *) data;
             });
 
-            return nb::ndarray<nb::numpy, float>(f, 0, shape, deleter);
+            return nb::ndarray<nb::numpy, float>(f, 0, nullptr, deleter);
     });
 
     m.def("noop_3d_c_contig",
@@ -338,7 +371,7 @@ NB_MODULE(test_ndarray_ext, m) {
            destruct_count++;
         }
 
-        float data [10] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
+        float data[10] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
     };
 
     nb::class_<Cls>(m, "Cls")
@@ -392,17 +425,17 @@ NB_MODULE(test_ndarray_ext, m) {
             v(i) = -v(i);
     }, "x"_a.noconvert());
 
-#if defined(__aarch64__)
+#if defined(__aarch64__) || defined(__AVX512FP16__)
     m.def("ret_numpy_half", []() {
-        __fp16 *f = new __fp16[8] { 1, 2, 3, 4, 5, 6, 7, 8 };
+        _Float16 *f = new _Float16[8] { 1, 2, 3, 4, 5, 6, 7, 8 };
         size_t shape[2] = { 2, 4 };
 
         nb::capsule deleter(f, [](void *data) noexcept {
             destruct_count++;
-            delete[] (__fp16*) data;
+            delete[] (_Float16*) data;
         });
-        return nb::ndarray<nb::numpy, __fp16, nb::shape<2, 4>>(f, 2, shape,
-                                                               deleter);
+        return nb::ndarray<nb::numpy, _Float16, nb::shape<2, 4>>(f, 2, shape,
+                                                                 deleter);
     });
 #endif
 
@@ -472,9 +505,7 @@ NB_MODULE(test_ndarray_ext, m) {
             Wrapper* w = nb::inst_ptr<Wrapper>(self);
             nb::handle value = nb::find(w->value);
             Py_VISIT(value.ptr());
-#if PY_VERSION_HEX >= 0x03090000
             Py_VISIT(Py_TYPE(self));
-#endif
             return 0;
         }
 
@@ -494,4 +525,33 @@ NB_MODULE(test_ndarray_ext, m) {
     nb::class_<Wrapper>(m, "Wrapper", nb::type_slots(wrapper_slots))
         .def(nb::init<nb::ndarray<float>>())
         .def_rw("value", &Wrapper::value);
+
+    // Example from docs/ndarray.rst in section "Array libraries"
+    class MyArray {
+        double* d;
+     public:
+        MyArray() { d = new double[5] { 0.0, 1.0, 2.0, 3.0, 4.0 }; }
+        ~MyArray() { delete[] d; }
+        double* data() const { return d; }
+        void mutate() { for (int i = 0; i < 5; ++i) d[i] += 0.5; }
+    };
+
+    nb::class_<MyArray>(m, "MyArray")
+       .def(nb::init<>())
+       .def("mutate", &MyArray::mutate)
+       .def("__dlpack__", [](nb::pointer_and_handle<MyArray> self,
+                             nb::kwargs kwargs) {
+               using array_api_t = nb::ndarray<nb::array_api, double>;
+               nb::object aa = nb::cast(array_api_t(self.p->data(), {5}),
+                                        nb::rv_policy::reference_internal,
+                                        self.h);
+               return aa.attr("__dlpack__")(**kwargs);
+           })
+       .def("__dlpack_device__", [](nb::handle /*self*/) {
+               return std::make_pair(nb::device::cpu::value, 0);
+           })
+       .def("array_api", [](const MyArray& self) {
+               return nb::ndarray<nb::array_api, double>(self.data(), {5});
+           }, nb::rv_policy::reference_internal);
+
 }
diff --git a/extern/nanobind/tests/test_ndarray.py b/extern/nanobind/tests/test_ndarray.py
index 75de2fecd..72ee7b939 100644
--- a/extern/nanobind/tests/test_ndarray.py
+++ b/extern/nanobind/tests/test_ndarray.py
@@ -1,10 +1,8 @@
 import test_ndarray_ext as t
-import test_jax_ext as tj
-import test_tensorflow_ext as tt
 import pytest
 import warnings
 import importlib
-from common import collect
+from common import collect, skip_on_pypy
 
 try:
     import numpy as np
@@ -20,21 +18,6 @@ def needs_torch(x):
 except:
     needs_torch = pytest.mark.skip(reason="PyTorch is required")
 
-try:
-    import tensorflow as tf
-    import tensorflow.config
-    def needs_tensorflow(x):
-        return x
-except:
-    needs_tensorflow = pytest.mark.skip(reason="TensorFlow is required")
-
-try:
-    import jax.numpy as jnp
-    def needs_jax(x):
-        return x
-except:
-    needs_jax = pytest.mark.skip(reason="JAX is required")
-
 try:
     import cupy as cp
     def needs_cupy(x):
@@ -150,27 +133,27 @@ def test04_constrain_shape():
         t.pass_float32_shaped(np.zeros((3, 5, 4, 6), dtype=np.float32))
 
 
+def test05_bytes():
+    a = bytearray(range(10))
+    assert t.get_is_valid(a)
+    assert t.get_shape(a) == [10]
+    assert t.get_size(a) == 10
+    assert t.get_nbytes(a) == 10
+    assert t.get_itemsize(a) == 1
+    assert t.check_order(a) == 'C'
+    b = b'hello'  # immutable
+    assert t.get_is_valid(b)
+    assert t.get_shape(b) == [5]
+
+
 @needs_numpy
-def test05_constrain_order():
+def test06_constrain_order_numpy():
     assert t.check_order(np.zeros((3, 5, 4, 6), order='C')) == 'C'
     assert t.check_order(np.zeros((3, 5, 4, 6), order='F')) == 'F'
     assert t.check_order(np.zeros((3, 5, 4, 6), order='C')[:, 2, :, :]) == '?'
     assert t.check_order(np.zeros((3, 5, 4, 6), order='F')[:, 2, :, :]) == '?'
 
 
-@needs_jax
-def test06_constrain_order_jax():
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        try:
-            c = jnp.zeros((3, 5))
-        except:
-            pytest.skip('jax is missing')
-
-    z = jnp.zeros((3, 5, 4, 6))
-    assert t.check_order(z) == 'C'
-
-
 @needs_torch
 @pytest.mark.filterwarnings
 def test07_constrain_order_pytorch():
@@ -190,20 +173,18 @@ def test07_constrain_order_pytorch():
         assert t.check_device(torch.zeros(3, 5, device='cuda')) == 'cuda'
 
 
-@needs_tensorflow
-def test08_constrain_order_tensorflow():
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        try:
-            c = tf.zeros((3, 5))
-        except:
-            pytest.skip('tensorflow is missing')
-
-    assert t.check_order(c) == 'C'
+def test08_write_bytes_from_cpp():
+    a = bytearray(10)
+    t.initialize(a)
+    assert a == bytearray(range(10))
+    b = b'helloHello'  # ten immutable bytes
+    with pytest.raises(TypeError) as excinfo:
+        t.initialize(b)
+    assert 'incompatible function arguments' in str(excinfo.value)
 
 
 @needs_numpy
-def test09_write_from_cpp():
+def test09_write_numpy_from_cpp():
     x = np.zeros(10, dtype=np.float32)
     t.initialize(x)
     assert np.all(x == np.arange(10, dtype=np.float32))
@@ -251,60 +232,27 @@ def test11_implicit_conversion_pytorch():
         t.noimplicit(torch.zeros(2, 2, 10, dtype=torch.float32)[:, :, 4])
 
 
-@needs_tensorflow
-def test12_implicit_conversion_tensorflow():
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        try:
-            c = tf.zeros((3, 5))
-        except:
-            pytest.skip('tensorflow is missing')
-
-        t.implicit(tf.zeros((2, 2), dtype=tf.int32))
-        t.implicit(tf.zeros((2, 2, 10), dtype=tf.float32)[:, :, 4])
-        t.implicit(tf.zeros((2, 2, 10), dtype=tf.int32)[:, :, 4])
-        t.implicit(tf.zeros((2, 2, 10), dtype=tf.bool)[:, :, 4])
-
-        with pytest.raises(TypeError) as excinfo:
-            t.noimplicit(tf.zeros((2, 2), dtype=tf.int32))
-
-        with pytest.raises(TypeError) as excinfo:
-            t.noimplicit(tf.zeros((2, 2), dtype=tf.bool))
-
-
-@needs_jax
-def test13_implicit_conversion_jax():
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        try:
-            c = jnp.zeros((3, 5))
-        except:
-            pytest.skip('jax is missing')
-
-    t.implicit(jnp.zeros((2, 2), dtype=jnp.int32))
-    t.implicit(jnp.zeros((2, 2, 10), dtype=jnp.float32)[:, :, 4])
-    t.implicit(jnp.zeros((2, 2, 10), dtype=jnp.int32)[:, :, 4])
-    t.implicit(jnp.zeros((2, 2, 10), dtype=jnp.bool_)[:, :, 4])
-
-    with pytest.raises(TypeError) as excinfo:
-        t.noimplicit(jnp.zeros((2, 2), dtype=jnp.int32))
-
-    with pytest.raises(TypeError) as excinfo:
-        t.noimplicit(jnp.zeros((2, 2), dtype=jnp.uint8))
+@needs_numpy
+def test12_process_image():
+    x = np.arange(120, dtype=np.ubyte).reshape(8, 5, 3)
+    t.process(x)
+    assert np.all(x == np.arange(0, 240, 2, dtype=np.ubyte).reshape(8, 5, 3))
 
 
-def test14_destroy_capsule():
+def test13_destroy_capsule():
     collect()
     dc = t.destruct_count()
-    a = t.return_dlpack()
-    assert dc == t.destruct_count()
-    del a
+    capsule = t.return_no_framework()
+    assert 'dltensor' in repr(capsule)
+    assert 'versioned' not in repr(capsule)
+    assert t.destruct_count() == dc
+    del capsule
     collect()
     assert t.destruct_count() - dc == 1
 
 
 @needs_numpy
-def test15_consume_numpy():
+def test14_consume_numpy():
     collect()
     class wrapper:
         def __init__(self, value):
@@ -312,31 +260,48 @@ def __init__(self, value):
         def __dlpack__(self):
             return self.value
     dc = t.destruct_count()
-    a = t.return_dlpack()
+    capsule = t.return_no_framework()
     if hasattr(np, '_from_dlpack'):
-        x = np._from_dlpack(wrapper(a))
+        x = np._from_dlpack(wrapper(capsule))
     elif hasattr(np, 'from_dlpack'):
-        x = np.from_dlpack(wrapper(a))
+        x = np.from_dlpack(wrapper(capsule))
     else:
         pytest.skip('your version of numpy is too old')
-
-    del a
+    del capsule
     collect()
     assert x.shape == (2, 4)
     assert np.all(x == [[1, 2, 3, 4], [5, 6, 7, 8]])
-    assert dc == t.destruct_count()
+    assert t.destruct_count() == dc
     del x
     collect()
     assert t.destruct_count() - dc == 1
 
 
 @needs_numpy
-def test16_passthrough():
+def test15_passthrough_numpy():
     a = t.ret_numpy()
     b = t.passthrough(a)
     assert a is b
 
-    a = np.array([1,2,3])
+    a = np.array([1, 2, 3])
+    b = t.passthrough(a)
+    assert a is b
+
+    a = None
+    with pytest.raises(TypeError) as excinfo:
+        b = t.passthrough(a)
+    assert 'incompatible function arguments' in str(excinfo.value)
+    b = t.passthrough_arg_none(a)
+    assert a is b
+
+
+@needs_torch
+def test16_passthrough_torch():
+    a = t.ret_pytorch()
+    b = t.passthrough(a)
+    assert a is b
+
+    a = torch.tensor([1, 2, 3])
     b = t.passthrough(a)
     assert a is b
 
@@ -354,6 +319,7 @@ def test17_return_numpy():
     dc = t.destruct_count()
     x = t.ret_numpy()
     assert x.shape == (2, 4)
+    assert x.flags.writeable
     assert np.all(x == [[1, 2, 3, 4], [5, 6, 7, 8]])
     del x
     collect()
@@ -376,29 +342,74 @@ def test18_return_pytorch():
     assert t.destruct_count() - dc == 1
 
 
-@needs_jax
-def test19_return_jax():
+@skip_on_pypy
+def test19_return_memview():
     collect()
-    dc = tj.destruct_count()
-    x = tj.ret_jax()
+    dc = t.destruct_count()
+    x = t.ret_memview()
+    assert isinstance(x, memoryview)
+    assert x.itemsize == 8
+    assert x.ndim == 2
     assert x.shape == (2, 4)
-    assert jnp.all(x == jnp.array([[1,2,3,4], [5,6,7,8]], dtype=jnp.float32))
+    assert x.strides == (32, 8)  # in bytes
+    assert x.tolist() == [[1, 2, 3, 4], [5, 6, 7, 8]]
     del x
     collect()
-    assert tj.destruct_count() - dc == 1
+    assert t.destruct_count() - dc == 1
 
 
-@needs_tensorflow
-def test20_return_tensorflow():
+@needs_numpy
+def test20_return_array_api():
     collect()
-    dc = tt.destruct_count()
-    x = tt.ret_tensorflow()
-    assert x.get_shape().as_list() == [2, 4]
-    assert tf.math.reduce_all(
-               x == tf.constant([[1,2,3,4], [5,6,7,8]], dtype=tf.float32))
-    del x
+    dc = t.destruct_count()
+    obj = t.ret_array_api()
+    assert obj.__dlpack_device__() == (1, 0)  # (type == CPU, id == 0)
+    capsule = obj.__dlpack__()
+    assert 'dltensor' in repr(capsule)
+    assert 'versioned' not in repr(capsule)
+    capsule = obj.__dlpack__(max_version=None)
+    assert 'dltensor' in repr(capsule)
+    assert 'versioned' not in repr(capsule)
+    capsule = obj.__dlpack__(max_version=(0, 0))  # (major == 0, minor == 0)
+    assert 'dltensor' in repr(capsule)
+    assert 'versioned' not in repr(capsule)
+    capsule = obj.__dlpack__(max_version=(1, 0))  # (major == 1, minor == 0)
+    assert 'dltensor_versioned' in repr(capsule)
+    with pytest.raises(TypeError) as excinfo:
+        capsule = obj.__dlpack__(0)
+    assert 'does not accept positional arguments' in str(excinfo.value)
+    del obj
     collect()
-    assert tt.destruct_count() - dc == 1
+    assert t.destruct_count() == dc
+    del capsule
+    collect()
+    assert t.destruct_count() - dc == 1
+    dc += 1
+
+    obj = t.ret_array_api()  # obj also supports the buffer protocol
+    mv = memoryview(obj)
+    assert mv.tolist() == [[1, 2, 3, 4], [5, 6, 7, 8]]
+    del obj
+    collect()
+    assert t.destruct_count() == dc
+    del mv
+    collect()
+    assert t.destruct_count() - dc == 1
+    dc += 1
+
+    if (hasattr(np, '__array_api_version__') and
+        np.__array_api_version__ >= '2024'):
+        obj = t.ret_array_api()
+        x = np.from_dlpack(obj)
+        del obj
+        collect()
+        assert t.destruct_count() == dc
+        assert x.shape == (2, 4)
+        assert x.flags.writeable
+        assert np.all(x == [[1, 2, 3, 4], [5, 6, 7, 8]])
+        del x
+        collect()
+        assert t.destruct_count() - dc == 1
 
 
 @needs_numpy
@@ -484,6 +495,8 @@ def test26_return_ro():
     assert t.ret_numpy_const_ref_f.__doc__  == 'ret_numpy_const_ref_f() -> numpy.ndarray[dtype=float32, shape=(2, 4), order=\'F\', writable=False]'
     assert x.shape == (2, 4)
     assert y.shape == (2, 4)
+    assert not x.flags.writeable
+    assert not y.flags.writeable
     assert np.all(x == [[1, 2, 3, 4], [5, 6, 7, 8]])
     assert np.all(y == [[1, 3, 5, 7], [2, 4, 6, 8]])
     with pytest.raises(ValueError) as excinfo:
@@ -494,26 +507,52 @@ def test26_return_ro():
     assert 'read-only' in str(excinfo.value)
 
 
+def test27_python_array():
+    import array
+    a = array.array('d', [0, 0, 0, 3.14159, 0])
+    assert t.check(a)
+    assert t.check_rw_by_value(a)
+    assert a[1] == 1.414214
+    assert t.check_rw_by_value_float64(a)
+    assert a[2] == 2.718282
+    assert a[4] == 16.0
+    assert t.check_ro_by_value_ro(a)
+    assert t.check_ro_by_value_const_float64(a)
+
+    a[1] = 0.1
+    a[2] = 0.2
+    a[4] = 0.4
+    mv = memoryview(a)
+    assert t.check(mv)
+    assert t.check_rw_by_value(mv)
+    assert a[1] == 1.414214
+    assert t.check_rw_by_value_float64(mv)
+    assert a[2] == 2.718282
+    assert a[4] == 16.0
+    assert t.check_ro_by_value_ro(mv)
+    assert t.check_ro_by_value_const_float64(mv)
+
+    x = t.passthrough(a)
+    assert x is a
+
+
+def test28_check_bytearray():
+    a = bytearray(b'xyz')
+    assert t.check(a)
+    mv = memoryview(a)
+    assert t.check(mv)
+
+
 @needs_numpy
-def test27_check_numpy():
+def test29_check_numpy():
     assert t.check(np.zeros(1))
 
 
 @needs_torch
-def test28_check_torch():
+def test30_check_torch():
     assert t.check(torch.zeros((1)))
 
 
-@needs_tensorflow
-def test29_check_tensorflow():
-    assert t.check(tf.zeros((1)))
-
-
-@needs_jax
-def test30_check_jax():
-    assert t.check(jnp.zeros((1)))
-
-
 @needs_numpy
 def test31_rv_policy():
     def p(a):
@@ -629,6 +668,7 @@ def test33_force_contig_numpy():
     assert b is not a
     assert np.all(b == a)
 
+
 @needs_torch
 @pytest.mark.filterwarnings
 def test34_force_contig_pytorch():
@@ -672,7 +712,7 @@ def test35_view():
     x2 = x1 * (-1+2j)
     t.fill_view_5(x1)
     assert np.allclose(x1, x2)
-    x2 = -x2;
+    x2 = -x2
     t.fill_view_6(x1)
     assert np.allclose(x1, x2)
 
@@ -686,6 +726,7 @@ def test36_half():
     assert x.shape == (2, 4)
     assert np.all(x == [[1, 2, 3, 4], [5, 6, 7, 8]])
 
+
 @needs_numpy
 def test37_cast():
     a = t.cast(False)
@@ -733,109 +774,109 @@ def test41_noninteger_stride():
     a = np.array([[1, 2, 3, 4, 0, 0], [5, 6, 7, 8, 0, 0]], dtype=np.float32)
     s = a[:, 0:4]  # slice
     t.pass_float32(s)
-    assert t.get_stride(s, 0) == 6;
-    assert t.get_stride(s, 1) == 1;
+    assert t.get_stride(s, 0) == 6
+    assert t.get_stride(s, 1) == 1
     try:
         v = s.view(np.complex64)
     except:
         pytest.skip('your version of numpy is too old')
     t.pass_complex64(v)
-    assert t.get_stride(v, 0) == 3;
-    assert t.get_stride(v, 1) == 1;
+    assert t.get_stride(v, 0) == 3
+    assert t.get_stride(v, 1) == 1
 
     a = np.array([[1, 2, 3, 4, 0], [5, 6, 7, 8, 0]], dtype=np.float32)
     s = a[:, 0:4]  # slice
     t.pass_float32(s)
-    assert t.get_stride(s, 0) == 5;
-    assert t.get_stride(s, 1) == 1;
+    assert t.get_stride(s, 0) == 5
+    assert t.get_stride(s, 1) == 1
     v = s.view(np.complex64)
     with pytest.raises(TypeError) as excinfo:
         t.pass_complex64(v)
     assert 'incompatible function arguments' in str(excinfo.value)
     with pytest.raises(TypeError) as excinfo:
-        t.get_stride(v, 0);
+        t.get_stride(v, 0)
     assert 'incompatible function arguments' in str(excinfo.value)
 
 
 @needs_numpy
 def test42_const_qualifiers_numpy():
     a = np.array([0, 0, 0, 3.14159, 0], dtype=np.float64)
-    assert t.check_rw_by_value(a);
-    assert a[1] == 1.414214;
-    assert t.check_rw_by_value_float64(a);
-    assert a[2] == 2.718282;
-    assert a[4] == 16.0;
-    assert t.check_ro_by_value_ro(a);
-    assert t.check_ro_by_value_const_float64(a);
+    assert t.check_rw_by_value(a)
+    assert a[1] == 1.414214
+    assert t.check_rw_by_value_float64(a)
+    assert a[2] == 2.718282
+    assert a[4] == 16.0
+    assert t.check_ro_by_value_ro(a)
+    assert t.check_ro_by_value_const_float64(a)
     a.setflags(write=False)
-    assert t.check_ro_by_value_ro(a);
-    assert t.check_ro_by_value_const_float64(a);
-    assert a[0] == 0.0;
-    assert a[3] == 3.14159;
+    assert t.check_ro_by_value_ro(a)
+    assert t.check_ro_by_value_const_float64(a)
+    assert a[0] == 0.0
+    assert a[3] == 3.14159
 
     a = np.array([0, 0, 0, 3.14159, 0], dtype=np.float64)
-    assert t.check_rw_by_const_ref(a);
-    assert a[1] == 1.414214;
-    assert t.check_rw_by_const_ref_float64(a);
-    assert a[2] == 2.718282;
-    assert a[4] == 16.0;
-    assert t.check_ro_by_const_ref_ro(a);
-    assert t.check_ro_by_const_ref_const_float64(a);
+    assert t.check_rw_by_const_ref(a)
+    assert a[1] == 1.414214
+    assert t.check_rw_by_const_ref_float64(a)
+    assert a[2] == 2.718282
+    assert a[4] == 16.0
+    assert t.check_ro_by_const_ref_ro(a)
+    assert t.check_ro_by_const_ref_const_float64(a)
     a.setflags(write=False)
-    assert t.check_ro_by_const_ref_ro(a);
-    assert t.check_ro_by_const_ref_const_float64(a);
-    assert a[0] == 0.0;
-    assert a[3] == 3.14159;
+    assert t.check_ro_by_const_ref_ro(a)
+    assert t.check_ro_by_const_ref_const_float64(a)
+    assert a[0] == 0.0
+    assert a[3] == 3.14159
 
     a = np.array([0, 0, 0, 3.14159, 0], dtype=np.float64)
-    assert t.check_rw_by_rvalue_ref(a);
-    assert a[1] == 1.414214;
-    assert t.check_rw_by_rvalue_ref_float64(a);
-    assert a[2] == 2.718282;
-    assert a[4] == 16.0;
-    assert t.check_ro_by_rvalue_ref_ro(a);
-    assert t.check_ro_by_rvalue_ref_const_float64(a);
+    assert t.check_rw_by_rvalue_ref(a)
+    assert a[1] == 1.414214
+    assert t.check_rw_by_rvalue_ref_float64(a)
+    assert a[2] == 2.718282
+    assert a[4] == 16.0
+    assert t.check_ro_by_rvalue_ref_ro(a)
+    assert t.check_ro_by_rvalue_ref_const_float64(a)
     a.setflags(write=False)
-    assert t.check_ro_by_rvalue_ref_ro(a);
-    assert t.check_ro_by_rvalue_ref_const_float64(a);
-    assert a[0] == 0.0;
-    assert a[3] == 3.14159;
+    assert t.check_ro_by_rvalue_ref_ro(a)
+    assert t.check_ro_by_rvalue_ref_const_float64(a)
+    assert a[0] == 0.0
+    assert a[3] == 3.14159
 
 
 @needs_torch
 def test43_const_qualifiers_pytorch():
     a = torch.tensor([0, 0, 0, 3.14159, 0], dtype=torch.float64)
-    assert t.check_rw_by_value(a);
-    assert a[1] == 1.414214;
-    assert t.check_rw_by_value_float64(a);
-    assert a[2] == 2.718282;
-    assert a[4] == 16.0;
-    assert t.check_ro_by_value_ro(a);
-    assert t.check_ro_by_value_const_float64(a);
-    assert a[0] == 0.0;
-    assert a[3] == 3.14159;
+    assert t.check_rw_by_value(a)
+    assert a[1] == 1.414214
+    assert t.check_rw_by_value_float64(a)
+    assert a[2] == 2.718282
+    assert a[4] == 16.0
+    assert t.check_ro_by_value_ro(a)
+    assert t.check_ro_by_value_const_float64(a)
+    assert a[0] == 0.0
+    assert a[3] == 3.14159
 
     a = torch.tensor([0, 0, 0, 3.14159, 0], dtype=torch.float64)
-    assert t.check_rw_by_const_ref(a);
-    assert a[1] == 1.414214;
-    assert t.check_rw_by_const_ref_float64(a);
-    assert a[2] == 2.718282;
-    assert a[4] == 16.0;
-    assert t.check_ro_by_const_ref_ro(a);
-    assert t.check_ro_by_const_ref_const_float64(a);
-    assert a[0] == 0.0;
-    assert a[3] == 3.14159;
+    assert t.check_rw_by_const_ref(a)
+    assert a[1] == 1.414214
+    assert t.check_rw_by_const_ref_float64(a)
+    assert a[2] == 2.718282
+    assert a[4] == 16.0
+    assert t.check_ro_by_const_ref_ro(a)
+    assert t.check_ro_by_const_ref_const_float64(a)
+    assert a[0] == 0.0
+    assert a[3] == 3.14159
 
     a = torch.tensor([0, 0, 0, 3.14159, 0], dtype=torch.float64)
-    assert t.check_rw_by_rvalue_ref(a);
-    assert a[1] == 1.414214;
-    assert t.check_rw_by_rvalue_ref_float64(a);
-    assert a[2] == 2.718282;
-    assert a[4] == 16.0;
-    assert t.check_ro_by_rvalue_ref_ro(a);
-    assert t.check_ro_by_rvalue_ref_const_float64(a);
-    assert a[0] == 0.0;
-    assert a[3] == 3.14159;
+    assert t.check_rw_by_rvalue_ref(a)
+    assert a[1] == 1.414214
+    assert t.check_rw_by_rvalue_ref_float64(a)
+    assert a[2] == 2.718282
+    assert a[4] == 16.0
+    assert t.check_ro_by_rvalue_ref_ro(a)
+    assert t.check_ro_by_rvalue_ref_const_float64(a)
+    assert a[0] == 0.0
+    assert a[3] == 3.14159
 
 
 @needs_cupy
@@ -879,8 +920,6 @@ def test45_implicit_conversion_cupy():
 @needs_numpy
 def test46_implicit_conversion_contiguous_complex():
     # Test fix for issue #709
-    import numpy as np
-
     c_f32 = np.random.rand(10, 10)
     c_c64 = c_f32.astype(np.complex64)
 
@@ -907,7 +946,6 @@ def test_conv(x):
 
 @needs_numpy
 def test_47_ret_infer():
-    import numpy as np
     assert np.all(t.ret_infer_c() == [[1, 2, 3, 4], [5, 6, 7, 8]])
     assert np.all(t.ret_infer_f() == [[1, 3, 5, 7], [2, 4, 6, 8]])
 
@@ -956,13 +994,12 @@ def test50_test_matrix4f_copy():
 
 @needs_numpy
 def test51_return_from_stack():
-    import numpy as np
     assert np.all(t.ret_from_stack_1() == [1,2,3])
     assert np.all(t.ret_from_stack_2() == [1,2,3])
 
+
 @needs_numpy
 def test52_accept_np_both_true_contig():
-    import numpy as np
     a = np.zeros((2, 1), dtype=np.float32)
     assert a.flags['C_CONTIGUOUS'] and a.flags['F_CONTIGUOUS']
     t.accept_np_both_true_contig_a(a)
@@ -972,6 +1009,28 @@ def test52_accept_np_both_true_contig():
 
 @needs_numpy
 def test53_issue_930():
-    import numpy as np
     wrapper = t.Wrapper(np.ones(3, dtype=np.float32))
     assert wrapper.value[0] == 1
+
+
+@needs_numpy
+def test54_docs_example():
+    ma = t.MyArray()
+    aa = ma.array_api()
+    assert 'versioned' not in repr(aa.__dlpack__())
+    assert 'versioned' not in repr(ma.__dlpack__())
+    assert 'versioned' in repr(aa.__dlpack__(max_version=(1, 2)))
+    assert 'versioned' in repr(ma.__dlpack__(max_version=(1, 2)))
+    assert aa.__dlpack_device__() == (1, 0)
+    assert ma.__dlpack_device__() == (1, 0)
+
+    if hasattr(np, 'from_dlpack'):
+        x = np.from_dlpack(aa)
+        y = np.from_dlpack(ma)
+        assert np.all(x == [0.0, 1.0, 2.0, 3.0, 4.0])
+        assert np.all(y == [0.0, 1.0, 2.0, 3.0, 4.0])
+        ma.mutate()
+        assert np.all(x == [0.5, 1.5, 2.5, 3.5, 4.5])
+        assert np.all(y == [0.5, 1.5, 2.5, 3.5, 4.5])
+    else:
+        pytest.skip('your version of numpy is too old')
diff --git a/extern/nanobind/tests/test_ndarray_ext.pyi.ref b/extern/nanobind/tests/test_ndarray_ext.pyi.ref
index 853bea5d7..6975da390 100644
--- a/extern/nanobind/tests/test_ndarray_ext.pyi.ref
+++ b/extern/nanobind/tests/test_ndarray_ext.pyi.ref
@@ -34,7 +34,7 @@ def pass_complex64_const(array: Annotated[NDArray[numpy.complex64], dict(writabl
 
 def pass_uint32(array: NDArray[numpy.uint32]) -> None: ...
 
-def pass_bool(array: NDArray[numpy.bool]) -> None: ...
+def pass_bool(array: NDArray[numpy.bool_]) -> None: ...
 
 def pass_float32_shaped(array: Annotated[NDArray[numpy.float32], dict(shape=(3, None, 4))]) -> None: ...
 
@@ -81,6 +81,9 @@ def check_device(arg: Annotated[NDArray, dict(device='cpu')], /) -> str: ...
 @overload
 def check_device(arg: Annotated[NDArray, dict(device='cuda')], /) -> str: ...
 
+@overload
+def initialize(arg: Annotated[NDArray[numpy.uint8], dict(shape=(10), device='cpu')], /) -> None: ...
+
 @overload
 def initialize(arg: Annotated[NDArray[numpy.float32], dict(shape=(10), device='cpu')], /) -> None: ...
 
@@ -97,7 +100,7 @@ def process(arg: Annotated[NDArray[numpy.uint8], dict(shape=(None, None, 3), ord
 
 def destruct_count() -> int: ...
 
-def return_dlpack() -> Annotated[NDArray[numpy.float32], dict(shape=(2, 4))]: ...
+def return_no_framework() -> Annotated[NDArray[numpy.float32], dict(shape=(2, 4))]: ...
 
 def passthrough(arg: NDArray, /) -> NDArray: ...
 
@@ -115,6 +118,10 @@ def ret_numpy_const() -> Annotated[NDArray[numpy.float32], dict(shape=(2, 4), wr
 
 def ret_pytorch() -> Annotated[NDArray[numpy.float32], dict(shape=(2, 4))]: ...
 
+def ret_memview() -> memoryview[dtype=float64, shape=(2, 4)]: ...
+
+def ret_array_api() -> ArrayLike[dtype=float64, shape=(2, 4)]: ...
+
 def ret_array_scalar() -> NDArray[numpy.float32]: ...
 
 def noop_3d_c_contig(arg: Annotated[NDArray[numpy.float32], dict(shape=(None, None, None), order='C')], /) -> None: ...
@@ -195,3 +202,14 @@ class Wrapper:
 
     @value.setter
     def value(self, arg: NDArray[numpy.float32], /) -> None: ...
+
+class MyArray:
+    def __init__(self) -> None: ...
+
+    def mutate(self) -> None: ...
+
+    def __dlpack__(self, **kwargs) -> object: ...
+
+    def __dlpack_device__(self) -> tuple[int, int]: ...
+
+    def array_api(self) -> ArrayLike[dtype=float64]: ...
diff --git a/extern/nanobind/tests/test_specialization.py b/extern/nanobind/tests/test_specialization.py
new file mode 100644
index 000000000..8a2541e41
--- /dev/null
+++ b/extern/nanobind/tests/test_specialization.py
@@ -0,0 +1,103 @@
+import sys
+import sysconfig
+import dis
+import pytest
+
+# Note: these tests verify that CPython's adaptive specializing interpreter can
+# optimize various expressions involving nanobind types. They are expected to
+# be somewhat fragile across Python versions as the bytecode and specialization
+# opcodes may change.
+
+# Skip tests on PyPy and free-threaded Python
+skip_tests = sys.implementation.name == "pypy" or \
+    sysconfig.get_config_var("Py_GIL_DISABLED")
+
+import test_classes_ext as t
+def disasm(func):
+    """Return ``(opname, argval)`` pairs for the bytecode of ``func``."""
+    # adaptive=True asks ``dis`` for the specialized opcodes, where present.
+    return [(i.opname, i.argval) for i in dis.get_instructions(func, adaptive=True)]
+
+def warmup(fn):
+    """Call ``fn`` eight times so that the interpreter can specialize it."""
+    for _ in range(8):
+        fn()
+
+def count_op(ops, expected):
+    """Count the entries of ``ops`` whose opcode name equals ``expected``."""
+    # ``ops`` holds ``(opname, argval)`` pairs; only the opcode name is
+    # compared, the argument value is ignored.
+    matches = [name for name, _ in ops if name == expected]
+    return len(matches)
+
+@pytest.mark.skipif(
+    sys.version_info < (3, 14) or skip_tests,
+    reason="Static attribute specialization requires CPython 3.14+")
+def test_static_attribute_specialization():
+    """Check that loading a static attribute off the class gets specialized."""
+    struct_type = t.Struct
+    def fn():
+        return struct_type.static_test
+
+    def tally():
+        # Disassemble ``fn`` and return (generic, specialized) LOAD_ATTR counts.
+        ops = disasm(fn)
+        print(ops)
+        generic = count_op(ops, "LOAD_ATTR")
+        specialized = sum(
+            count_op(ops, name)
+            for name in ("LOAD_ATTR_ADAPTIVE", "LOAD_ATTR_CLASS"))
+        return generic, specialized
+
+    # Freshly defined: only the generic opcode is expected.
+    assert tally() == (1, 0)
+
+    # After warm-up the generic opcode should have been replaced.
+    warmup(fn)
+    assert tally() == (0, 1)
+
+@pytest.mark.skipif(
+    sys.version_info < (3, 11) or skip_tests,
+    reason="Method call specialization requires CPython 3.11+")
+def test_method_call_specialization():
+    """Check that calling a bound nanobind method is specialized after warm-up."""
+    s = t.Struct()
+    def fn():
+        return s.value()
+
+    def tally():
+        # Disassemble ``fn`` and return (generic, specialized) opcode counts.
+        # Several opcode names are summed on each side, presumably to cover
+        # renames across Python versions.
+        ops = disasm(fn)
+        print(ops)
+        generic = count_op(ops, "LOAD_METHOD") + count_op(ops, "LOAD_ATTR")
+        specialized = (
+            count_op(ops, "LOAD_ATTR_METHOD_NO_DICT") +
+            count_op(ops, "CALL_ADAPTIVE"))
+        return generic, specialized
+
+    # Freshly defined: only the generic opcode is expected.
+    assert tally() == (1, 0)
+
+    # After warm-up the generic opcode should have been replaced by a
+    # specialized one.
+    warmup(fn)
+    assert tally() == (0, 1)
+
+
+@pytest.mark.skipif(
+    sys.version_info < (3, 11) or skip_tests,
+    reason="Immutability requires Python 3.11+")
+def test_immutability():
+    """Check that nanobind's method type and metaclass reject new attributes."""
+    nb_method = type(t.Struct.value)
+    assert nb_method.__name__ == "nb_method"
+
+    nb_meta = type(t.Struct)
+    assert nb_meta.__name__.startswith("nb_type")
+
+    # Assigning an attribute on either type must raise an "immutable" TypeError.
+    for frozen_type in (nb_method, nb_meta):
+        with pytest.raises(TypeError, match="immutable"):
+            setattr(frozen_type, "test_attr", 123)
diff --git a/extern/nanobind/tests/test_stl.cpp b/extern/nanobind/tests/test_stl.cpp
index 9fb60980a..48edaed50 100644
--- a/extern/nanobind/tests/test_stl.cpp
+++ b/extern/nanobind/tests/test_stl.cpp
@@ -19,6 +19,8 @@ NB_MAKE_OPAQUE(std::vector<float, std::allocator<float>>)
 
 namespace nb = nanobind;
 
+static_assert(nb::detail::has_arg_defaults_v<std::optional<bool>>);
+
 static int default_constructed = 0, value_constructed = 0, copy_constructed = 0,
            move_constructed = 0, copy_assigned = 0, move_assigned = 0,
            destructed = 0;
@@ -69,9 +71,7 @@ struct FuncWrapper {
 };
 
 int funcwrapper_tp_traverse(PyObject *self, visitproc visit, void *arg) {
-    #if PY_VERSION_HEX >= 0x03090000
-        Py_VISIT(Py_TYPE(self));
-    #endif
+    Py_VISIT(Py_TYPE(self));
 
     if (!nb::inst_ready(self)) {
         return 0;
diff --git a/extern/nanobind/tests/test_stl.py b/extern/nanobind/tests/test_stl.py
index 5fe715fe8..9c227e64c 100644
--- a/extern/nanobind/tests/test_stl.py
+++ b/extern/nanobind/tests/test_stl.py
@@ -4,17 +4,6 @@
 import sys
 from common import collect, skip_on_pypy
 
-# Some helper declaration to check types across different Python versions
-if sys.version_info < (3, 9):
-    TYPING_DICT = "typing.Dict"
-    TYPING_SET = "typing.Set"
-    TYPING_ABC = "typing"
-    TYPING_ABSTRACT_SET = "typing.AbstractSet"
-else:
-    TYPING_DICT = "dict"
-    TYPING_SET = "set"
-    TYPING_ABC = "collections.abc"
-    TYPING_ABSTRACT_SET = "collections.abc.Set"
 
 
 def optional(arg: str, /) -> str:
@@ -396,6 +385,7 @@ def test38_std_optional_none():
     t.optional_none(None)
     assert t.optional_cstr(None) == "none"
     assert t.optional_cstr("hi") == "hi"
+    assert t.optional_non_assignable(None) == None
 
 
 def test39_std_optional_ret_opt_movable(clean):
@@ -517,7 +507,7 @@ def test50_map_return_movable_value():
         assert k == chr(ord("a") + i)
         assert v.value == i
         assert t.map_return_movable_value.__doc__ == (
-            f"map_return_movable_value() -> {TYPING_DICT}[str, test_stl_ext.Movable]"
+            "map_return_movable_value() -> dict[str, test_stl_ext.Movable]"
         )
 
 
@@ -526,14 +516,14 @@ def test51_map_return_copyable_value():
         assert k == chr(ord("a") + i)
         assert v.value == i
         assert t.map_return_copyable_value.__doc__ == (
-            f"map_return_copyable_value() -> {TYPING_DICT}[str, test_stl_ext.Copyable]"
+            "map_return_copyable_value() -> dict[str, test_stl_ext.Copyable]"
         )
 
 
 def test52_map_movable_in_value():
     t.map_movable_in_value(dict([(chr(ord("a") + i), t.Movable(i)) for i in range(10)]))
     assert t.map_movable_in_value.__doc__ == (
-        f"map_movable_in_value(x: {TYPING_ABC}.Mapping[str, test_stl_ext.Movable]) -> None"
+        "map_movable_in_value(x: collections.abc.Mapping[str, test_stl_ext.Movable]) -> None"
     )
 
 
@@ -542,7 +532,7 @@ def test53_map_copyable_in_value():
         dict([(chr(ord("a") + i), t.Copyable(i)) for i in range(10)])
     )
     assert t.map_copyable_in_value.__doc__ == (
-        f"map_copyable_in_value(x: {TYPING_ABC}.Mapping[str, test_stl_ext.Copyable]) -> None"
+        "map_copyable_in_value(x: collections.abc.Mapping[str, test_stl_ext.Copyable]) -> None"
     )
 
 
@@ -551,7 +541,7 @@ def test54_map_movable_in_lvalue_ref():
         dict([(chr(ord("a") + i), t.Movable(i)) for i in range(10)])
     )
     assert t.map_movable_in_lvalue_ref.__doc__ == (
-        f"map_movable_in_lvalue_ref(x: {TYPING_ABC}.Mapping[str, test_stl_ext.Movable]) -> None"
+        "map_movable_in_lvalue_ref(x: collections.abc.Mapping[str, test_stl_ext.Movable]) -> None"
     )
 
 
@@ -560,14 +550,14 @@ def test55_map_movable_in_rvalue_ref():
         dict([(chr(ord("a") + i), t.Movable(i)) for i in range(10)])
     )
     assert t.map_movable_in_rvalue_ref.__doc__ == (
-        f"map_movable_in_rvalue_ref(x: {TYPING_ABC}.Mapping[str, test_stl_ext.Movable]) -> None"
+        "map_movable_in_rvalue_ref(x: collections.abc.Mapping[str, test_stl_ext.Movable]) -> None"
     )
 
 
 def test56_map_movable_in_ptr():
     t.map_movable_in_ptr(dict([(chr(ord("a") + i), t.Movable(i)) for i in range(10)]))
     assert t.map_movable_in_ptr.__doc__ == (
-        f"map_movable_in_ptr(x: {TYPING_ABC}.Mapping[str, test_stl_ext.Movable]) -> None"
+        "map_movable_in_ptr(x: collections.abc.Mapping[str, test_stl_ext.Movable]) -> None"
     )
 
 
@@ -601,9 +591,9 @@ def test60_set_return_value():
         assert k == chr(ord("a") + i)
     for i, k in enumerate(sorted(t.unordered_set_return_value())):
         assert k == chr(ord("a") + i)
-    assert t.set_return_value.__doc__ == (f"set_return_value() -> {TYPING_SET}[str]")
+    assert t.set_return_value.__doc__ == "set_return_value() -> set[str]"
     assert t.unordered_set_return_value.__doc__ == (
-        f"unordered_set_return_value() -> {TYPING_SET}[str]"
+        "unordered_set_return_value() -> set[str]"
     )
 
 
@@ -611,24 +601,24 @@ def test61_set_in_value():
     t.set_in_value(set([chr(ord("a") + i) for i in range(10)]))
     t.unordered_set_in_value(set([chr(ord("a") + i) for i in range(10)]))
     assert t.set_in_value.__doc__ == (
-        f"set_in_value(x: {TYPING_ABSTRACT_SET}[str]) -> None"
+        "set_in_value(x: collections.abc.Set[str]) -> None"
     )
     assert t.unordered_set_in_value.__doc__ == (
-        f"unordered_set_in_value(x: {TYPING_ABSTRACT_SET}[str]) -> None"
+        "unordered_set_in_value(x: collections.abc.Set[str]) -> None"
     )
 
 
 def test62_set_in_lvalue_ref():
     t.set_in_lvalue_ref(set([chr(ord("a") + i) for i in range(10)]))
     assert t.set_in_lvalue_ref.__doc__ == (
-        f"set_in_lvalue_ref(x: {TYPING_ABSTRACT_SET}[str]) -> None"
+        "set_in_lvalue_ref(x: collections.abc.Set[str]) -> None"
     )
 
 
 def test63_set_in_rvalue_ref():
     t.set_in_rvalue_ref(set([chr(ord("a") + i) for i in range(10)]))
     assert t.set_in_rvalue_ref.__doc__ == (
-        f"set_in_rvalue_ref(x: {TYPING_ABSTRACT_SET}[str]) -> None"
+        "set_in_rvalue_ref(x: collections.abc.Set[str]) -> None"
     )
 
 
diff --git a/extern/nanobind/tests/test_stl_bind_map.py b/extern/nanobind/tests/test_stl_bind_map.py
index e8823d83a..dc4adf811 100644
--- a/extern/nanobind/tests/test_stl_bind_map.py
+++ b/extern/nanobind/tests/test_stl_bind_map.py
@@ -95,10 +95,7 @@ def test_map_string_double(capfd):
     assert type(values).__qualname__ == 'MapStringDouble.ValueView'
     assert type(items).__qualname__ == 'MapStringDouble.ItemView'
 
-    if sys.version_info < (3, 9):
-        d = "typing.Dict"
-    else:
-        d = "dict"
+    d = "dict"
 
     assert t.MapStringDouble.__init__.__doc__ == \
 """__init__(self) -> None
diff --git a/extern/nanobind/tests/test_stl_ext.pyi.ref b/extern/nanobind/tests/test_stl_ext.pyi.ref
index f39f3a15f..eb197bf1e 100644
--- a/extern/nanobind/tests/test_stl_ext.pyi.ref
+++ b/extern/nanobind/tests/test_stl_ext.pyi.ref
@@ -167,7 +167,7 @@ def optional_unbound_type(x: int | None = None) -> int | None: ...
 
 def optional_unbound_type_with_nullopt_as_default(x: int | None = None) -> int | None: ...
 
-def optional_non_assignable(arg: NonAssignable, /) -> NonAssignable | None: ...
+def optional_non_assignable(arg: NonAssignable | None) -> NonAssignable | None: ...
 
 def variant_copyable(arg: Copyable | int, /) -> None: ...
 
diff --git a/extern/nanobind/tests/test_stubs.py b/extern/nanobind/tests/test_stubs.py
index 57c0fa146..8c32bb4ad 100644
--- a/extern/nanobind/tests/test_stubs.py
+++ b/extern/nanobind/tests/test_stubs.py
@@ -49,8 +49,8 @@ def test01_check_stub_refs(p_ref, request):
 
     if "test_functions_ext" in p_in.name and sys.version_info < (3, 13):
         s_ref = [line.replace("types.CapsuleType", "typing_extensions.CapsuleType") for line in s_ref]
-        s_ref.insert(3, "")
-        s_ref.insert(4, "import typing_extensions")
+        s_ref.insert(5, "")
+        s_ref.insert(6, "import typing_extensions")
 
     s_in = remove_platform_dependent(s_in)
     s_ref = remove_platform_dependent(s_ref)
diff --git a/extern/nanobind/tests/test_tensorflow.cpp b/extern/nanobind/tests/test_tensorflow.cpp
index 37088e131..a6c74e52d 100644
--- a/extern/nanobind/tests/test_tensorflow.cpp
+++ b/extern/nanobind/tests/test_tensorflow.cpp
@@ -1,10 +1,10 @@
 #include <nanobind/nanobind.h>
 #include <nanobind/ndarray.h>
+
 namespace nb = nanobind;
 
 int destruct_count = 0;
 
-
 NB_MODULE(test_tensorflow_ext, m) {
     m.def("destruct_count", []() { return destruct_count; });
     m.def("ret_tensorflow", []() {
@@ -14,12 +14,14 @@ NB_MODULE(test_tensorflow_ext, m) {
         Buf *buf = new Buf({ 1, 2, 3, 4, 5, 6, 7, 8 });
         size_t shape[2] = { 2, 4 };
 
-        nb::capsule deleter(buf, [](void *data) noexcept {
+        nb::capsule deleter(buf, [](void *p) noexcept {
            destruct_count++;
-           delete (Buf *) data;
+           delete (Buf *) p;
         });
 
-        return nb::ndarray<nb::tensorflow, float, nb::shape<2, 4>>(buf->f, 2, shape,
+        return nb::ndarray<nb::tensorflow, float, nb::shape<2, 4>>(buf->f,
+                                                                   2,
+                                                                   shape,
                                                                    deleter);
     });
 }
diff --git a/extern/nanobind/tests/test_tensorflow.py b/extern/nanobind/tests/test_tensorflow.py
index e69de29bb..9092344b0 100644
--- a/extern/nanobind/tests/test_tensorflow.py
+++ b/extern/nanobind/tests/test_tensorflow.py
@@ -0,0 +1,97 @@
+import test_ndarray_ext as t
+import test_tensorflow_ext as ttf
+import pytest
+import warnings
+import importlib
+from common import collect
+
+try:
+    import tensorflow as tf
+    import tensorflow.config
+    def needs_tensorflow(x):
+        return x
+except:
+    needs_tensorflow = pytest.mark.skip(reason="TensorFlow is required")
+
+
+@needs_tensorflow
+def test01_constrain_order():
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        try:
+            c = tf.zeros((3, 5))
+        except:
+            pytest.skip('tensorflow is missing')
+
+    assert t.check_order(c) == 'C'
+
+
+@needs_tensorflow
+def test02_implicit_conversion():
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        try:
+            c = tf.zeros((3, 5))
+        except:
+            pytest.skip('tensorflow is missing')
+
+        t.implicit(tf.zeros((2, 2), dtype=tf.int32))
+        t.implicit(tf.zeros((2, 2, 10), dtype=tf.float32)[:, :, 4])
+        t.implicit(tf.zeros((2, 2, 10), dtype=tf.int32)[:, :, 4])
+        t.implicit(tf.zeros((2, 2, 10), dtype=tf.bool)[:, :, 4])
+
+        with pytest.raises(TypeError) as excinfo:
+            t.noimplicit(tf.zeros((2, 2), dtype=tf.int32))
+
+        with pytest.raises(TypeError) as excinfo:
+            t.noimplicit(tf.zeros((2, 2), dtype=tf.bool))
+
+
+@needs_tensorflow
+def test03_return_tensorflow():
+    collect()
+    dc = ttf.destruct_count()
+    x = ttf.ret_tensorflow()
+    assert x.get_shape().as_list() == [2, 4]
+    assert tf.math.reduce_all(
+               x == tf.constant([[1,2,3,4], [5,6,7,8]], dtype=tf.float32))
+    del x
+    collect()
+    assert ttf.destruct_count() - dc == 1
+
+
+@needs_tensorflow
+def test04_check():
+    assert t.check(tf.zeros((1)))
+
+
+@needs_tensorflow
+def test05_passthrough():
+    a = ttf.ret_tensorflow()
+    b = t.passthrough(a)
+    assert a is b
+
+    a = tf.constant([1, 2, 3])
+    b = t.passthrough(a)
+    assert a is b
+
+    a = None
+    with pytest.raises(TypeError) as excinfo:
+        b = t.passthrough(a)
+    assert 'incompatible function arguments' in str(excinfo.value)
+    b = t.passthrough_arg_none(a)
+    assert a is b
+
+
+@needs_tensorflow
+def test06_ro_array():
+    if tuple(map(int, tf.__version__.split('.')[:2])) < (2, 19):  # numeric, not lexicographic
+        pytest.skip('tensorflow version is too old')
+    a = tf.constant([1, 2], dtype=tf.float32)  # immutable
+    assert t.accept_ro(a) == 1
+    # If the next line fails, delete it, update the version above,
+    # and uncomment the three lines below.
+    assert t.accept_rw(a) == 1
+    # with pytest.raises(TypeError) as excinfo:
+    #     t.accept_rw(a)
+    # assert 'incompatible function arguments' in str(excinfo.value)
diff --git a/extern/nanobind/tests/test_thread.cpp b/extern/nanobind/tests/test_thread.cpp
index 97e82960e..34f6ab989 100644
--- a/extern/nanobind/tests/test_thread.cpp
+++ b/extern/nanobind/tests/test_thread.cpp
@@ -1,4 +1,8 @@
 #include <nanobind/nanobind.h>
+#include <nanobind/stl/shared_ptr.h>
+
+#include <memory>
+#include <vector>
 
 namespace nb = nanobind;
 using namespace nb::literals;
@@ -32,6 +36,11 @@ class ClassWithClassProperty {
     ClassWithProperty value_;
 };
 
+struct AnInt {
+    int value;
+    AnInt(int v) : value(v) {}
+};
+
 
 NB_MODULE(test_thread_ext, m) {
     nb::class_<Counter>(m, "Counter")
@@ -68,4 +77,17 @@ NB_MODULE(test_thread_ext, m) {
             new (self) ClassWithClassProperty(std::move(value));
           }, nb::arg("value"))
         .def_prop_ro("prop1", &ClassWithClassProperty::get_prop);
+
+    nb::class_<AnInt>(m, "AnInt")
+        .def(nb::init<int>())
+        .def_rw("value", &AnInt::value);
+
+    std::vector<std::shared_ptr<AnInt>> shared_ints;
+    for (int i = 0; i < 5; ++i) {
+        shared_ints.push_back(std::make_shared<AnInt>(i));
+    }
+    m.def("fetch_shared_int", [shared_ints](int i) {
+        return shared_ints.at(i);
+    });
+    m.def("consume_an_int", [](AnInt* p) { return p->value; });
 }
diff --git a/extern/nanobind/tests/test_thread.py b/extern/nanobind/tests/test_thread.py
index 1d5992ee1..22bb5c71d 100644
--- a/extern/nanobind/tests/test_thread.py
+++ b/extern/nanobind/tests/test_thread.py
@@ -1,3 +1,6 @@
+import random
+import threading
+
 import test_thread_ext as t
 from test_thread_ext import Counter, GlobalData, ClassWithProperty, ClassWithClassProperty
 from common import parallelize
@@ -100,3 +103,16 @@ def f():
             _ = c2.prop1.prop2
 
     parallelize(f, n_threads=n_threads)
+
+
+def test08_shared_ptr_threaded_access(n_threads=8):
+    """Test for keep_alive racing with other fields."""
+    def worker(gate):
+        index = random.randint(0, 4)
+        gate.wait()  # released once n_threads callers are waiting
+        fetched = t.fetch_shared_int(index)
+        assert t.consume_an_int(fetched) == index
+
+    for _ in range(100):
+        gate = threading.Barrier(n_threads)
+        parallelize(lambda: worker(gate), n_threads=n_threads)
diff --git a/extern/nanobind/tests/test_typing.cpp b/extern/nanobind/tests/test_typing.cpp
index 01faa37a8..0a2e3a155 100644
--- a/extern/nanobind/tests/test_typing.cpp
+++ b/extern/nanobind/tests/test_typing.cpp
@@ -57,7 +57,9 @@ NB_MODULE(test_typing_ext, m) {
 
     m.def("makeNestedClass", [] { return NestedClass(); });
 
-    // Aliases to local functoins and types
+    m.attr("AnyTuple") = nb::typing().attr("Tuple")[nb::make_tuple(nb::any_type(), nb::ellipsis())];
+
+    // Aliases to local functions and types
     m.attr("FooAlias") = m.attr("Foo");
     m.attr("f_alias") = m.attr("f");
     nb::type<Foo>().attr("lt_alias") = nb::type<Foo>().attr("__lt__");
@@ -65,7 +67,7 @@ NB_MODULE(test_typing_ext, m) {
     // Custom signature generation for classes and methods
     struct CustomSignature { int value; };
     nb::class_<CustomSignature>(
-        m, "CustomSignature", nb::sig("@my_decorator\nclass CustomSignature(" NB_TYPING_ITERABLE "[int])"))
+        m, "CustomSignature", nb::sig("@my_decorator\nclass CustomSignature(collections.abc.Iterable[int])"))
         .def("method", []{}, nb::sig("@my_decorator\ndef method(self: typing.Self)"))
         .def("method_with_default", [](CustomSignature&,bool){}, "value"_a.sig("bool(True)") = true)
         .def_rw("value", &CustomSignature::value,
@@ -100,7 +102,7 @@ NB_MODULE(test_typing_ext, m) {
             nb::sig("def get(self, /) -> T"))
        .def(nb::self == nb::self, nb::sig("def __eq__(self, arg: object, /) -> bool"));
 
-#if PY_VERSION_HEX >= 0x03090000 && !defined(PYPY_VERSION) // https://github.com/pypy/pypy/issues/4914
+#if !defined(PYPY_VERSION) // https://github.com/pypy/pypy/issues/4914
     struct WrapperFoo : Wrapper { };
     nb::class_<WrapperFoo>(m, "WrapperFoo", wrapper[nb::type<Foo>()]);
 #endif
@@ -112,6 +114,10 @@ NB_MODULE(test_typing_ext, m) {
     m.def("list_front", [](nb::list l) { return l[0]; },
           nb::sig("def list_front[T](arg: list[T], /) -> T"));
 
+    // Type variables with constraints and a bound.
+    m.attr("T2") = nb::type_var("T2", "bound"_a = nb::type<Foo>());
+    m.attr("T3") = nb::type_var("T3", *nb::make_tuple(nb::type<Foo>(), nb::type<Wrapper>()));
+
     // Some statements that will be modified by the pattern file
     m.def("remove_me", []{});
     m.def("tweak_me", [](nb::object o) { return o; }, "prior docstring\nremains preserved");
diff --git a/extern/nanobind/tests/test_typing.py b/extern/nanobind/tests/test_typing.py
index 8a89db811..6a51021bd 100644
--- a/extern/nanobind/tests/test_typing.py
+++ b/extern/nanobind/tests/test_typing.py
@@ -3,7 +3,6 @@
 import pytest
 import platform
 
-@pytest.mark.skipif(sys.version_info < (3, 9), reason="requires python3.9 or higher")
 def test01_parameterize_generic():
     assert str(type(t.Wrapper[int]) == 't.Wrapper[int]')
     if platform.python_implementation() != 'PyPy':
diff --git a/extern/nanobind/tests/test_typing_ext.pyi.ref b/extern/nanobind/tests/test_typing_ext.pyi.ref
index f2df74ee9..e964e01e0 100644
--- a/extern/nanobind/tests/test_typing_ext.pyi.ref
+++ b/extern/nanobind/tests/test_typing_ext.pyi.ref
@@ -1,6 +1,6 @@
 from collections.abc import Iterable
 import py_stub_test
-from typing import Generic, Optional, Self, TypeAlias, TypeVar
+from typing import Any, Generic, Optional, Self, TypeAlias, TypeVar
 
 from . import submodule as submodule
 from .submodule import F as F, f as f2
@@ -9,6 +9,8 @@ from .submodule import F as F, f as f2
 # a prefix
 
 class Foo:
+    # a class prefix
+
     def __lt__(self, arg: int, /) -> bool: ...
 
     def __gt__(self, arg: int, /) -> bool: ...
@@ -19,10 +21,14 @@ class Foo:
 
     lt_alias = __lt__
 
+    # a class suffix
+
 def f() -> None: ...
 
 def makeNestedClass() -> py_stub_test.AClass.NestedClass: ...
 
+AnyTuple: TypeAlias = tuple[Any, ...]
+
 FooAlias: TypeAlias = Foo
 
 f_alias = f
@@ -42,7 +48,7 @@ class CustomSignature(Iterable[int]):
     def value(self, value: Optional[int], /) -> None:
         """docstring for setter"""
 
-pytree: dict = {'a' : ('b', [123])}
+pytree: dict = ...
 
 T = TypeVar("T", contravariant=True)
 
@@ -61,6 +67,10 @@ class WrapperTypeParam[T]:
 
 def list_front[T](arg: list[T], /) -> T: ...
 
+T2 = TypeVar("T2", bound=Foo)
+
+T3 = TypeVar("T3", Foo, Wrapper)
+
 def tweak_me(arg: int):
     """
     prior docstring

From 01be85b82ef9645023a99c568ba5be90ffb4b629 Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Tue, 10 Feb 2026 12:05:51 +1300
Subject: [PATCH 3/4] robin-map: upgraded to version 1.4.1.

---
 .../ext/robin_map/.github/workflows/ci.yml    | 28 ++++---------------
 extern/nanobind/ext/robin_map/CMakeLists.txt  |  4 +--
 .../include/tsl/robin_growth_policy.h         |  2 +-
 .../ext/robin_map/tests/CMakeLists.txt        |  2 +-
 4 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/extern/nanobind/ext/robin_map/.github/workflows/ci.yml b/extern/nanobind/ext/robin_map/.github/workflows/ci.yml
index 96b698a66..1f7c2657a 100644
--- a/extern/nanobind/ext/robin_map/.github/workflows/ci.yml
+++ b/extern/nanobind/ext/robin_map/.github/workflows/ci.yml
@@ -29,13 +29,13 @@ jobs:
           }
         - {
             name: macos-x64-gcc,
-            os:  macos-13,
+            os:  macos-latest,
             cxx: g++,
             cmake-build-type: Release
           }
         - {
             name: macos-x64-clang,
-            os:  macos-13,
+            os:  macos-latest,
             cxx: clang++,
             cmake-build-type: Release
           }
@@ -55,32 +55,16 @@ jobs:
             cmake-build-type: Debug
           }
         - {
-            name: windows-x64-vs-2019,
-            os: windows-2019,
-            cmake-build-type: Release,
-            cmake-generator: Visual Studio 16 2019,
-            cmake-platform: x64,
-            vcpkg-triplet: x64-windows-static-md
-          }
-        - {
-            name: windows-x86-vs-2019,
-            os: windows-2019,
-            cmake-build-type: Release,
-            cmake-generator: Visual Studio 16 2019,
-            cmake-platform: Win32,
-            vcpkg-triplet: x86-windows-static-md
-          }
-        - {
-            name: windows-x64-vs-2022,
-            os: windows-2022,
+            name: windows-x64-vs-2025,
+            os: windows-2025,
             cmake-build-type: Release,
             cmake-generator: Visual Studio 17 2022,
             cmake-platform: x64,
             vcpkg-triplet: x64-windows-static-md
           }
         - {
-            name: windows-x86-vs-2022,
-            os: windows-2022,
+            name: windows-x86-vs-2025,
+            os: windows-2025,
             cmake-build-type: Release,
             cmake-generator: Visual Studio 17 2022,
             cmake-platform: Win32,
diff --git a/extern/nanobind/ext/robin_map/CMakeLists.txt b/extern/nanobind/ext/robin_map/CMakeLists.txt
index be1a3ff1d..f7659fd41 100644
--- a/extern/nanobind/ext/robin_map/CMakeLists.txt
+++ b/extern/nanobind/ext/robin_map/CMakeLists.txt
@@ -1,6 +1,6 @@
-cmake_minimum_required(VERSION 3.5)
+cmake_minimum_required(VERSION 3.10)
 
-project(tsl-robin-map VERSION 1.4.0 LANGUAGES CXX)
+project(tsl-robin-map VERSION 1.4.1 LANGUAGES CXX)
 
 include(GNUInstallDirs)
 
diff --git a/extern/nanobind/ext/robin_map/include/tsl/robin_growth_policy.h b/extern/nanobind/ext/robin_map/include/tsl/robin_growth_policy.h
index 787e19eb4..9abba3be2 100644
--- a/extern/nanobind/ext/robin_map/include/tsl/robin_growth_policy.h
+++ b/extern/nanobind/ext/robin_map/include/tsl/robin_growth_policy.h
@@ -43,7 +43,7 @@
 #define TSL_RH_VERSION_MINOR 4
 // A change of the patch version indicates a bugfix without additional
 // functionality
-#define TSL_RH_VERSION_PATCH 0
+#define TSL_RH_VERSION_PATCH 1
 
 #ifdef TSL_DEBUG
 #define tsl_rh_assert(expr) assert(expr)
diff --git a/extern/nanobind/ext/robin_map/tests/CMakeLists.txt b/extern/nanobind/ext/robin_map/tests/CMakeLists.txt
index 6d3bbcd09..c573f2e82 100644
--- a/extern/nanobind/ext/robin_map/tests/CMakeLists.txt
+++ b/extern/nanobind/ext/robin_map/tests/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.8)
+cmake_minimum_required(VERSION 3.10)
 
 project(tsl_robin_map_tests)
 

From 6ad6416a071954b3b52b5c638159f6a59ab9f73a Mon Sep 17 00:00:00 2001
From: Alan Garny <agarny@hellix.com>
Date: Tue, 10 Feb 2026 12:10:52 +1300
Subject: [PATCH 4/4] Formatting.

---
 tests/bindings/javascript/res/index.html | 24 ++++++------------------
 tests/bindings/python/test_file_basic.py |  1 -
 tests/bindings/python/test_file_type.py  |  1 -
 tests/bindings/python/test_version.py    |  1 -
 tests/bindings/python/utils.in.py        |  1 -
 5 files changed, 6 insertions(+), 22 deletions(-)

diff --git a/tests/bindings/javascript/res/index.html b/tests/bindings/javascript/res/index.html
index a4b0c673e..f00a80b24 100644
--- a/tests/bindings/javascript/res/index.html
+++ b/tests/bindings/javascript/res/index.html
@@ -27,15 +27,9 @@
         </button>
         <div class="collapse navbar-collapse" id="navbarNav">
           <ul class="navbar-nav">
-            <li class="nav-item">
-              <a class="nav-link" href="#" id="navHome">Home</a>
-            </li>
-            <li class="nav-item">
-              <a class="nav-link" href="#" id="navSimulation">Simulation</a>
-            </li>
-            <li class="nav-item">
-              <a class="nav-link" href="#" id="navVersions">Versions</a>
-            </li>
+            <li class="nav-item"><a class="nav-link" href="#" id="navHome">Home</a></li>
+            <li class="nav-item"><a class="nav-link" href="#" id="navSimulation">Simulation</a></li>
+            <li class="nav-item"><a class="nav-link" href="#" id="navVersions">Versions</a></li>
           </ul>
         </div>
       </div>
@@ -43,9 +37,7 @@
 
     <div class="p-2">
       <div class="page" id="pageHome">
-        <p>
-          Welcome to this test page for <a href="https://opencor.ws/libopencor">libOpenCOR</a>!
-        </p>
+        <p>Welcome to this test page for <a href="https://opencor.ws/libopencor">libOpenCOR</a>!</p>
       </div>
 
       <div class="page" id="pageSimulation">
@@ -67,14 +59,10 @@
           </div>
         </div>
 
-        <button type="button" class="btn btn-secondary reset-button" id="reset">
-          <i class="fa fa-recycle"></i>
-        </button>
+        <button type="button" class="btn btn-secondary reset-button" id="reset"><i class="fa fa-recycle"></i></button>
 
         <div class="file info" id="fileInfo">
-          <p class="info">
-            <span class="bold" id="fileName"></span>is <span class="bold" id="fileType"></span>.
-          </p>
+          <p class="info"><span class="bold" id="fileName"></span>is <span class="bold" id="fileType"></span>.</p>
         </div>
 
         <div class="file error last" id="fileIssues">
diff --git a/tests/bindings/python/test_file_basic.py b/tests/bindings/python/test_file_basic.py
index 202f3fbe8..4569ff8ae 100644
--- a/tests/bindings/python/test_file_basic.py
+++ b/tests/bindings/python/test_file_basic.py
@@ -19,7 +19,6 @@
 import utils
 from utils import assert_issues
 
-
 expected_no_issues = []
 expected_non_existing_file_issues = [
     [loc.Issue.Type.Error, "The file does not exist."],
diff --git a/tests/bindings/python/test_file_type.py b/tests/bindings/python/test_file_type.py
index 87b147246..eb75d01fe 100644
--- a/tests/bindings/python/test_file_type.py
+++ b/tests/bindings/python/test_file_type.py
@@ -17,7 +17,6 @@
 import utils
 from utils import assert_issues
 
-
 expected_unknown_file_issues = [
     [
         loc.Issue.Type.Error,
diff --git a/tests/bindings/python/test_version.py b/tests/bindings/python/test_version.py
index 020d731fd..017a1a5cb 100644
--- a/tests/bindings/python/test_version.py
+++ b/tests/bindings/python/test_version.py
@@ -16,7 +16,6 @@
 import libopencor as loc
 import pathlib
 
-
 with open(pathlib.Path(__file__).parent.parent.parent / "VERSION.txt") as file:
     version_str = file.read().strip()
 
diff --git a/tests/bindings/python/utils.in.py b/tests/bindings/python/utils.in.py
index 260531be7..87243b6ee 100644
--- a/tests/bindings/python/utils.in.py
+++ b/tests/bindings/python/utils.in.py
@@ -21,7 +21,6 @@
 import platform
 import pytest
 
-
 ResourceLocation = "@RESOURCE_LOCATION@"