From f1b9d44304f7aa7d3549321a7526b28acd8cf94d Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
Date: Mon, 2 Feb 2026 11:32:57 -0600
Subject: [PATCH 1/6] Support --no-batched CLI option

The option sets the `m_skip_batched` boolean member of the
`benchmark_base` class. The accessors `bool get_skip_batched() const`
and `benchmark_base &set_skip_batched(bool)` are added.

`m_skip_batched` is also added to the `state` class, along with the
accessors `bool get_skip_batched() const` and
`void set_skip_batched(bool)`.

CLI help file documents `--no-batched` option.
---
 docs/cli_help.md              |  6 ++++++
 nvbench/benchmark_base.cuh    | 13 +++++++++++++
 nvbench/detail/state_exec.cuh |  2 +-
 nvbench/option_parser.cu      | 17 +++++++++++++++++
 nvbench/option_parser.cuh     |  1 +
 nvbench/state.cuh             |  9 +++++++++
 nvbench/state.cxx             |  1 +
 7 files changed, 48 insertions(+), 1 deletion(-)
diff --git a/docs/cli_help.md b/docs/cli_help.md
index 35265a1d..8313adb6 100644
--- a/docs/cli_help.md
+++ b/docs/cli_help.md
@@ -115,6 +115,12 @@
   * Applies to the most recent `--benchmark`, or all benchmarks if specified
     before any `--benchmark` arguments.
 
+* `--no-batched`
+  * Do not run batched measurements even if enabled.
+  * Intended to shorten run-time when batched measurements are not of interest.
+  * Applied to the most recent `--benchmark`, or all benchrmarks if specified
+    before any `--benchmark` arguments.
+
 ## Stopping Criteria
 
 * `--timeout <seconds>`
diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh
index 3eddf2ba..0ed901e1 100644
--- a/nvbench/benchmark_base.cuh
+++ b/nvbench/benchmark_base.cuh
@@ -183,6 +183,18 @@ struct benchmark_base
   }
   /// @}
 
+  /// If true, the batched measurements for benchmark are not run. This is intended for use to
+  /// save resources when only non-batched measurements are of interest, although batched
+  /// measurements are meaningful and code to exercise them is compiled. This option has not
+  /// effect for CPU only benchmarks and for benchmarks tagged with no_batch tag. @{
+  [[nodiscard]] bool get_skip_batched() const { return m_skip_batched; }
+  benchmark_base &set_skip_batched(bool v)
+  {
+    m_skip_batched = v;
+    return *this;
+  }
+  /// @}
+
   /// If true, the benchmark does not use the blocking_kernel. This is intended
   /// for use with external profiling tools. @{
   [[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; }
@@ -304,6 +316,7 @@ protected:
   bool m_is_cpu_only{false};
   bool m_run_once{false};
   bool m_disable_blocking_kernel{false};
+  bool m_skip_batched{false};
 
   nvbench::int64_t m_min_samples{10};
 
diff --git a/nvbench/detail/state_exec.cuh b/nvbench/detail/state_exec.cuh
index bf4fb9e2..9a65fc25 100644
--- a/nvbench/detail/state_exec.cuh
+++ b/nvbench/detail/state_exec.cuh
@@ -185,7 +185,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
       static_assert(!(tags & no_batch), "Hot measurement doesn't support the `no_batch` exec_tag.");
       static_assert(!(tags & no_gpu), "Hot measurement doesn't support the `no_gpu` exec_tag.");
 
-      if (!this->get_run_once())
+      if (!this->get_run_once() && !this->get_skip_batched())
       {
         using measure_t = nvbench::detail::measure_hot<KL>;
         measure_t measure{*this, kernel_launcher};
diff --git a/nvbench/option_parser.cu b/nvbench/option_parser.cu
index efddbb44..0957ce5c 100644
--- a/nvbench/option_parser.cu
+++ b/nvbench/option_parser.cu
@@ -467,6 +467,11 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
       this->enable_profile();
       first += 1;
     }
+    else if (arg == "--no-batched")
+    {
+      this->disable_batched();
+      first += 1;
+    }
     else if (arg == "--quiet" || arg == "-q")
     {
       // Setting this flag prevents the default stdout printer from being
@@ -762,6 +767,18 @@ void option_parser::enable_profile()
   bench.set_run_once(true);
 }
 
+void option_parser::disable_batched()
+{
+  // If no active benchmark, save args as global
+  if (m_benchmarks.empty())
+  {
+    m_global_benchmark_args.push_back("--no-batched");
+    return;
+  }
+  benchmark_base &bench = *m_benchmarks.back();
+  bench.set_skip_batched(true);
+}
+
 void option_parser::add_benchmark(const std::string &name)
 try
 {
diff --git a/nvbench/option_parser.cuh b/nvbench/option_parser.cuh
index 110a844a..7572ad2e 100644
--- a/nvbench/option_parser.cuh
+++ b/nvbench/option_parser.cuh
@@ -90,6 +90,7 @@ private:
   void set_stopping_criterion(const std::string &criterion);
 
   void enable_profile();
+  void disable_batched();
 
   void add_benchmark(const std::string &name);
   void replay_global_args();
diff --git a/nvbench/state.cuh b/nvbench/state.cuh
index 61fd8406..326dc411 100644
--- a/nvbench/state.cuh
+++ b/nvbench/state.cuh
@@ -161,6 +161,14 @@ struct state
   void set_run_once(bool v) { m_run_once = v; }
   /// @}
 
+  /// If true, the batched measurements of benchmark are not run. This is intended for use to
+  /// save resources when only non-batched measurements are of interest, although batched
+  /// measurements are meaningful and code to exercise them is compiled. This option has not
+  /// effect for CPU only benchmarks and for benchmarks tagged with no_batch tag. @{
+  [[nodiscard]] bool get_skip_batched() const { return m_skip_batched; }
+  void set_skip_batched(bool v) { m_skip_batched = v; }
+  /// @}
+
   /// If true, the benchmark does not use the blocking_kernel. This is intended
   /// for use with external profiling tools. @{
   [[nodiscard]] bool get_disable_blocking_kernel() const { return m_disable_blocking_kernel; }
@@ -306,6 +314,7 @@ private:
   bool m_is_cpu_only{false};
   bool m_run_once{false};
   bool m_disable_blocking_kernel{false};
+  bool m_skip_batched{false};
 
   nvbench::criterion_params m_criterion_params;
   std::string m_stopping_criterion;
diff --git a/nvbench/state.cxx b/nvbench/state.cxx
index 9d526673..2ae3c111 100644
--- a/nvbench/state.cxx
+++ b/nvbench/state.cxx
@@ -57,6 +57,7 @@ state::state(const benchmark_base &bench,
     , m_is_cpu_only(bench.get_is_cpu_only())
     , m_run_once{bench.get_run_once()}
     , m_disable_blocking_kernel{bench.get_disable_blocking_kernel()}
+    , m_skip_batched{bench.get_skip_batched()}
     , m_criterion_params{bench.get_criterion_params()}
     , m_stopping_criterion(bench.get_stopping_criterion())
     , m_min_samples{bench.get_min_samples()}

From cff6df9bb23c903bf3272f256af49ed5afebc276 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
Date: Mon, 2 Feb 2026 12:28:39 -0600
Subject: [PATCH 2/6] Renamed option to --no-batch to stay aligned with tag
 name

---
 docs/cli_help.md         | 2 +-
 nvbench/option_parser.cu | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/cli_help.md b/docs/cli_help.md
index 8313adb6..43656a64 100644
--- a/docs/cli_help.md
+++ b/docs/cli_help.md
@@ -115,7 +115,7 @@
   * Applies to the most recent `--benchmark`, or all benchmarks if specified
     before any `--benchmark` arguments.
 
-* `--no-batched`
+* `--no-batch`
   * Do not run batched measurements even if enabled.
   * Intended to shorten run-time when batched measurements are not of interest.
   * Applied to the most recent `--benchmark`, or all benchrmarks if specified
diff --git a/nvbench/option_parser.cu b/nvbench/option_parser.cu
index 0957ce5c..530ae94a 100644
--- a/nvbench/option_parser.cu
+++ b/nvbench/option_parser.cu
@@ -467,7 +467,7 @@ void option_parser::parse_range(option_parser::arg_iterator_t first,
       this->enable_profile();
       first += 1;
     }
-    else if (arg == "--no-batched")
+    else if (arg == "--no-batch")
     {
       this->disable_batched();
       first += 1;
@@ -772,7 +772,7 @@ void option_parser::disable_batched()
   // If no active benchmark, save args as global
   if (m_benchmarks.empty())
   {
-    m_global_benchmark_args.push_back("--no-batched");
+    m_global_benchmark_args.push_back("--no-batch");
     return;
   }
   benchmark_base &bench = *m_benchmarks.back();

From 27d64923558c652684711372ad7edd12a32b8793 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
Date: Mon, 2 Feb 2026 12:43:39 -0600
Subject: [PATCH 3/6] Factor out check for whether to skip hot measurement to a
 nvbench::state private method

---
 nvbench/detail/state_exec.cuh | 2 +-
 nvbench/state.cuh             | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/nvbench/detail/state_exec.cuh b/nvbench/detail/state_exec.cuh
index 9a65fc25..0a012423 100644
--- a/nvbench/detail/state_exec.cuh
+++ b/nvbench/detail/state_exec.cuh
@@ -185,7 +185,7 @@ void state::exec(ExecTags tags, KernelLauncher &&kernel_launcher)
       static_assert(!(tags & no_batch), "Hot measurement doesn't support the `no_batch` exec_tag.");
       static_assert(!(tags & no_gpu), "Hot measurement doesn't support the `no_gpu` exec_tag.");
 
-      if (!this->get_run_once() && !this->get_skip_batched())
+      if (!this->skip_hot_measurement())
       {
         using measure_t = nvbench::detail::measure_hot<KL>;
         measure_t measure{*this, kernel_launcher};
diff --git a/nvbench/state.cuh b/nvbench/state.cuh
index 326dc411..084324f1 100644
--- a/nvbench/state.cuh
+++ b/nvbench/state.cuh
@@ -306,6 +306,8 @@ private:
         std::optional<nvbench::device_info> device,
         std::size_t type_config_index);
 
+  bool skip_hot_measurement() { return get_run_once() || get_skip_batched(); }
+
   std::reference_wrapper<const nvbench::benchmark_base> m_benchmark;
   nvbench::named_values m_axis_values;
   std::optional<nvbench::device_info> m_device;

From cfb4a9b8b0b4601f9205906d79e8408107442a2c Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
Date: Mon, 2 Feb 2026 12:58:15 -0600
Subject: [PATCH 4/6] Fix for comment grammar

---
 nvbench/benchmark_base.cuh | 2 +-
 nvbench/state.cuh          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh
index 0ed901e1..be631749 100644
--- a/nvbench/benchmark_base.cuh
+++ b/nvbench/benchmark_base.cuh
@@ -185,7 +185,7 @@ struct benchmark_base
 
   /// If true, the batched measurements for benchmark are not run. This is intended for use to
   /// save resources when only non-batched measurements are of interest, although batched
-  /// measurements are meaningful and code to exercise them is compiled. This option has not
+  /// measurements are meaningful and code to exercise them is compiled. This option has no
   /// effect for CPU only benchmarks and for benchmarks tagged with no_batch tag. @{
   [[nodiscard]] bool get_skip_batched() const { return m_skip_batched; }
   benchmark_base &set_skip_batched(bool v)
diff --git a/nvbench/state.cuh b/nvbench/state.cuh
index 084324f1..d815df75 100644
--- a/nvbench/state.cuh
+++ b/nvbench/state.cuh
@@ -163,7 +163,7 @@ struct state
 
   /// If true, the batched measurements of benchmark are not run. This is intended for use to
   /// save resources when only non-batched measurements are of interest, although batched
-  /// measurements are meaningful and code to exercise them is compiled. This option has not
+  /// measurements are meaningful and code to exercise them is compiled. This option has no
   /// effect for CPU only benchmarks and for benchmarks tagged with no_batch tag. @{
   [[nodiscard]] bool get_skip_batched() const { return m_skip_batched; }
   void set_skip_batched(bool v) { m_skip_batched = v; }

From f049f109776a89639ae05c0bb263a7e168ea3899 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
Date: Mon, 2 Feb 2026 14:41:42 -0600
Subject: [PATCH 5/6] Fix typo

---
 docs/cli_help.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/cli_help.md b/docs/cli_help.md
index 43656a64..15a89a73 100644
--- a/docs/cli_help.md
+++ b/docs/cli_help.md
@@ -118,7 +118,7 @@
 * `--no-batch`
   * Do not run batched measurements even if enabled.
   * Intended to shorten run-time when batched measurements are not of interest.
-  * Applied to the most recent `--benchmark`, or all benchrmarks if specified
+  * Applied to the most recent `--benchmark`, or all benchmarks if specified
     before any `--benchmark` arguments.
 
 ## Stopping Criteria

From a33a454a2d4fc8ab08e8e36d2bfbc27da19ee7c4 Mon Sep 17 00:00:00 2001
From: Oleksandr Pavlyk <21087696+oleksandr-pavlyk@users.noreply.github.com>
Date: Mon, 2 Feb 2026 14:42:07 -0600
Subject: [PATCH 6/6] Make skip_hot_measurement method const

---
 nvbench/state.cuh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/nvbench/state.cuh b/nvbench/state.cuh
index d815df75..f71cbeb2 100644
--- a/nvbench/state.cuh
+++ b/nvbench/state.cuh
@@ -306,7 +306,7 @@ private:
         std::optional<nvbench::device_info> device,
         std::size_t type_config_index);
 
-  bool skip_hot_measurement() { return get_run_once() || get_skip_batched(); }
+  [[nodiscard]] bool skip_hot_measurement() const { return get_run_once() || get_skip_batched(); }
 
   std::reference_wrapper<const nvbench::benchmark_base> m_benchmark;
   nvbench::named_values m_axis_values;