From 6225a660f7f228d705407d9b6b8ade8278555c8c Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Mon, 11 Aug 2025 14:32:09 +0200
Subject: [PATCH 01/16] add tests on missing comparator file

---
 tests/test_cli.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 5fed177..f7907ff 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -28,3 +28,21 @@ def test_cli():
         
     result = runner.invoke(adastop, ['compare', "--compare-to-first", 'examples/walker1.csv'])
     assert result.exit_code == 0
+
+
+
+def test_plot_no_comparator_save_file():
+    runner = CliRunner()
+    runner.invoke(adastop, ['reset', 'examples'])
+
+    result = runner.invoke(adastop, ['plot', 'examples', "test.pdf"])
+    assert result.exit_code == 1
+    assert result.exception.args[0] == 'Comparator save file not found.'
+
+def test_status_no_comparator_save_file():
+    runner = CliRunner()
+    runner.invoke(adastop, ['reset', 'examples'])
+
+    result = runner.invoke(adastop, ['status', 'examples'])
+    assert result.exit_code == 1
+    assert result.exception.args[0] == 'Comparator save file not found.'

From eebf4e9bb00d9c0f23955436338149de017305f3 Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Mon, 11 Aug 2025 14:36:16 +0200
Subject: [PATCH 02/16] update .gitignore

---
 .gitignore | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.gitignore b/.gitignore
index b6e4761..93e82b4 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,10 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+# VS Code config
+.vscode/
+
+# test artefacts
+test.pdf
+examples/.adastop_comparator.pkl

From 02eb3d1512cef69816fbd279c605802383f574dc Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Mon, 11 Aug 2025 15:16:38 +0200
Subject: [PATCH 03/16] test_cli: remove stale test.pdf and check that plot creates it

---
 tests/test_cli.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index f7907ff..5ccc874 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,6 +1,7 @@
 import pytest
 from click.testing import CliRunner
 from adastop.cli import adastop
+import os
 
 # we reuse a bit of pytest's own testing machinery, this should eventually come
 import subprocess
@@ -8,6 +9,11 @@
 
 def test_cli():
     runner = CliRunner()
+    test_pdf_path = "test.pdf"
+
+    if os.path.exists(test_pdf_path):
+        os.remove(test_pdf_path)
+
     result = runner.invoke(adastop, ['reset', 'examples'])
     assert result.exit_code == 0
     for j in range(1,6):
@@ -18,10 +24,12 @@ def test_cli():
     result = runner.invoke(adastop, ['compare', 'examples/walker3.csv'])
     assert result.exit_code == 1
 
-    result = runner.invoke(adastop, ['plot', 'examples', "test.pdf"])
+    result = runner.invoke(adastop, ['plot', 'examples', test_pdf_path])
     assert result.exit_code == 0
     result = runner.invoke(adastop, ['status', 'examples'])
     assert result.exit_code == 0
+    assert os.path.exists(test_pdf_path) == True
+
 
     result = runner.invoke(adastop, ['reset', 'examples'])
     assert result.exit_code == 0

From 3d68422176f3bf5d09b3b9450a832740190c8aa1 Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Mon, 11 Aug 2025 15:51:50 +0200
Subject: [PATCH 04/16] correct definition of quantiles

---
 adastop/compare_agents.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/adastop/compare_agents.py b/adastop/compare_agents.py
index a32d743..80be099 100644
--- a/adastop/compare_agents.py
+++ b/adastop/compare_agents.py
@@ -212,7 +212,7 @@ def partial_compare(self, eval_values, verbose=True):
         if self.agent_names is None:
             self.agent_names = list(eval_values.keys())
 
-        Z = [eval_values[agent] for agent in self.agent_names]
+        Z = np.array([np.array(eval_values[agent]) for agent in self.agent_names])
         n_managers = len(Z)
         if isinstance(self.n,int):
             self.n = np.array([self.n]*n_managers)
@@ -256,13 +256,13 @@ def partial_compare(self, eval_values, verbose=True):
 
             # Compute admissible values, i.e. values that would not be rejected nor accepted.
             admissible_values_sup = values[
-                self.level_spent + icumulative_probas <= clevel
+                self.level_spent + icumulative_probas < clevel
             ]
 
             if len(admissible_values_sup) > 0:
                 bk_sup = admissible_values_sup[0]  # the minimum admissible value
                 level_to_add = icumulative_probas[
-                    self.level_spent + icumulative_probas <= clevel
+                    self.level_spent + icumulative_probas < clevel
                 ][0]
             else:
                 # This case is possible if clevel-self.level_spent <= 1/ self.normalization (smallest proba possible),
@@ -272,7 +272,7 @@ def partial_compare(self, eval_values, verbose=True):
 
             cumulative_probas = np.arange(len(values)) / self.normalization  # corresponds to P(T < t)
             admissible_values_inf = values[
-                self.power_spent + cumulative_probas < dlevel
+                self.power_spent + cumulative_probas <= dlevel
             ]
 
             if len(admissible_values_inf) > 0:

From a096dd6354602551b94bdd988ae95c631ef1fd23 Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Mon, 11 Aug 2025 15:52:01 +0200
Subject: [PATCH 05/16] add coverage tests

---
 tests/test_error_toy.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/test_error_toy.py b/tests/test_error_toy.py
index 6df8bff..31981fd 100644
--- a/tests/test_error_toy.py
+++ b/tests/test_error_toy.py
@@ -6,11 +6,18 @@
 alpha = 0.05
 n_runs = 10
 
-def test_runtime():
+def test_partial_compare():
     idxs = []
     comparator = MultipleAgentsComparator(n=3, K=3, B=B,  alpha=alpha, seed=42, beta = 0.01)
     evals = {"Agent "+str(k):np.random.normal(size=3) for k in range(3)}
     comparator.partial_compare(evals)
+
+
+def test_partial_compare_not_enough_points():
+    comparator = MultipleAgentsComparator(n=3, K=3, B=5000,  alpha=-1e-5, seed=42, beta = 0.01)
+    evals = {"Agent 1":np.array([0,0,0]),"Agent 2":np.array([0,0,0]),"Agent 3":np.array([0,0,0])}
+    comparator.partial_compare(evals)
+
     
 
 @pytest.mark.parametrize("K,n", [(10,2),(5,3), (3, 5), (1, 15)])

From dc43dc8a49893378693db47dd5c7e4249cd9f9f3 Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Mon, 11 Aug 2025 15:53:54 +0200
Subject: [PATCH 06/16] rename test file

---
 tests/{test_error_toy.py => test_compare_agents.py} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename tests/{test_error_toy.py => test_compare_agents.py} (100%)

diff --git a/tests/test_error_toy.py b/tests/test_compare_agents.py
similarity index 100%
rename from tests/test_error_toy.py
rename to tests/test_compare_agents.py

From a6228a6f93546a29f8a2eff0ef1888aff87b45f1 Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Mon, 11 Aug 2025 16:42:08 +0200
Subject: [PATCH 07/16] update tests to improve coverage

---
 tests/test_plot.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/test_plot.py b/tests/test_plot.py
index 91e9fb9..279d4d5 100644
--- a/tests/test_plot.py
+++ b/tests/test_plot.py
@@ -35,9 +35,9 @@ def test_plot_sota():
     while not comparator.is_finished:
         if len(evals) >0:
             for k in range(n_agents):
-                evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.random.normal(size=n)])
+                evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.abs(2*K-k)+np.random.normal(size=n)])
         else:
-            evals = {"Agent "+str(k): np.random.normal(size=n) for k in range(n_agents)}
+            evals = {"Agent "+str(k): np.abs(2*K-k)+np.random.normal(size=n) for k in range(n_agents)}
         comparator.partial_compare(evals)
     comparator.plot_results_sota()
     # plt.savefig('fig2.pdf')
@@ -51,9 +51,9 @@ def test_plot_noteq():
     while not comparator.is_finished:
         if len(evals) >0:
             for k in range(n_agents):
-                evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , k+np.random.normal(size=10)])
+                evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , np.abs(2*K-k)+np.random.normal(size=10)])
         else:
-            evals = {"Agent "+str(k): np.random.normal(size=10)+k for k in range(n_agents)}
+            evals = {"Agent "+str(k): np.random.normal(size=10)+np.abs(2*K-k) for k in range(n_agents)}
         comparator.partial_compare(evals)
     # plt.savefig('fig2.pdf')
     fig, axes= plt.subplots(1,2)

From 48fef83e829f6948e6d71bd737252c0bcd551d88 Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Tue, 12 Aug 2025 10:37:21 +0200
Subject: [PATCH 08/16] bug fix: handle agents with different numbers of evaluations

---
 adastop/compare_agents.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/adastop/compare_agents.py b/adastop/compare_agents.py
index 80be099..40a97ea 100644
--- a/adastop/compare_agents.py
+++ b/adastop/compare_agents.py
@@ -212,7 +212,7 @@ def partial_compare(self, eval_values, verbose=True):
         if self.agent_names is None:
             self.agent_names = list(eval_values.keys())
 
-        Z = np.array([np.array(eval_values[agent]) for agent in self.agent_names])
+        Z = [np.array(eval_values[agent]) for agent in self.agent_names]
         n_managers = len(Z)
         if isinstance(self.n,int):
             self.n = np.array([self.n]*n_managers)

From 70e33bdad5422d8214f109ae2224624ea1b0d7a1 Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Tue, 12 Aug 2025 10:59:47 +0200
Subject: [PATCH 09/16] improve plotting tests coverage

---
 tests/test_plot.py | 44 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/tests/test_plot.py b/tests/test_plot.py
index 279d4d5..e36303d 100644
--- a/tests/test_plot.py
+++ b/tests/test_plot.py
@@ -35,9 +35,9 @@ def test_plot_sota():
     while not comparator.is_finished:
         if len(evals) >0:
             for k in range(n_agents):
-                evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.abs(2*K-k)+np.random.normal(size=n)])
+                evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.random.normal(size=n)])
         else:
-            evals = {"Agent "+str(k): np.abs(2*K-k)+np.random.normal(size=n) for k in range(n_agents)}
+            evals = {"Agent "+str(k): np.random.normal(size=n) for k in range(n_agents)}
         comparator.partial_compare(evals)
     comparator.plot_results_sota()
     # plt.savefig('fig2.pdf')
@@ -51,9 +51,9 @@ def test_plot_noteq():
     while not comparator.is_finished:
         if len(evals) >0:
             for k in range(n_agents):
-                evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , np.abs(2*K-k)+np.random.normal(size=10)])
+                evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , k+np.random.normal(size=10)])
         else:
-            evals = {"Agent "+str(k): np.random.normal(size=10)+np.abs(2*K-k) for k in range(n_agents)}
+            evals = {"Agent "+str(k): np.random.normal(size=10)+k for k in range(n_agents)}
         comparator.partial_compare(evals)
     # plt.savefig('fig2.pdf')
     fig, axes= plt.subplots(1,2)
@@ -76,3 +76,39 @@ def test_plot_sota_noteq():
     # plt.savefig('fig2.pdf')
     fig, axes= plt.subplots(1,2)
     comparator.plot_results_sota(axes=axes)
+
+
+
+def test_plot_noteq2():
+    n_agents = 3
+    comparator = MultipleAgentsComparator(n=10, K=K, B=B,  alpha=alpha, seed=42, beta = 0.01)
+    evals = {}
+    while not comparator.is_finished:
+        if len(evals) >0:
+            for k in range(n_agents):
+                evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , np.abs(2*K-k)+np.random.normal(size=10)])
+        else:
+            evals = {"Agent "+str(k): np.random.normal(size=10)+np.abs(2*K-k) for k in range(n_agents)}
+        comparator.partial_compare(evals)
+    # plt.savefig('fig2.pdf')
+    fig, axes= plt.subplots(1,2)
+    comparator.plot_results(axes=axes)
+
+def test_plot_sota_noteq2():
+    n_agents = 3
+    comparisons = np.array([(0,i) for i in [1,2]])
+    comparator = MultipleAgentsComparator(n=10, K=K, B=B,  alpha=alpha, 
+                                          comparisons=comparisons, seed=42, beta = 0.01)
+    evals = {}
+    while not comparator.is_finished:
+        if len(evals) >0:
+            for k in range(n_agents):
+                evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.random.normal(size=10)+np.abs(2*K-k)])
+        else:
+            evals = {"Agent "+str(k): np.random.normal(size=10)+np.abs(2*K-k) for k in range(n_agents)}
+        comparator.partial_compare(evals)
+    comparator.plot_results_sota()
+    # plt.savefig('fig2.pdf')
+    fig, axes= plt.subplots(1,2)
+    comparator.plot_results_sota(axes=axes)
+

From d6c02171284814efba20cee0c75dba733ce29ae1 Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Tue, 12 Aug 2025 11:17:01 +0200
Subject: [PATCH 10/16] Trigger CI


From 4c74b10c416cdbead1c7f3e8aaba163011e1502d Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Tue, 12 Aug 2025 15:50:14 +0200
Subject: [PATCH 11/16] typo + update tuto

---
 docs/tutorials.md  | 13 ++++++++++++-
 docs/tutorials.org | 17 ++++++++++++++++-
 docs/user_guide.md |  2 +-
 3 files changed, 29 insertions(+), 3 deletions(-)

diff --git a/docs/tutorials.md b/docs/tutorials.md
index 233eac5..902c821 100644
--- a/docs/tutorials.md
+++ b/docs/tutorials.md
@@ -15,6 +15,17 @@ The command line interface takes csv files as input. Each csv file must contain
 Below, we give an example based on files containing the evaluations of PPO,DDPG,SAC,TRPO, four Deep Reinforcement Learning algorithmes, given in the \`examples\` directory of the main repository.
 
 
+## Installation
+
+To install adastop, use pip:
+```bash
+pip install adastop
+```
+
+This will automatically install the command line interface as well as the python library.
+
+
+
 ## Help for cli tool
 
 The AdaStop algorithm is initialized with the first test done through \`adastop compare\` and the current state of AdaStop is then saved in a pickle file. The help of \`adastop\` command line can be obtained with the following:
@@ -90,7 +101,7 @@ The input format of adastop is under the form of a csv file containing the score
 
 Let us launch AdaStop on this first batch of data.
 
-First, we clean up the corrent directory of any litter files that could have been spawned by a previous usage of \`adastop\` (if you never used \`adastop\` before, this command will not have any effect).
+First, we clean up the current directory of any litter files that could have been spawned by a previous usage of \`adastop\` (if you never used \`adastop\` before, this command will not have any effect).
 
 ```bash
 adastop reset . # reset the state of the comparator (remove hidden pickle file)
diff --git a/docs/tutorials.org b/docs/tutorials.org
index 77444ab..db09d1b 100644
--- a/docs/tutorials.org
+++ b/docs/tutorials.org
@@ -15,6 +15,21 @@ Please note that if, in the process of the algorithm, all the comparisons for on
 
 Below, we give an example based on files containing the evaluations of PPO,DDPG,SAC,TRPO, four Deep Reinforcement Learning algorithmes, given in the =examples= directory of the main repository.
 
+
+
+
+** Installation
+
+To install adastop, use pip:
+
+#+begin_src bash :session *shell* :results verbatim :exports both
+pip install adastop
+#+end_src
+
+This will automatically install the command line interface as well as the python library.
+
+
+
 ** Help for cli tool 
 
 The AdaStop algorithm is initialized with the first test done through =adastop compare= and the current state of AdaStop is then saved in a pickle file. The help of =adastop= command line can be obtained with the following:
@@ -47,7 +62,7 @@ The input format of adastop is under the form of a csv file containing the score
 
 Let us launch AdaStop on this first batch of data. 
 
-First, we clean up the corrent directory of any litter files that could have been spawned by a previous usage of =adastop= (if you never used =adastop= before, this command will not have any effect).
+First, we clean up the current directory of any litter files that could have been spawned by a previous usage of =adastop= (if you never used =adastop= before, this command will not have any effect).
 
 #+begin_src bash :session *shell* :results verbatim :exports both 
 adastop reset . # reset the state of the comparator (remove hidden pickle file)
diff --git a/docs/user_guide.md b/docs/user_guide.md
index 8e871f2..30a7294 100644
--- a/docs/user_guide.md
+++ b/docs/user_guide.md
@@ -60,7 +60,7 @@ Then, once you did the comparison on the first file, you can use iteratively `ad
 
 #### Choice of comparisons
 
-In adastopn, one can choose which comparisons are done. The default is to do all the pairwise comparisons between two algorithms. In practice, it is sometimes sufficient to compare to only one of them, a benchmark, for this the `--compare-to-first` argument can be used. For a more fine-grained control on which comparison to do, the python API can take the comparisons as input.
+In adastop, one can choose which comparisons are done. The default is to do all the pairwise comparisons between two algorithms. In practice, it is sometimes sufficient to compare to only one of them, a benchmark, for this the `--compare-to-first` argument can be used. For a more fine-grained control on which comparison to do, the python API can take the comparisons as input.
 
 **Remark**: it is not statistically ok to execute adastop several times and interpret the result as though it was only one test, if adastop is run several times this is multiple testing and some calibration has to be done. Instead, it is better to do all the comparisons at the same time, running the adastop algorithm only once, and adastop will handle the multiplicity of hypotheses by itself.
 

From dfbdb1548285b785988d0ac6f64cbaaf6d9dca0b Mon Sep 17 00:00:00 2001
From: JulienT01 <julien.tj@gmail.com>
Date: Tue, 12 Aug 2025 16:17:53 +0200
Subject: [PATCH 12/16] remove useless index

---
 docs/tutorials.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/tutorials.md b/docs/tutorials.md
index 902c821..cde9077 100644
--- a/docs/tutorials.md
+++ b/docs/tutorials.md
@@ -155,14 +155,14 @@ adastop compare --n-groups 5 --size-group 5  walker5.csv
 
 Test is finished, decisions are
 
-|   | Agent1 vs Agent2 | mean Agent1 | mean Agent2 | mean diff | std Agent 1 | std Agent 2 | decisions |
-|--- |---------------- |----------- |----------- |--------- |----------- |----------- |--------- |
-| 0 | PPO vs DDPG      | 2901.53     | 884.119     | 2017.41   | 1257.93     | 535.74      | larger    |
-| 0 | PPO vs SAC       | 2901.53     | 4543.4      | -1641.87  | 1257.93     | 432.13      | smaller   |
-| 0 | PPO vs TRPO      | 2901.53     | 1215.42     | 1686.11   | 1257.93     | 529.672     | larger    |
-| 0 | DDPG vs SAC      | 884.119     | 4543.4      | -3659.28  | 535.74      | 432.13      | smaller   |
-| 0 | DDPG vs TRPO     | 884.119     | 1215.42     | -331.297  | 535.74      | 529.672     | smaller   |
-| 0 | SAC vs TRPO      | 4543.4      | 1215.42     | 3327.98   | 432.13      | 529.672     | larger    |
+| Agent1 vs Agent2 | mean Agent1 | mean Agent2 | mean diff | std Agent 1 | std Agent 2 | decisions |
+|----------------- |------------ |------------ |---------- |------------ |------------ |---------- |
+| PPO vs DDPG      | 2901.53     | 884.119     | 2017.41   | 1257.93     | 535.74      | larger    |
+| PPO vs SAC       | 2901.53     | 4543.4      | -1641.87  | 1257.93     | 432.13      | smaller   |
+| PPO vs TRPO      | 2901.53     | 1215.42     | 1686.11   | 1257.93     | 529.672     | larger    |
+| DDPG vs SAC      | 884.119     | 4543.4      | -3659.28  | 535.74      | 432.13      | smaller   |
+| DDPG vs TRPO     | 884.119     | 1215.42     | -331.297  | 535.74      | 529.672     | smaller   |
+| SAC vs TRPO      | 4543.4      | 1215.42     | 3327.98   | 432.13      | 529.672     | larger    |
 
 Comparator Saved
 

From e0b76202401280f32928f94aa881c19751f1342e Mon Sep 17 00:00:00 2001
From: TimotheeMathieu <timothee.mathieu@inria.fr>
Date: Wed, 13 Aug 2025 09:50:29 +0200
Subject: [PATCH 13/16] fix seeds in tests

---
 tests/test_cli.py            |  6 +++---
 tests/test_compare_agents.py | 22 +++++++++++++++-------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 5ccc874..777dfed 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -18,10 +18,10 @@ def test_cli():
     assert result.exit_code == 0
     for j in range(1,6):
         
-        result = runner.invoke(adastop, ['compare', 'examples/walker'+str(j)+'.csv'])
+        result = runner.invoke(adastop, ['compare', "--seed", "42",  'examples/walker'+str(j)+'.csv'])
         assert result.exit_code == 0
 
-    result = runner.invoke(adastop, ['compare', 'examples/walker3.csv'])
+    result = runner.invoke(adastop, ['compare',"--seed", "42",  'examples/walker3.csv'])
     assert result.exit_code == 1
 
     result = runner.invoke(adastop, ['plot', 'examples', test_pdf_path])
@@ -34,7 +34,7 @@ def test_cli():
     result = runner.invoke(adastop, ['reset', 'examples'])
     assert result.exit_code == 0
         
-    result = runner.invoke(adastop, ['compare', "--compare-to-first", 'examples/walker1.csv'])
+    result = runner.invoke(adastop, ['compare', "--compare-to-first","--seed", "42",  'examples/walker1.csv'])
     assert result.exit_code == 0
 
 
diff --git a/tests/test_compare_agents.py b/tests/test_compare_agents.py
index 31981fd..2998d89 100644
--- a/tests/test_compare_agents.py
+++ b/tests/test_compare_agents.py
@@ -5,11 +5,13 @@
 B = 5000
 alpha = 0.05
 n_runs = 10
+seed = 42
 
 def test_partial_compare():
+    rng = np.random.RandomState(seed)
     idxs = []
     comparator = MultipleAgentsComparator(n=3, K=3, B=B,  alpha=alpha, seed=42, beta = 0.01)
-    evals = {"Agent "+str(k):np.random.normal(size=3) for k in range(3)}
+    evals = {"Agent "+str(k): rng.normal(size=3) for k in range(3)}
     comparator.partial_compare(evals)
 
 
@@ -22,6 +24,8 @@ def test_partial_compare_not_enough_points():
 
 @pytest.mark.parametrize("K,n", [(10,2),(5,3), (3, 5), (1, 15)])
 def test_type1(K,n):
+    rng = np.random.RandomState(seed)
+
     idxs = []
     n_agents = 3
     for M in range(n_runs):
@@ -30,9 +34,9 @@ def test_type1(K,n):
         while not comparator.is_finished:
             if len(evals) >0:
                 for k in range(n_agents):
-                    evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.random.normal(size=n)])
+                    evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , rng.normal(size=n)])
             else:
-                evals = {"Agent "+str(k): np.random.normal(size=n) for k in range(n_agents)}
+                evals = {"Agent "+str(k): rng.normal(size=n) for k in range(n_agents)}
             comparator.partial_compare(evals)
         idxs.append(not("equal" in comparator.decisions.values()))
         print(comparator.get_results())
@@ -40,6 +44,8 @@ def test_type1(K,n):
         
 @pytest.mark.parametrize("K,n", [(5,3), (3, 5), (1, 15)])
 def test_type1_large_beta(K,n):
+    rng = np.random.RandomState(seed)
+
     idxs = []
     n_agents = 3
     for M in range(n_runs):
@@ -48,9 +54,9 @@ def test_type1_large_beta(K,n):
         while not comparator.is_finished:
             if len(evals) >0:
                 for k in range(n_agents):
-                    evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.random.normal(size=n)])
+                    evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , rng.normal(size=n)])
             else:
-                evals = {"Agent "+str(k): np.random.normal(size=n) for k in range(n_agents)}
+                evals = {"Agent "+str(k): rng.normal(size=n) for k in range(n_agents)}
             comparator.partial_compare(evals)
         idxs.append(not("equal" in comparator.decisions.values()))
         print(comparator.get_results())
@@ -58,6 +64,8 @@ def test_type1_large_beta(K,n):
         
 @pytest.mark.parametrize("K,n", [(3, 5), (1, 15)])
 def test_type2(K,n):
+    rng = np.random.RandomState(seed)
+
     idxs = []
     n_agents = 2
     for M in range(n_runs):
@@ -66,9 +74,9 @@ def test_type2(K,n):
         while not comparator.is_finished:
             if len(evals) >0:
                 for k in range(n_agents):
-                    evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] ,np.random.normal(size=n)+2*k])
+                    evals["Agent "+str(k)] = np.hstack([evals["Agent "+str(k)] , rng.normal(size=n)+2*k])
             else:
-                evals = {"Agent "+str(k): np.random.normal(size=n)+2*k for k in range(n_agents)}
+                evals = {"Agent "+str(k): rng.normal(size=n)+2*k for k in range(n_agents)}
             comparator.partial_compare(evals)
         idxs.append(not("equal" in comparator.decisions.values()))
     assert np.mean(idxs) > 0.3, "type 2 error seems to be too large."

From 770276fe8ed325c0c94057e85141ee9573dcb1fe Mon Sep 17 00:00:00 2001
From: TimotheeMathieu <timothee.mathieu@inria.fr>
Date: Wed, 13 Aug 2025 10:08:18 +0200
Subject: [PATCH 14/16] switch seed for cli

---
 tests/test_cli.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/test_cli.py b/tests/test_cli.py
index 777dfed..05c53fe 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -18,10 +18,10 @@ def test_cli():
     assert result.exit_code == 0
     for j in range(1,6):
         
-        result = runner.invoke(adastop, ['compare', "--seed", "42",  'examples/walker'+str(j)+'.csv'])
+        result = runner.invoke(adastop, ['compare', "--seed", "1",  'examples/walker'+str(j)+'.csv'])
         assert result.exit_code == 0
 
-    result = runner.invoke(adastop, ['compare',"--seed", "42",  'examples/walker3.csv'])
+    result = runner.invoke(adastop, ['compare',"--seed", "1",  'examples/walker3.csv'])
     assert result.exit_code == 1
 
     result = runner.invoke(adastop, ['plot', 'examples', test_pdf_path])
@@ -34,7 +34,7 @@ def test_cli():
     result = runner.invoke(adastop, ['reset', 'examples'])
     assert result.exit_code == 0
         
-    result = runner.invoke(adastop, ['compare', "--compare-to-first","--seed", "42",  'examples/walker1.csv'])
+    result = runner.invoke(adastop, ['compare', "--compare-to-first","--seed", "1",  'examples/walker1.csv'])
     assert result.exit_code == 0
 
 
From dcd85dbfb77d0d75c2f9a4903aba24a5c27711f9 Mon Sep 17 00:00:00 2001
From: TimotheeMathieu <timothee.mathieu@inria.fr>
Date: Wed, 13 Aug 2025 10:22:29 +0200
Subject: [PATCH 15/16] correct ValueError

---
 adastop/cli.py    | 3 +--
 tests/test_cli.py | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/adastop/cli.py b/adastop/cli.py
index 1668420..a6513a7 100644
--- a/adastop/cli.py
+++ b/adastop/cli.py
@@ -61,9 +61,8 @@ def compare(ctx, input_file, n_groups, size_group, n_permutations, alpha, beta,
             if i in comparator.current_comparisons.ravel():
                 names.append(comparator.agent_names[i])
 
-
         Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in names]
-        if len(Z[0]) > comparator.K * n_fits_per_group:
+        if len(names) == 0:
             raise ValueError('Error: you tried to use more group than what was initially declared, this is not allowed by the theory.')
         assert "continue" in list(comparator.decisions.values()), "Test finished at last iteration."
 
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 05c53fe..898ae7f 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -17,7 +17,6 @@ def test_cli():
     result = runner.invoke(adastop, ['reset', 'examples'])
     assert result.exit_code == 0
     for j in range(1,6):
-        
         result = runner.invoke(adastop, ['compare', "--seed", "1",  'examples/walker'+str(j)+'.csv'])
         assert result.exit_code == 0
 

From 0db00a8814382393d13b68b894feb70e9477ec8a Mon Sep 17 00:00:00 2001
From: TimotheeMathieu <timothee.mathieu@inria.fr>
Date: Wed, 13 Aug 2025 10:31:54 +0200
Subject: [PATCH 16/16] better message and test message

---
 adastop/cli.py    | 2 +-
 tests/test_cli.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/adastop/cli.py b/adastop/cli.py
index a6513a7..d812e60 100644
--- a/adastop/cli.py
+++ b/adastop/cli.py
@@ -63,7 +63,7 @@ def compare(ctx, input_file, n_groups, size_group, n_permutations, alpha, beta,
 
         Z = [np.hstack([comparator.eval_values[agent], df[agent]]) for agent in names]
         if len(names) == 0:
-            raise ValueError('Error: you tried to use more group than what was initially declared, this is not allowed by the theory.')
+            raise ValueError('Error: you tried to use more group than necessary. Use adastop status to see current status for more info.')
         assert "continue" in list(comparator.decisions.values()), "Test finished at last iteration."
 
     else:
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 898ae7f..08e6afd 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -22,6 +22,8 @@ def test_cli():
 
     result = runner.invoke(adastop, ['compare',"--seed", "1",  'examples/walker3.csv'])
     assert result.exit_code == 1
+    assert result.exception.args[0] == 'Error: you tried to use more group than necessary. Use adastop status to see current status for more info.'
+
 
     result = runner.invoke(adastop, ['plot', 'examples', test_pdf_path])
     assert result.exit_code == 0