From 6e029f0d9eb053cfdd0b93421b763a54d064f8a0 Mon Sep 17 00:00:00 2001 From: CL-ModelCloud Date: Thu, 5 Dec 2024 11:14:44 +0000 Subject: [PATCH 1/4] Add parameter --- evalplus/evaluate.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/evalplus/evaluate.py b/evalplus/evaluate.py index 3e784bc7fb..cb7606ab76 100644 --- a/evalplus/evaluate.py +++ b/evalplus/evaluate.py @@ -136,6 +136,7 @@ def evaluate( mini: bool = False, noextreme: bool = False, version: str = "default", + output_path: Optional[str] = None, **model_kwargs, ): if model_kwargs: @@ -143,10 +144,16 @@ def evaluate( os.environ["TOKENIZERS_PARALLELISM"] = os.environ.get( "TOKENIZERS_PARALLELISM", "false" ) - samples = run_codegen( - dataset=dataset, + + kwargs = { + "dataset": dataset, **model_kwargs, - ) + } + + if output_path is not None: + kwargs["root"] = output_path + + samples = run_codegen(**kwargs) assert samples is not None, "No samples provided" n_workers = parallel or max(1, multiprocessing.cpu_count() // 2) From 5415b79ea5c08c5be34d0b6e2feb56321c0eaeba Mon Sep 17 00:00:00 2001 From: Qubitium-ModelCloud Date: Thu, 5 Dec 2024 20:29:32 +0800 Subject: [PATCH 2/4] Update evaluate.py --- evalplus/evaluate.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/evalplus/evaluate.py b/evalplus/evaluate.py index cb7606ab76..de102bd975 100644 --- a/evalplus/evaluate.py +++ b/evalplus/evaluate.py @@ -136,7 +136,7 @@ def evaluate( mini: bool = False, noextreme: bool = False, version: str = "default", - output_path: Optional[str] = None, + output_file: Optional[str] = None, **model_kwargs, ): if model_kwargs: @@ -147,12 +147,10 @@ def evaluate( kwargs = { "dataset": dataset, + "output_file": output_file, **model_kwargs, } - if output_path is not None: - kwargs["root"] = output_path - samples = run_codegen(**kwargs) assert samples is not None, "No samples provided" From eb68d0754352e1a8927c4ac7405ecdd327911a10 Mon Sep 17 00:00:00 2001 From: Qubitium-ModelCloud Date: Thu, 5 Dec 2024 20:34:07 +0800 Subject: [PATCH 3/4] Update evaluate.py --- evalplus/evaluate.py | 1 - 1 file changed, 1 deletion(-) diff --git a/evalplus/evaluate.py b/evalplus/evaluate.py index de102bd975..5d7f00d3ff 100644 --- a/evalplus/evaluate.py +++ b/evalplus/evaluate.py @@ -147,7 +147,6 @@ def evaluate( kwargs = { "dataset": dataset, - "output_file": output_file, **model_kwargs, } From 438b14b4af7e6295d077abe7373180bddc446360 Mon Sep 17 00:00:00 2001 From: CL-ModelCloud Date: Thu, 5 Dec 2024 13:03:43 +0000 Subject: [PATCH 4/4] Add custom output_file option for result --- evalplus/evaluate.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/evalplus/evaluate.py b/evalplus/evaluate.py index 5d7f00d3ff..89537a6d75 100644 --- a/evalplus/evaluate.py +++ b/evalplus/evaluate.py @@ -144,13 +144,10 @@ def evaluate( os.environ["TOKENIZERS_PARALLELISM"] = os.environ.get( "TOKENIZERS_PARALLELISM", "false" ) - - kwargs = { - "dataset": dataset, + samples = run_codegen( + dataset=dataset, **model_kwargs, - } - - samples = run_codegen(**kwargs) + ) assert samples is not None, "No samples provided" n_workers = parallel or max(1, multiprocessing.cpu_count() // 2) @@ -161,6 +158,9 @@ def evaluate( assert samples.endswith(".jsonl") result_path = samples.replace(".jsonl", "_eval_results.json") + if output_file is not None: + result_path = output_file + if os.path.isfile(result_path) and not i_just_wanna_run: print(f"Load from previous results from {result_path}") with open(result_path, "r") as f: