Commit a715452

Make sure the external evaluator references the right version when logging (#36)
1 parent 1a06e02 commit a715452

File tree

  • src/humanloop/eval_utils/run.py

1 file changed: 23 additions, 22 deletions

src/humanloop/eval_utils/run.py
@@ -340,11 +340,11 @@ def run_eval(
     )

     # Upsert the local Evaluators; other Evaluators are just referenced by `path` or `id`
-    local_evaluators: List[Evaluator] = []
+    local_evaluators: List[tuple[EvaluatorResponse, Callable]] = []
     if evaluators:
-        for evaluator in evaluators:
+        for evaluator_request in evaluators:
             # If a callable is provided for an Evaluator, we treat it as External
-            eval_function = evaluator.get("callable")
+            eval_function = evaluator_request.get("callable")
             if eval_function is not None:
                 # TODO: support the case where `file` logs generated on Humanloop but Evaluator logs generated locally
                 if function_ is None:
@@ -353,25 +353,26 @@ def run_eval(
                         f"{type_}'s `callable`. Please provide a `callable` for your file in order "
                         "to run Evaluators locally."
                     )
-                local_evaluators.append(evaluator)
                 spec = ExternalEvaluator(
-                    arguments_type=evaluator["args_type"],
-                    return_type=evaluator["return_type"],
+                    arguments_type=evaluator_request["args_type"],
+                    return_type=evaluator_request["return_type"],
                     attributes={"code": inspect.getsource(eval_function)},
                     evaluator_type="external",
                 )
-                client.evaluators.upsert(
-                    id=evaluator.get("id"),
-                    path=evaluator.get("path"),
+                evaluator = client.evaluators.upsert(
+                    id=evaluator_request.get("id"),
+                    path=evaluator_request.get("path"),
                     spec=spec,
                 )
+                local_evaluators.append((evaluator, eval_function))
+
     # function_ cannot be None, cast it for type checking
     function_ = typing.cast(Callable, function_)

     # Validate upfront that the local Evaluators and Dataset fit
     requires_target = False
-    for local_evaluator in local_evaluators:
-        if local_evaluator["args_type"] == "target_required":
+    for local_evaluator, _ in local_evaluators:
+        if local_evaluator.spec.arguments_type == "target_required":
             requires_target = True
             break
     if requires_target:
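
The effect of this hunk: each external Evaluator is upserted first, and the returned EvaluatorResponse is kept alongside its callable instead of the raw request dict. A condensed sketch of the new pattern, assuming the same names and imports as run.py (List, Callable, EvaluatorResponse, ExternalEvaluator, inspect, client); error handling and the missing-callable branch are simplified:

    # Condensed sketch of the upsert-then-track pattern from the diff above.
    local_evaluators: List[tuple[EvaluatorResponse, Callable]] = []
    for evaluator_request in evaluators:
        eval_function = evaluator_request.get("callable")
        if eval_function is None:
            continue  # not a local (External) Evaluator
        evaluator = client.evaluators.upsert(
            id=evaluator_request.get("id"),
            path=evaluator_request.get("path"),
            spec=ExternalEvaluator(
                arguments_type=evaluator_request["args_type"],
                return_type=evaluator_request["return_type"],
                attributes={"code": inspect.getsource(eval_function)},
                evaluator_type="external",
            ),
        )
        # Pair the upserted version with its callable so logging can later
        # reference the exact version created here.
        local_evaluators.append((evaluator, eval_function))
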
@@ -382,7 +383,7 @@ def run_eval(
         if missing_target > 0:
             raise ValueError(
                 f"{missing_target} Datapoints have no target. A target "
-                f"is required for the Evaluator: {local_evaluator['path']}"
+                f"is required for the Evaluator: {local_evaluator.path}"
             )

     # Get or create the Evaluation based on the name
@@ -408,7 +409,7 @@ def run_eval(
     run: EvaluationRunResponse = client.evaluations.create_run(
         id=evaluation.id,
         dataset={"version_id": hl_dataset.version_id},
-        orchestrated=False,
+        orchestrated=False if function_ is not None else True,
     )
     # Every Run will generate a new batch of Logs
     run_id = run.id
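
The `orchestrated` flag now follows from whether a local `callable` drives the generation. The conditional in the diff reduces to a single boolean; a minimal equivalent, as a stylistic restatement rather than part of the commit:

    # Equivalent form: the Run is orchestrated by Humanloop only when
    # there is no local callable to generate the Logs.
    orchestrated = function_ is None
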
@@ -715,7 +716,7 @@ def _run_local_evaluators(
     client: "BaseHumanloop",
     log_id: str,
     datapoint: Optional[Datapoint],
-    local_evaluators: list[Evaluator],
+    local_evaluators: list[tuple[EvaluatorResponse, Callable]],
 ):
     """Run local Evaluators on the Log and send the judgments to Humanloop."""
     # Need to get the full log to pass to the evaluators
@@ -725,11 +726,10 @@ def _run_local_evaluators(
     else:
         log_dict = log
     datapoint_dict = datapoint.dict() if datapoint else None
-    for local_evaluator in local_evaluators:
+    for local_evaluator, eval_function in local_evaluators:
         start_time = datetime.now()
         try:
-            eval_function = local_evaluator["callable"]
-            if local_evaluator["args_type"] == "target_required":
+            if local_evaluator.spec.arguments_type == "target_required":
                 judgement = eval_function(
                     log_dict,
                     datapoint_dict,
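
For reference, the unpacked `eval_function` is a plain Python callable; a `target_required` Evaluator receives the Log dict and the Datapoint dict, matching the call in this hunk. A hypothetical example, where the name `exact_match` and its comparison logic are illustrative and not from the commit:

    # Hypothetical target_required Evaluator. The (log_dict, datapoint_dict)
    # signature mirrors how eval_function is invoked in the loop above;
    # the comparison against the datapoint's target is illustrative.
    def exact_match(log_dict: dict, datapoint_dict: dict) -> bool:
        return log_dict.get("output") == datapoint_dict.get("target")
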
@@ -738,20 +738,21 @@
                 judgement = eval_function(log_dict)

             _ = client.evaluators.log(
+                version_id=local_evaluator.version_id,
                 parent_id=log_id,
                 judgment=judgement,
-                id=local_evaluator.get("id"),
-                path=local_evaluator.get("path"),
+                id=local_evaluator.id,
+                path=local_evaluator.path,
                 start_time=start_time,
                 end_time=datetime.now(),
             )
         except Exception as e:
             _ = client.evaluators.log(
                 parent_id=log_id,
-                path=local_evaluator.get("path"),
-                id=local_evaluator.get("id"),
+                path=local_evaluator.path,
+                id=local_evaluator.id,
                 error=str(e),
                 start_time=start_time,
                 end_time=datetime.now(),
             )
-            logger.warning(f"\nEvaluator {local_evaluator['path']} failed with error {str(e)}")
+            logger.warning(f"\nEvaluator {local_evaluator.path} failed with error {str(e)}")
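
This last hunk is the fix named in the commit title: the judgment Log now carries `version_id=local_evaluator.version_id`, so it attaches to the exact Evaluator version upserted earlier rather than whatever version `path` or `id` resolves to at log time. A minimal sketch of the call, assuming `client`, `log_id`, and an upserted `local_evaluator` as above; the judgment value is illustrative:

    from datetime import datetime

    start_time = datetime.now()
    client.evaluators.log(
        version_id=local_evaluator.version_id,  # pin the Log to this exact version
        parent_id=log_id,                       # the Log being judged
        judgment=0.9,                           # illustrative value
        id=local_evaluator.id,
        path=local_evaluator.path,
        start_time=start_time,
        end_time=datetime.now(),
    )
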
