@@ -340,11 +340,11 @@ def run_eval(
340340 )
341341
342342 # Upsert the local Evaluators; other Evaluators are just referenced by `path` or `id`
343- local_evaluators : List [Evaluator ] = []
343+ local_evaluators : List [tuple [ EvaluatorResponse , Callable ] ] = []
344344 if evaluators :
345- for evaluator in evaluators :
345+ for evaluator_request in evaluators :
346346 # If a callable is provided for an Evaluator, we treat it as External
347- eval_function = evaluator .get ("callable" )
347+ eval_function = evaluator_request .get ("callable" )
348348 if eval_function is not None :
349349 # TODO: support the case where `file` logs generated on Humanloop but Evaluator logs generated locally
350350 if function_ is None :
@@ -353,25 +353,26 @@ def run_eval(
353353 f"{ type_ } 's `callable`. Please provide a `callable` for your file in order "
354354 "to run Evaluators locally."
355355 )
356- local_evaluators .append (evaluator )
357356 spec = ExternalEvaluator (
358- arguments_type = evaluator ["args_type" ],
359- return_type = evaluator ["return_type" ],
357+ arguments_type = evaluator_request ["args_type" ],
358+ return_type = evaluator_request ["return_type" ],
360359 attributes = {"code" : inspect .getsource (eval_function )},
361360 evaluator_type = "external" ,
362361 )
363- client .evaluators .upsert (
364- id = evaluator .get ("id" ),
365- path = evaluator .get ("path" ),
362+ evaluator = client .evaluators .upsert (
363+ id = evaluator_request .get ("id" ),
364+ path = evaluator_request .get ("path" ),
366365 spec = spec ,
367366 )
367+ local_evaluators .append ((evaluator , eval_function ))
368+
368369 # function_ cannot be None, cast it for type checking
369370 function_ = typing .cast (Callable , function_ )
370371
371372 # Validate upfront that the local Evaluators and Dataset fit
372373 requires_target = False
373- for local_evaluator in local_evaluators :
374- if local_evaluator [ "args_type" ] == "target_required" :
374+ for local_evaluator , _ in local_evaluators :
375+ if local_evaluator . spec . arguments_type == "target_required" :
375376 requires_target = True
376377 break
377378 if requires_target :
@@ -382,7 +383,7 @@ def run_eval(
382383 if missing_target > 0 :
383384 raise ValueError (
384385 f"{ missing_target } Datapoints have no target. A target "
385- f"is required for the Evaluator: { local_evaluator [ ' path' ] } "
386+ f"is required for the Evaluator: { local_evaluator . path } "
386387 )
387388
388389 # Get or create the Evaluation based on the name
@@ -408,7 +409,7 @@ def run_eval(
408409 run : EvaluationRunResponse = client .evaluations .create_run (
409410 id = evaluation .id ,
410411 dataset = {"version_id" : hl_dataset .version_id },
411- orchestrated = False ,
412+ orchestrated = False if function_ is not None else True ,
412413 )
413414 # Every Run will generate a new batch of Logs
414415 run_id = run .id
@@ -715,7 +716,7 @@ def _run_local_evaluators(
715716 client : "BaseHumanloop" ,
716717 log_id : str ,
717718 datapoint : Optional [Datapoint ],
718- local_evaluators : list [Evaluator ],
719+ local_evaluators : list [tuple [ EvaluatorResponse , Callable ] ],
719720):
720721 """Run local Evaluators on the Log and send the judgments to Humanloop."""
721722 # Need to get the full log to pass to the evaluators
@@ -725,11 +726,10 @@ def _run_local_evaluators(
725726 else :
726727 log_dict = log
727728 datapoint_dict = datapoint .dict () if datapoint else None
728- for local_evaluator in local_evaluators :
729+ for local_evaluator , eval_function in local_evaluators :
729730 start_time = datetime .now ()
730731 try :
731- eval_function = local_evaluator ["callable" ]
732- if local_evaluator ["args_type" ] == "target_required" :
732+ if local_evaluator .spec .arguments_type == "target_required" :
733733 judgement = eval_function (
734734 log_dict ,
735735 datapoint_dict ,
@@ -738,20 +738,21 @@ def _run_local_evaluators(
738738 judgement = eval_function (log_dict )
739739
740740 _ = client .evaluators .log (
741+ version_id = local_evaluator .version_id ,
741742 parent_id = log_id ,
742743 judgment = judgement ,
743- id = local_evaluator .get ( "id" ) ,
744- path = local_evaluator .get ( " path" ) ,
744+ id = local_evaluator .id ,
745+ path = local_evaluator .path ,
745746 start_time = start_time ,
746747 end_time = datetime .now (),
747748 )
748749 except Exception as e :
749750 _ = client .evaluators .log (
750751 parent_id = log_id ,
751- path = local_evaluator .get ( " path" ) ,
752- id = local_evaluator .get ( "id" ) ,
752+ path = local_evaluator .path ,
753+ id = local_evaluator .id ,
753754 error = str (e ),
754755 start_time = start_time ,
755756 end_time = datetime .now (),
756757 )
757- logger .warning (f"\n Evaluator { local_evaluator [ ' path' ] } failed with error { str (e )} " )
758+ logger .warning (f"\n Evaluator { local_evaluator . path } failed with error { str (e )} " )