File tree Expand file tree Collapse file tree 3 files changed +20
-17
lines changed Expand file tree Collapse file tree 3 files changed +20
-17
lines changed Original file line number Diff line number Diff line change 13
13
ToolCall ,
14
14
)
15
15
from deepeval .metrics import BaseMetric
16
- import json
17
16
18
17
required_params : List [LLMTestCaseParams ] = [
19
18
LLMTestCaseParams .INPUT ,
@@ -27,16 +26,13 @@ class ToolCorrectnessMetric(BaseMetric):
27
26
def __init__ (
28
27
self ,
29
28
threshold : float = 0.5 ,
30
- evaluation_params : List [ToolCallParams ] = [ToolCallParams . TOOL ],
29
+ evaluation_params : List [ToolCallParams ] = [],
31
30
include_reason : bool = True ,
32
31
strict_mode : bool = False ,
33
32
verbose_mode : bool = False ,
34
33
should_exact_match : bool = False ,
35
34
should_consider_ordering : bool = False ,
36
35
):
37
- assert (
38
- ToolCallParams .TOOL in evaluation_params
39
- ), "evaluation_params must include ToolCallParams.TOOL"
40
36
self .threshold = 1 if strict_mode else threshold
41
37
self .include_reason = include_reason
42
38
self .strict_mode = strict_mode
Original file line number Diff line number Diff line change @@ -16,7 +16,6 @@ class LLMTestCaseParams(Enum):
16
16
17
17
18
18
class ToolCallParams (Enum ):
19
- TOOL = "tool"
20
19
INPUT_PARAMETERS = "input_parameters"
21
20
OUTPUT = "output"
22
21
@@ -40,8 +39,16 @@ def __eq__(self, other):
40
39
)
41
40
42
41
def __hash__ (self ):
42
+ input_params = (
43
+ self .input_parameters if self .input_parameters is not None else {}
44
+ )
45
+ output_hashable = (
46
+ frozenset (self .output .items ())
47
+ if isinstance (self .output , dict )
48
+ else self .output
49
+ )
43
50
return hash (
44
- (self .name , frozenset (self . input_parameters . items ()), self . output )
51
+ (self .name , frozenset (input_params . items ()), output_hashable )
45
52
)
46
53
47
54
def __repr__ (self ):
Original file line number Diff line number Diff line change 36
36
from deepeval import evaluate
37
37
38
38
tool_correctness_metric = ToolCorrectnessMetric (verbose_mode = True )
39
- tool_correctness_geval_metric = GEval (
40
- name = "Tool Correctness" ,
41
- criteria = "Is the expected tools same as tools called" ,
42
- evaluation_params = [
43
- LLMTestCaseParams .TOOLS_CALLED ,
44
- LLMTestCaseParams .EXPECTED_TOOLS ,
45
- ],
46
- verbose_mode = True ,
47
- )
48
- tool_correctness_geval_metric .measure (tool_test_case )
39
+ # tool_correctness_geval_metric = GEval(
40
+ # name="Tool Correctness",
41
+ # criteria="Is the expected tools same as tools called",
42
+ # evaluation_params=[
43
+ # LLMTestCaseParams.TOOLS_CALLED,
44
+ # LLMTestCaseParams.EXPECTED_TOOLS,
45
+ # ],
46
+ # verbose_mode=True,
47
+ # )
48
+ # tool_correctness_geval_metric.measure(tool_test_case)
49
49
tool_correctness_metric .measure (tool_test_case )
50
50
51
51
################################################
You can’t perform that action at this time.
0 commit comments