Skip to content

Commit 7039346

Browse files
committed
fix tool calling
1 parent 68941f1 commit 7039346

File tree

3 files changed: 20 additions and 17 deletions.

3 files changed: 20 additions and 17 deletions.

deepeval/metrics/tool_correctness/tool_correctness.py

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
ToolCall,
1414
)
1515
from deepeval.metrics import BaseMetric
16-
import json
1716

1817
required_params: List[LLMTestCaseParams] = [
1918
LLMTestCaseParams.INPUT,
@@ -27,16 +26,13 @@ class ToolCorrectnessMetric(BaseMetric):
2726
def __init__(
2827
self,
2928
threshold: float = 0.5,
30-
evaluation_params: List[ToolCallParams] = [ToolCallParams.TOOL],
29+
evaluation_params: List[ToolCallParams] = [],
3130
include_reason: bool = True,
3231
strict_mode: bool = False,
3332
verbose_mode: bool = False,
3433
should_exact_match: bool = False,
3534
should_consider_ordering: bool = False,
3635
):
37-
assert (
38-
ToolCallParams.TOOL in evaluation_params
39-
), "evaluation_params must include ToolCallParams.TOOL"
4036
self.threshold = 1 if strict_mode else threshold
4137
self.include_reason = include_reason
4238
self.strict_mode = strict_mode

deepeval/test_case/llm_test_case.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ class LLMTestCaseParams(Enum):
1616

1717

1818
class ToolCallParams(Enum):
19-
TOOL = "tool"
2019
INPUT_PARAMETERS = "input_parameters"
2120
OUTPUT = "output"
2221

@@ -40,8 +39,16 @@ def __eq__(self, other):
4039
)
4140

4241
def __hash__(self):
42+
input_params = (
43+
self.input_parameters if self.input_parameters is not None else {}
44+
)
45+
output_hashable = (
46+
frozenset(self.output.items())
47+
if isinstance(self.output, dict)
48+
else self.output
49+
)
4350
return hash(
44-
(self.name, frozenset(self.input_parameters.items()), self.output)
51+
(self.name, frozenset(input_params.items()), output_hashable)
4552
)
4653

4754
def __repr__(self):

tests/test_tools_called.py

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -36,16 +36,16 @@
3636
from deepeval import evaluate
3737

3838
tool_correctness_metric = ToolCorrectnessMetric(verbose_mode=True)
39-
tool_correctness_geval_metric = GEval(
40-
name="Tool Correctness",
41-
criteria="Is the expected tools same as tools called",
42-
evaluation_params=[
43-
LLMTestCaseParams.TOOLS_CALLED,
44-
LLMTestCaseParams.EXPECTED_TOOLS,
45-
],
46-
verbose_mode=True,
47-
)
48-
tool_correctness_geval_metric.measure(tool_test_case)
39+
# tool_correctness_geval_metric = GEval(
40+
# name="Tool Correctness",
41+
# criteria="Is the expected tools same as tools called",
42+
# evaluation_params=[
43+
# LLMTestCaseParams.TOOLS_CALLED,
44+
# LLMTestCaseParams.EXPECTED_TOOLS,
45+
# ],
46+
# verbose_mode=True,
47+
# )
48+
# tool_correctness_geval_metric.measure(tool_test_case)
4949
tool_correctness_metric.measure(tool_test_case)
5050

5151
################################################

0 commit comments

Comments (0)