from typing import Optional, List, Union
from pydantic import BaseModel, Field

-from deepeval.utils import get_or_create_event_loop
+from deepeval.utils import get_or_create_event_loop, generate_uuid
from deepeval.metrics.utils import (
    validate_conversational_test_case,
    trimAndLoadJson,
    check_llm_test_case_params,
    initialize_model,
+    print_intermediate_steps,
)
from deepeval.test_case import (
    LLMTestCase,
@@ -31,6 +32,21 @@ class AnswerRelvancyVerdict(BaseModel):


class AnswerRelevancyMetric(BaseMetric):
+    @property
+    def statements(self) -> Optional[List[str]]:
+        return self._statements.get()
+
+    @statements.setter
+    def statements(self, value: Optional[List[str]]):
+        self._statements.set(value)
+
+    @property
+    def verdicts(self) -> Optional[List[AnswerRelvancyVerdict]]:
+        return self._verdicts.get()
+
+    @verdicts.setter
+    def verdicts(self, value: Optional[List[AnswerRelvancyVerdict]]):
+        self._verdicts.set(value)

    def __init__(
        self,
@@ -39,35 +55,24 @@ def __init__(
        include_reason: bool = True,
        async_mode: bool = True,
        strict_mode: bool = False,
+        verbose_mode: bool = False,
    ):
-        super().__init__()
-        self._statements: ContextVar[Optional[List[str]]] = ContextVar(f'{self.__class__.__name__}_statements', default=None)
-        self._verdicts: ContextVar[Optional[List[AnswerRelvancyVerdict]]] = ContextVar(f'{self.__class__.__name__}_verdicts', default=None)
+        self._statements: ContextVar[Optional[List[str]]] = ContextVar(
+            generate_uuid(), default=None
+        )
+        self._verdicts: ContextVar[Optional[List[AnswerRelvancyVerdict]]] = (
+            ContextVar(generate_uuid(), default=None)
+        )
        self.threshold = 1 if strict_mode else threshold
        self.model, self.using_native_model = initialize_model(model)
        self.evaluation_model = self.model.get_model_name()
        self.include_reason = include_reason
        self.async_mode = async_mode
        self.strict_mode = strict_mode
-
-    @property
-    def statements(self) -> Optional[List[str]]:
-        return self._statements.get()
-    @statements.setter
-    def statements(self, value: Optional[List[str]]):
-        self._statements.set(value)
-
-    @property
-    def verdicts(self) -> Optional[List[AnswerRelvancyVerdict]]:
-        return self._verdicts.get()
-    @verdicts.setter
-    def verdicts(self, value: Optional[List[AnswerRelvancyVerdict]]):
-        self._verdicts.set(value)
+        self.verbose_mode = verbose_mode

    def measure(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool = True,
+        self, test_case: Union[LLMTestCase, ConversationalTestCase]
    ) -> float:
        if isinstance(test_case, ConversationalTestCase):
            test_case = validate_conversational_test_case(test_case, self)
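
Side note on the hunks above: the `ContextVar` slots are now named with `generate_uuid()` so that two metric instances (or two concurrent evaluations sharing one instance) never collide on intermediate state like `statements` and `verdicts`. A minimal sketch of the pattern outside deepeval — `DemoMetric`, `main`, and `uuid4` here are illustrative stand-ins, not part of this PR:

```python
import asyncio
from contextvars import ContextVar
from typing import List, Optional
from uuid import uuid4


class DemoMetric:
    def __init__(self):
        # A unique name per instance avoids ContextVar name collisions,
        # mirroring ContextVar(generate_uuid(), default=None) in the diff.
        self._statements: ContextVar[Optional[List[str]]] = ContextVar(
            str(uuid4()), default=None
        )

    @property
    def statements(self) -> Optional[List[str]]:
        return self._statements.get()

    @statements.setter
    def statements(self, value: Optional[List[str]]) -> None:
        self._statements.set(value)


async def main():
    metric = DemoMetric()

    async def evaluate(label: str) -> Optional[List[str]]:
        metric.statements = [label]  # set inside this task's own context copy
        await asyncio.sleep(0)       # yield so the two tasks interleave
        return metric.statements     # each task still reads its own value

    # Both tasks share one metric object, yet neither clobbers the other.
    print(await asyncio.gather(evaluate("a"), evaluate("b")))  # [['a'], ['b']]


asyncio.run(main())
```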
@@ -78,44 +83,36 @@ def measure(
        if self.async_mode:
            loop = get_or_create_event_loop()
            (
-                self.statements,
-                self.verdicts,
-                self.score,
-                self.reason,
-                self.success
-            ) = loop.run_until_complete(
-                self._measure_async(test_case, verbose)
-            )
+                self.statements,
+                self.verdicts,
+                self.score,
+                self.reason,
+                self.success,
+            ) = loop.run_until_complete(self._measure_async(test_case))
        else:
-            self.statements = self._generate_statements(
+            self.statements: List[str] = self._generate_statements(
                test_case.actual_output
            )
-            self.verdicts = self._generate_verdicts(test_case.input)
+            self.verdicts: List[AnswerRelvancyVerdict] = (
+                self._generate_verdicts(test_case.input)
+            )
            self.score = self._calculate_score()
            self.reason = self._generate_reason(test_case.input)
            self.success = self.score >= self.threshold
-            if verbose:
-                print(f"statements: {self.statements}\nverdicts: {self.verdicts}\n")
+            if self.verbose_mode:
+                print_intermediate_steps(
+                    self.__name__,
+                    steps=[
+                        f"Statements:\n{self.statements}\n",
+                        f"Verdicts:\n{self.verdicts}",
+                    ],
+                )
        return self.score
-
-    async def _measure_async(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
-        return (
-            self.statements,
-            self.verdicts,
-            self.score,
-            self.reason,
-            self.success
-        )

    async def a_measure(
        self,
        test_case: Union[LLMTestCase, ConversationalTestCase],
        _show_indicator: bool = True,
-        verbose: bool = True
    ) -> float:
        if isinstance(test_case, ConversationalTestCase):
            test_case = validate_conversational_test_case(test_case, self)
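
Worth noting why `measure()` reassigns the returned tuple rather than relying on side effects: `run_until_complete` executes the coroutine as a task with its own copy of the context, so `ContextVar` writes made inside `_measure_async` would be invisible to the synchronous caller. A rough sketch of the bridge — `get_or_create_event_loop` below is a simplified stand-in for deepeval's helper of the same name, and the coroutine body is illustrative:

```python
import asyncio
from typing import List, Tuple


def get_or_create_event_loop() -> asyncio.AbstractEventLoop:
    # Simplified stand-in: reuse the current loop if one exists,
    # otherwise create and install a fresh one.
    try:
        return asyncio.get_event_loop()
    except RuntimeError:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        return loop


async def _measure_async() -> Tuple[List[str], List[str], float, str, bool]:
    await asyncio.sleep(0)  # stand-in for the real async LLM calls
    # Return intermediate state explicitly: ContextVar writes made here
    # would stay in this task's context copy and be lost to the caller.
    return (["statement"], ["verdict"], 1.0, "relevant", True)


loop = get_or_create_event_loop()
statements, verdicts, score, reason, success = loop.run_until_complete(
    _measure_async()
)
print(score, success)  # 1.0 True
```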
@@ -125,19 +122,37 @@ async def a_measure(
        with metric_progress_indicator(
            self, async_mode=True, _show_indicator=_show_indicator
        ):
-            self.statements = await self._a_generate_statements(
+            self.statements: List[str] = await self._a_generate_statements(
                test_case.actual_output
            )
-            self.verdicts = (
+            self.verdicts: List[AnswerRelvancyVerdict] = (
                await self._a_generate_verdicts(test_case.input)
            )
            self.score = self._calculate_score()
            self.reason = await self._a_generate_reason(test_case.input)
            self.success = self.score >= self.threshold
-            if verbose:
-                print(f"statements: {self.statements}\nverdicts: {self.verdicts}\nscore: {self.score}, success: {self.success}\n")
+            if self.verbose_mode:
+                print_intermediate_steps(
+                    self.__name__,
+                    steps=[
+                        f"Statements:\n{self.statements}\n",
+                        f"Verdicts:\n{self.verdicts}",
+                    ],
+                )
            return self.score

+    async def _measure_async(
+        self, test_case: Union[LLMTestCase, ConversationalTestCase]
+    ):
+        await self.a_measure(test_case, _show_indicator=False)
+        return (
+            self.statements,
+            self.verdicts,
+            self.score,
+            self.reason,
+            self.success,
+        )
+
    async def _a_generate_reason(self, input: str) -> str:
        if self.include_reason is False:
            return None
@@ -275,4 +290,4 @@ def is_successful(self) -> bool:

    @property
    def __name__(self):
-        return "Answer Relevancy"
+        return "Answer Relevancy"