Commit e49b077: add typing
1 parent 2c49fa9 commit e49b077

10 files changed: +191 −153 lines changed

deepeval/metrics/answer_relevancy/answer_relevancy.py
22 additions & 18 deletions

@@ -92,10 +92,12 @@ def measure(
                     self._measure_async(test_case, verbose)
                 )
             else:
-                self.statements = self._generate_statements(
+                self.statements: List[str] = self._generate_statements(
                     test_case.actual_output
                 )
-                self.verdicts = self._generate_verdicts(test_case.input)
+                self.verdicts: List[AnswerRelvancyVerdict] = (
+                    self._generate_verdicts(test_case.input)
+                )
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason(test_case.input)
                 self.success = self.score >= self.threshold
@@ -105,20 +107,6 @@ def measure(
                     )
         return self.score
 
-    async def _measure_async(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
-    ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
-        return (
-            self.statements,
-            self.verdicts,
-            self.score,
-            self.reason,
-            self.success,
-        )
-
     async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
@@ -133,10 +121,12 @@ async def a_measure(
         with metric_progress_indicator(
             self, async_mode=True, _show_indicator=_show_indicator
         ):
-            self.statements = await self._a_generate_statements(
+            self.statements: List[str] = await self._a_generate_statements(
                 test_case.actual_output
            )
-            self.verdicts = await self._a_generate_verdicts(test_case.input)
+            self.verdicts: List[AnswerRelvancyVerdict] = (
+                await self._a_generate_verdicts(test_case.input)
+            )
            self.score = self._calculate_score()
            self.reason = await self._a_generate_reason(test_case.input)
            self.success = self.score >= self.threshold
@@ -146,6 +136,20 @@ async def a_measure(
                )
        return self.score
 
+    async def _measure_async(
+        self,
+        test_case: Union[LLMTestCase, ConversationalTestCase],
+        verbose: bool,
+    ):
+        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        return (
+            self.statements,
+            self.verdicts,
+            self.score,
+            self.reason,
+            self.success,
+        )
+
     async def _a_generate_reason(self, input: str) -> str:
         if self.include_reason is False:
             return None
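Taken together, the commit is mechanical: each metric's intermediate results (`statements`, `opinions`, `truths`, `claims`, `verdicts`) gain explicit type annotations at their assignment sites, and each file's `_measure_async` helper moves below `a_measure`. For readers unfamiliar with annotating instance attributes at assignment (PEP 526), here is a minimal, self-contained sketch of the pattern, with illustrative names rather than deepeval's:

```python
from typing import List


class ExampleMetric:
    def measure(self, output: str) -> float:
        # Annotating the attribute at its first assignment documents the type
        # for readers and static checkers without touching __init__.
        self.statements: List[str] = output.split(". ")
        self.score: float = 1.0 if self.statements else 0.0
        return self.score


metric = ExampleMetric()
print(metric.measure("First claim. Second claim."))  # 1.0
```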

deepeval/metrics/bias/bias.py
20 additions & 18 deletions

@@ -94,8 +94,10 @@ def measure(
                     self._measure_async(test_case, verbose)
                 )
             else:
-                self.opinions = self._generate_opinions(test_case.actual_output)
-                self.verdicts = self._generate_verdicts()
+                self.opinions: List[str] = self._generate_opinions(
+                    test_case.actual_output
+                )
+                self.verdicts: List[BiasVerdict] = self._generate_verdicts()
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason()
                 self.success = self.score <= self.threshold
@@ -105,20 +107,6 @@ def measure(
                     )
         return self.score
 
-    async def _measure_async(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
-    ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
-        return (
-            self.opinions,
-            self.verdicts,
-            self.score,
-            self.reason,
-            self.success,
-        )
-
     async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
@@ -135,17 +123,31 @@ async def a_measure(
             async_mode=True,
             _show_indicator=_show_indicator,
         ):
-            self.opinions = await self._a_generate_opinions(
+            self.opinions: List[str] = await self._a_generate_opinions(
                 test_case.actual_output
             )
-            self.verdicts = await self._a_generate_verdicts()
+            self.verdicts: List[BiasVerdict] = await self._a_generate_verdicts()
             self.score = self._calculate_score()
             self.reason = await self._a_generate_reason()
             self.success = self.score <= self.threshold
             if verbose:
                 print(f"opinions: {self.opinions}\nverdicts: {self.verdicts}\n")
         return self.score
 
+    async def _measure_async(
+        self,
+        test_case: Union[LLMTestCase, ConversationalTestCase],
+        verbose: bool,
+    ):
+        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        return (
+            self.opinions,
+            self.verdicts,
+            self.score,
+            self.reason,
+            self.success,
+        )
+
     async def _a_generate_reason(self) -> str:
         if self.include_reason is False:
             return None
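The relocated `_measure_async` exists so that the synchronous `measure` can drive the async path to completion and collect the intermediate results from a single awaitable, as the `loop.run_until_complete(self._measure_async(test_case, verbose))` context lines suggest. A rough, self-contained sketch of that wrapper shape using plain `asyncio` (deepeval's actual event-loop handling may differ):

```python
import asyncio
from typing import List, Tuple


class SketchMetric:
    async def a_measure(self, text: str) -> float:
        # Stand-in for the real async scoring pipeline.
        await asyncio.sleep(0)
        self.opinions: List[str] = [text]
        self.score: float = 1.0
        return self.score

    async def _measure_async(self, text: str) -> Tuple[List[str], float]:
        # Thin wrapper: run the async path, then return the intermediate
        # results so the sync caller can collect them from one awaitable.
        await self.a_measure(text)
        return (self.opinions, self.score)

    def measure(self, text: str) -> float:
        # Synchronous entry point drives the async implementation.
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(self._measure_async(text))
        finally:
            loop.close()
        return self.score


print(SketchMetric().measure("some model output"))  # 1.0
```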

deepeval/metrics/contextual_precision/contextual_precision.py
20 additions & 16 deletions

@@ -82,10 +82,12 @@ def measure(
                     )
                 )
             else:
-                self.verdicts = self._generate_verdicts(
-                    test_case.input,
-                    test_case.expected_output,
-                    test_case.retrieval_context,
+                self.verdicts: List[ContextualPrecisionVerdict] = (
+                    self._generate_verdicts(
+                        test_case.input,
+                        test_case.expected_output,
+                        test_case.retrieval_context,
+                    )
                 )
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason(test_case.input)
@@ -94,14 +96,6 @@ def measure(
                     print(f"verdicts: {self.verdicts}\n")
         return self.score
 
-    async def _measure_async(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
-    ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
-        return (self.verdicts, self.score, self.reason, self.success)
-
     async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
@@ -118,10 +112,12 @@ async def a_measure(
             async_mode=True,
             _show_indicator=_show_indicator,
        ):
-            self.verdicts = await self._a_generate_verdicts(
-                test_case.input,
-                test_case.expected_output,
-                test_case.retrieval_context,
+            self.verdicts: List[ContextualPrecisionVerdict] = (
+                await self._a_generate_verdicts(
+                    test_case.input,
+                    test_case.expected_output,
+                    test_case.retrieval_context,
+                )
             )
             self.score = self._calculate_score()
             self.reason = await self._a_generate_reason(test_case.input)
@@ -130,6 +126,14 @@ async def a_measure(
                 print(f"verdicts: {self.verdicts}\n")
        return self.score
 
+    async def _measure_async(
+        self,
+        test_case: Union[LLMTestCase, ConversationalTestCase],
+        verbose: bool,
+    ):
+        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        return (self.verdicts, self.score, self.reason, self.success)
+
     async def _a_generate_reason(self, input: str):
         if self.include_reason is False:
             return None
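Note that where an annotated assignment would overflow the line, the new right-hand side is wrapped in parentheses (`self.verdicts: List[ContextualPrecisionVerdict] = (...)`), which is how Black-style formatters keep annotated assignments within the line-length limit. The verdict types themselves are small structured objects; here is a hedged sketch of what such a model plausibly looks like (the field names are assumed from the printed verdicts in these diffs, not copied from deepeval's source):

```python
from typing import List, Optional

from pydantic import BaseModel


class ContextualPrecisionVerdict(BaseModel):
    # One yes/no judgment per retrieved context node, with a short
    # justification; these field names are an assumption for illustration.
    verdict: str
    reason: Optional[str] = None


verdicts: List[ContextualPrecisionVerdict] = [
    ContextualPrecisionVerdict(verdict="yes", reason="Supports the answer."),
    ContextualPrecisionVerdict(verdict="no", reason="Unrelated to the input."),
]
print(sum(1 for v in verdicts if v.verdict == "yes") / len(verdicts))  # 0.5
```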

deepeval/metrics/contextual_recall/contextual_recall.py
16 additions & 12 deletions

@@ -79,8 +79,10 @@ def measure(
                     )
                 )
             else:
-                self.verdicts = self._generate_verdicts(
-                    test_case.expected_output, test_case.retrieval_context
+                self.verdicts: List[ContextualRecallVerdict] = (
+                    self._generate_verdicts(
+                        test_case.expected_output, test_case.retrieval_context
+                    )
                 )
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason(test_case.input)
@@ -89,14 +91,6 @@ def measure(
                     print(f"verdicts: {self.verdicts}\n")
         return self.score
 
-    async def _measure_async(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
-    ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
-        return (self.verdicts, self.score, self.reason, self.success)
-
     async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
@@ -113,8 +107,10 @@ async def a_measure(
             async_mode=True,
             _show_indicator=_show_indicator,
        ):
-            self.verdicts = await self._a_generate_verdicts(
-                test_case.expected_output, test_case.retrieval_context
+            self.verdicts: List[ContextualRecallVerdict] = (
+                await self._a_generate_verdicts(
+                    test_case.expected_output, test_case.retrieval_context
+                )
             )
             self.score = self._calculate_score()
             self.reason = await self._a_generate_reason(test_case.input)
@@ -123,6 +119,14 @@ async def a_measure(
                 print(f"verdicts: {self.verdicts}\n")
        return self.score
 
+    async def _measure_async(
+        self,
+        test_case: Union[LLMTestCase, ConversationalTestCase],
+        verbose: bool,
+    ):
+        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        return (self.verdicts, self.score, self.reason, self.success)
+
     async def _a_generate_reason(self, expected_output: str):
         if self.include_reason is False:
             return None
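None of this changes the public API: calling a metric still follows the usual deepeval flow. A usage sketch, assuming the standard `LLMTestCase` fields and a configured evaluation model (e.g. an OpenAI key):

```python
from deepeval.metrics import ContextualRecallMetric
from deepeval.test_case import LLMTestCase

test_case = LLMTestCase(
    input="What is the capital of France?",
    actual_output="Paris is the capital of France.",
    expected_output="Paris.",
    retrieval_context=["Paris is the capital and largest city of France."],
)

metric = ContextualRecallMetric(threshold=0.7)
metric.measure(test_case)  # populates .verdicts, .score, .reason, .success
print(metric.score, metric.reason)
```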

deepeval/metrics/contextual_relevancy/contextual_relevancy.py
16 additions & 12 deletions

@@ -81,8 +81,10 @@ def measure(
                     )
                 )
             else:
-                self.verdicts = self._generate_verdicts(
-                    test_case.input, test_case.retrieval_context
+                self.verdicts: List[ContextualRelevancyVerdict] = (
+                    self._generate_verdicts(
+                        test_case.input, test_case.retrieval_context
+                    )
                 )
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason(test_case.input)
@@ -91,14 +93,6 @@ def measure(
                     print(f"verdicts: {self.verdicts}\n")
         return self.score
 
-    async def _measure_async(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
-    ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
-        return (self.verdicts, self.score, self.reason, self.success)
-
     async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
@@ -115,8 +109,10 @@ async def a_measure(
             async_mode=True,
             _show_indicator=_show_indicator,
        ):
-            self.verdicts = await self._a_generate_verdicts(
-                test_case.input, test_case.retrieval_context
+            self.verdicts: List[ContextualRelevancyVerdict] = (
+                await self._a_generate_verdicts(
+                    test_case.input, test_case.retrieval_context
+                )
             )
             self.score = self._calculate_score()
             self.reason = await self._a_generate_reason(test_case.input)
@@ -125,6 +121,14 @@ async def a_measure(
                 print(f"verdicts: {self.verdicts}\n")
        return self.score
 
+    async def _measure_async(
+        self,
+        test_case: Union[LLMTestCase, ConversationalTestCase],
+        verbose: bool,
+    ):
+        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        return (self.verdicts, self.score, self.reason, self.success)
+
     async def _a_generate_reason(self, input: str):
         if self.include_reason is False:
             return None
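One subtlety of annotating attributes like `self.verdicts` inside a method body: such annotations are not recorded in `__annotations__`, so they serve readers and static checkers rather than runtime introspection (though in current CPython the annotation expression on an attribute target is still evaluated, which is why names like `List` and the verdict classes must be genuinely imported rather than left as hints). A quick demonstration:

```python
from typing import List


class Demo:
    def run(self) -> None:
        # Annotation on an attribute target: useful to type checkers, but
        # not stored anywhere at runtime.
        self.items: List[str] = ["a", "b"]


d = Demo()
d.run()
print(getattr(Demo, "__annotations__", {}))  # {} -- no runtime record
print(d.items)  # ['a', 'b'] -- the assignment itself is unaffected
```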

deepeval/metrics/faithfulness/faithfulness.py
27 additions & 19 deletions

@@ -107,9 +107,15 @@ def measure(
                     self._measure_async(test_case, verbose)
                 )
             else:
-                self.truths = self._generate_truths(test_case.retrieval_context)
-                self.claims = self._generate_claims(test_case.actual_output)
-                self.verdicts = self._generate_verdicts()
+                self.truths: List[str] = self._generate_truths(
+                    test_case.retrieval_context
+                )
+                self.claims: List[str] = self._generate_claims(
+                    test_case.actual_output
+                )
+                self.verdicts: List[FaithfulnessVerdict] = (
+                    self._generate_verdicts()
+                )
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason()
                 self.success = self.score >= self.threshold
@@ -119,21 +125,6 @@ def measure(
                     )
         return self.score
 
-    async def _measure_async(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
-    ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
-        return (
-            self.truths,
-            self.claims,
-            self.verdicts,
-            self.score,
-            self.reason,
-            self.success,
-        )
-
     async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
@@ -152,7 +143,9 @@ async def a_measure(
                 self._a_generate_truths(test_case.retrieval_context),
                 self._a_generate_claims(test_case.actual_output),
             )
-            self.verdicts = await self._a_generate_verdicts()
+            self.verdicts: List[FaithfulnessVerdict] = (
+                await self._a_generate_verdicts()
+            )
             self.score = self._calculate_score()
             self.reason = await self._a_generate_reason()
             self.success = self.score >= self.threshold
@@ -162,6 +155,21 @@ async def a_measure(
                 )
        return self.score
 
+    async def _measure_async(
+        self,
+        test_case: Union[LLMTestCase, ConversationalTestCase],
+        verbose: bool,
+    ):
+        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        return (
+            self.truths,
+            self.claims,
+            self.verdicts,
+            self.score,
+            self.reason,
+            self.success,
+        )
+
     async def _a_generate_reason(self) -> str:
         if self.include_reason is False:
             return None
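In the faithfulness async path, the context lines show `_a_generate_truths` and `_a_generate_claims` passed together, presumably to an `asyncio.gather` call, so the two independent generations run concurrently while verdict generation waits on both. A self-contained sketch of that shape:

```python
import asyncio
from typing import List, Tuple


async def generate_truths(retrieval_context: List[str]) -> List[str]:
    await asyncio.sleep(0)  # stand-in for an LLM extraction call
    return [f"truth from: {c}" for c in retrieval_context]


async def generate_claims(actual_output: str) -> List[str]:
    await asyncio.sleep(0)  # stand-in for an LLM extraction call
    return [actual_output]


async def a_measure(
    retrieval_context: List[str], actual_output: str
) -> Tuple[List[str], List[str]]:
    # The two independent generations run concurrently; verdict generation
    # would depend on both, so it happens only after the gather completes.
    truths, claims = await asyncio.gather(
        generate_truths(retrieval_context),
        generate_claims(actual_output),
    )
    return truths, claims


truths, claims = asyncio.run(a_measure(["ctx A", "ctx B"], "model answer"))
print(truths, claims)
```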
