Skip to content

Commit aafc017

Browse files
committed
Slightly better verbose printing
1 parent 7d8c3f2 commit aafc017

File tree

14 files changed

+181
-39
lines changed

14 files changed

+181
-39
lines changed

deepeval/evaluate.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -308,7 +308,6 @@ async def a_execute_test_cases(
308308

309309
for metric in metrics:
310310
metric_metadata = create_metric_metadata(metric)
311-
print(metric_metadata)
312311
if isinstance(test_case, ConversationalTestCase):
313312
# index hardcoded as the last message for now
314313
api_test_case.update(

deepeval/metrics/answer_relevancy/answer_relevancy.py

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
trimAndLoadJson,
99
check_llm_test_case_params,
1010
initialize_model,
11+
print_intermediate_steps,
1112
)
1213
from deepeval.test_case import (
1314
LLMTestCase,
@@ -99,8 +100,12 @@ def measure(
99100
self.reason = self._generate_reason(test_case.input)
100101
self.success = self.score >= self.threshold
101102
if self.verbose_mode:
102-
print(
103-
f"statements: {self.statements}\nverdicts: {self.verdicts}\n"
103+
print_intermediate_steps(
104+
self.__name__,
105+
steps=[
106+
f"Statements:\n{self.statements}\n",
107+
f"Verdicts:\n{self.verdicts}",
108+
],
104109
)
105110
return self.score
106111

@@ -127,8 +132,12 @@ async def a_measure(
127132
self.reason = await self._a_generate_reason(test_case.input)
128133
self.success = self.score >= self.threshold
129134
if self.verbose_mode:
130-
print(
131-
f"statements: {self.statements}\nverdicts: {self.verdicts}\nscore: {self.score}, success: {self.success}, reason: {self.reason}\n"
135+
print_intermediate_steps(
136+
self.__name__,
137+
steps=[
138+
f"Statements:\n{self.statements}\n",
139+
f"Verdicts:\n{self.verdicts}",
140+
],
132141
)
133142
return self.score
134143

deepeval/metrics/bias/bias.py

Lines changed: 14 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@
1212
from deepeval.models import DeepEvalBaseLLM
1313
from deepeval.utils import get_or_create_event_loop, generate_uuid
1414
from deepeval.metrics.utils import (
15+
print_intermediate_steps,
1516
validate_conversational_test_case,
1617
trimAndLoadJson,
1718
check_llm_test_case_params,
@@ -100,8 +101,12 @@ def measure(
100101
self.reason = self._generate_reason()
101102
self.success = self.score <= self.threshold
102103
if self.verbose_mode:
103-
print(
104-
f"opinions: {self.opinions}\nverdicts: {self.verdicts}\n"
104+
print_intermediate_steps(
105+
self.__name__,
106+
steps=[
107+
f"Opinions:\n{self.opinions}\n",
108+
f"Verdicts:\n{self.verdicts}",
109+
],
105110
)
106111
return self.score
107112

@@ -128,7 +133,13 @@ async def a_measure(
128133
self.reason = await self._a_generate_reason()
129134
self.success = self.score <= self.threshold
130135
if self.verbose_mode:
131-
print(f"opinions: {self.opinions}\nverdicts: {self.verdicts}\n")
136+
print_intermediate_steps(
137+
self.__name__,
138+
steps=[
139+
f"Opinions:\n{self.opinions}\n",
140+
f"Verdicts:\n{self.verdicts}",
141+
],
142+
)
132143
return self.score
133144

134145
async def _measure_async(

deepeval/metrics/contextual_precision/contextual_precision.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
from deepeval.utils import get_or_create_event_loop, generate_uuid
66
from deepeval.metrics.utils import (
7+
print_intermediate_steps,
78
validate_conversational_test_case,
89
trimAndLoadJson,
910
check_llm_test_case_params,
@@ -91,7 +92,12 @@ def measure(
9192
self.reason = self._generate_reason(test_case.input)
9293
self.success = self.score >= self.threshold
9394
if self.verbose_mode:
94-
print(f"verdicts: {self.verdicts}\n")
95+
print_intermediate_steps(
96+
self.__name__,
97+
steps=[
98+
f"Verdicts:\n{self.verdicts}",
99+
],
100+
)
95101
return self.score
96102

97103
async def a_measure(
@@ -120,7 +126,12 @@ async def a_measure(
120126
self.reason = await self._a_generate_reason(test_case.input)
121127
self.success = self.score >= self.threshold
122128
if self.verbose_mode:
123-
print(f"verdicts: {self.verdicts}\n")
129+
print_intermediate_steps(
130+
self.__name__,
131+
steps=[
132+
f"Verdicts:\n{self.verdicts}",
133+
],
134+
)
124135
return self.score
125136

126137
async def _measure_async(

deepeval/metrics/contextual_recall/contextual_recall.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
from deepeval.utils import get_or_create_event_loop, generate_uuid
66
from deepeval.metrics.utils import (
7+
print_intermediate_steps,
78
validate_conversational_test_case,
89
trimAndLoadJson,
910
check_llm_test_case_params,
@@ -86,7 +87,12 @@ def measure(
8687
self.reason = self._generate_reason(test_case.input)
8788
self.success = self.score >= self.threshold
8889
if self.verbose_mode:
89-
print(f"verdicts: {self.verdicts}\n")
90+
print_intermediate_steps(
91+
self.__name__,
92+
steps=[
93+
f"Verdicts:\n{self.verdicts}",
94+
],
95+
)
9096
return self.score
9197

9298
async def a_measure(
@@ -113,7 +119,12 @@ async def a_measure(
113119
self.reason = await self._a_generate_reason(test_case.input)
114120
self.success = self.score >= self.threshold
115121
if self.verbose_mode:
116-
print(f"verdicts: {self.verdicts}\n")
122+
print_intermediate_steps(
123+
self.__name__,
124+
steps=[
125+
f"Verdicts:\n{self.verdicts}",
126+
],
127+
)
117128
return self.score
118129

119130
async def _measure_async(

deepeval/metrics/contextual_relevancy/contextual_relevancy.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
from deepeval.utils import get_or_create_event_loop, generate_uuid
77
from deepeval.metrics.utils import (
8+
print_intermediate_steps,
89
validate_conversational_test_case,
910
trimAndLoadJson,
1011
check_llm_test_case_params,
@@ -88,7 +89,12 @@ def measure(
8889
self.reason = self._generate_reason(test_case.input)
8990
self.success = self.score >= self.threshold
9091
if self.verbose_mode:
91-
print(f"verdicts: {self.verdicts}\n")
92+
print_intermediate_steps(
93+
self.__name__,
94+
steps=[
95+
f"Verdicts:\n{self.verdicts}",
96+
],
97+
)
9298
return self.score
9399

94100
async def a_measure(
@@ -115,7 +121,12 @@ async def a_measure(
115121
self.reason = await self._a_generate_reason(test_case.input)
116122
self.success = self.score >= self.threshold
117123
if self.verbose_mode:
118-
print(f"verdicts: {self.verdicts}\n")
124+
print_intermediate_steps(
125+
self.__name__,
126+
steps=[
127+
f"Verdicts:\n{self.verdicts}",
128+
],
129+
)
119130
return self.score
120131

121132
async def _measure_async(

deepeval/metrics/faithfulness/faithfulness.py

Lines changed: 15 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
from deepeval.metrics import BaseMetric
1212
from deepeval.utils import get_or_create_event_loop, generate_uuid
1313
from deepeval.metrics.utils import (
14+
print_intermediate_steps,
1415
validate_conversational_test_case,
1516
trimAndLoadJson,
1617
check_llm_test_case_params,
@@ -118,8 +119,13 @@ def measure(
118119
self.reason = self._generate_reason()
119120
self.success = self.score >= self.threshold
120121
if self.verbose_mode:
121-
print(
122-
f"truths: {self.truths}\nclaims: {self.claims}\nverdicts: {self.verdicts}\n"
122+
print_intermediate_steps(
123+
self.__name__,
124+
steps=[
125+
f"Truths:\n{self.truths}",
126+
f"Claims:\n{self.claims}",
127+
f"Verdicts:\n{self.verdicts}",
128+
],
123129
)
124130
return self.score
125131

@@ -147,8 +153,13 @@ async def a_measure(
147153
self.reason = await self._a_generate_reason()
148154
self.success = self.score >= self.threshold
149155
if self.verbose_mode:
150-
print(
151-
f"truths: {self.truths}\nclaims: {self.claims}\nverdicts: {self.verdicts}\n"
156+
print_intermediate_steps(
157+
self.__name__,
158+
steps=[
159+
f"Truths:\n{self.truths}",
160+
f"Claims:\n{self.claims}",
161+
f"Verdicts:\n{self.verdicts}",
162+
],
152163
)
153164
return self.score
154165

deepeval/metrics/g_eval/g_eval.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,7 @@
1414
from deepeval.metrics.g_eval.template import GEvalTemplate
1515
from deepeval.utils import get_or_create_event_loop, generate_uuid
1616
from deepeval.metrics.utils import (
17+
print_intermediate_steps,
1718
validate_conversational_test_case,
1819
trimAndLoadJson,
1920
check_llm_test_case_params,
@@ -138,7 +139,12 @@ def measure(
138139
)
139140
self.success = self.score >= self.threshold
140141
if self.verbose_mode:
141-
print(f"evaluation_steps: {self.evaluation_steps}\n")
142+
print_intermediate_steps(
143+
self.__name__,
144+
steps=[
145+
f"Evaluation Steps:\n{self.evaluation_steps}",
146+
],
147+
)
142148
return self.score
143149

144150
async def a_measure(
@@ -169,7 +175,12 @@ async def a_measure(
169175
)
170176
self.success = self.score >= self.threshold
171177
if self.verbose_mode:
172-
print(f"evaluation_steps: {self.evaluation_steps}\n")
178+
print_intermediate_steps(
179+
self.__name__,
180+
steps=[
181+
f"Evaluation Steps:\n{self.evaluation_steps}",
182+
],
183+
)
173184
return self.score
174185

175186
async def _measure_async(

deepeval/metrics/hallucination/hallucination.py

Lines changed: 13 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from deepeval.metrics import BaseMetric
1111
from deepeval.utils import get_or_create_event_loop, generate_uuid
1212
from deepeval.metrics.utils import (
13+
print_intermediate_steps,
1314
validate_conversational_test_case,
1415
trimAndLoadJson,
1516
check_llm_test_case_params,
@@ -86,7 +87,12 @@ def measure(
8687
self.reason = self._generate_reason()
8788
self.success = self.score <= self.threshold
8889
if self.verbose_mode:
89-
print(f"verdicts: {self.verdicts}\n")
90+
print_intermediate_steps(
91+
self.__name__,
92+
steps=[
93+
f"Verdicts:\n{self.verdicts}",
94+
],
95+
)
9096
return self.score
9197

9298
async def a_measure(
@@ -111,7 +117,12 @@ async def a_measure(
111117
self.reason = await self._a_generate_reason()
112118
self.success = self.score <= self.threshold
113119
if self.verbose_mode:
114-
print(f"verdicts: {self.verdicts}\n")
120+
print_intermediate_steps(
121+
self.__name__,
122+
steps=[
123+
f"Verdicts:\n{self.verdicts}",
124+
],
125+
)
115126
return self.score
116127

117128
async def _measure_async(

deepeval/metrics/knowledge_retention/knowledge_retention.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55
from deepeval.test_case import ConversationalTestCase
66
from deepeval.metrics import BaseConversationalMetric
77
from deepeval.metrics.utils import (
8+
print_intermediate_steps,
89
validate_conversational_test_case,
910
trimAndLoadJson,
1011
initialize_model,
@@ -75,8 +76,12 @@ def measure(self, test_case: ConversationalTestCase):
7576
self.success = knowledge_retention_score >= self.threshold
7677
self.score = knowledge_retention_score
7778
if self.verbose_mode:
78-
print(
79-
f"knowledges: {self.knowledges}\nverdicts: {self.verdicts}\n"
79+
print_intermediate_steps(
80+
self.__name__,
81+
steps=[
82+
f"Knowledges:\n{self.knowledges}",
83+
f"Verdicts:\n{self.verdicts}",
84+
],
8085
)
8186
return self.score
8287

deepeval/metrics/summarization/summarization.py

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
from deepeval.models import DeepEvalBaseLLM
1414
from deepeval.utils import get_or_create_event_loop, generate_uuid
1515
from deepeval.metrics.utils import (
16+
print_intermediate_steps,
1617
validate_conversational_test_case,
1718
trimAndLoadJson,
1819
check_llm_test_case_params,
@@ -192,8 +193,15 @@ def measure(
192193
self.reason = self._generate_reason()
193194
self.success = self.score >= self.threshold
194195
if self.verbose_mode:
195-
print(
196-
f"truths: {self.truths}\nclaims: {self.claims}\ncoverage_verdicts: {self.coverage_verdicts}\nalignment_verdicts: {self.alignment_verdicts}\n"
196+
print_intermediate_steps(
197+
self.__name__,
198+
steps=[
199+
f"Truths:\n{self.truths}",
200+
f"Claims:\n{self.claims}",
201+
f"Assessment Questions:\n{self.assessment_questions}",
202+
f"Coverage Verdicts:\n{self.coverage_verdicts}",
203+
f"Alignment Verdicts:\n{self.alignment_verdicts}",
204+
],
197205
)
198206
return self.score
199207

@@ -234,8 +242,15 @@ async def a_measure(
234242
self.reason = await self._a_generate_reason()
235243
self.success = self.score >= self.threshold
236244
if self.verbose_mode:
237-
print(
238-
f"truths: {self.truths}\nclaims: {self.claims}\ncoverage_verdicts: {self.coverage_verdicts}\nalignment_verdicts: {self.alignment_verdicts}\n"
245+
print_intermediate_steps(
246+
self.__name__,
247+
steps=[
248+
f"Truths:\n{self.truths}",
249+
f"Claims:\n{self.claims}",
250+
f"Assessment Questions:\n{self.assessment_questions}",
251+
f"Coverage Verdicts:\n{self.coverage_verdicts}",
252+
f"Alignment Verdicts:\n{self.alignment_verdicts}",
253+
],
239254
)
240255
return self.score
241256

0 commit comments

Comments
 (0)