Commit e54e6b2

Added verbose mode
1 parent e49b077 commit e54e6b2

14 files changed: +72 additions, −101 deletions


deepeval/metrics/answer_relevancy/answer_relevancy.py
Lines changed: 8 additions & 13 deletions

@@ -54,6 +54,7 @@ def __init__(
         include_reason: bool = True,
         async_mode: bool = True,
         strict_mode: bool = False,
+        verbose_mode: bool = False,
     ):
         super().__init__()
         self._statements: ContextVar[Optional[List[str]]] = ContextVar(
@@ -68,11 +69,10 @@ def __init__(
         self.include_reason = include_reason
         self.async_mode = async_mode
         self.strict_mode = strict_mode
+        self.verbose_mode = verbose_mode
 
     def measure(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool = True,
+        self, test_case: Union[LLMTestCase, ConversationalTestCase]
     ) -> float:
         if isinstance(test_case, ConversationalTestCase):
             test_case = validate_conversational_test_case(test_case, self)
@@ -88,9 +88,7 @@ def measure(
                     self.score,
                     self.reason,
                     self.success,
-                ) = loop.run_until_complete(
-                    self._measure_async(test_case, verbose)
-                )
+                ) = loop.run_until_complete(self._measure_async(test_case))
             else:
                 self.statements: List[str] = self._generate_statements(
                     test_case.actual_output
@@ -101,7 +99,7 @@ def measure(
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason(test_case.input)
                 self.success = self.score >= self.threshold
-                if verbose:
+                if self.verbose_mode:
                     print(
                         f"statements: {self.statements}\nverdicts: {self.verdicts}\n"
                     )
@@ -111,7 +109,6 @@ async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
         _show_indicator: bool = True,
-        verbose: bool = True,
     ) -> float:
         if isinstance(test_case, ConversationalTestCase):
             test_case = validate_conversational_test_case(test_case, self)
@@ -130,18 +127,16 @@ async def a_measure(
             self.score = self._calculate_score()
             self.reason = await self._a_generate_reason(test_case.input)
             self.success = self.score >= self.threshold
-            if verbose:
+            if self.verbose_mode:
                 print(
                     f"statements: {self.statements}\nverdicts: {self.verdicts}\nscore: {self.score}, success: {self.success}\n"
                 )
             return self.score
 
     async def _measure_async(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
+        self, test_case: Union[LLMTestCase, ConversationalTestCase]
     ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        await self.a_measure(test_case, _show_indicator=False)
         return (
             self.statements,
             self.verdicts,
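The net effect of the diff above: verbosity is configured once on the metric via verbose_mode rather than passed to each measure() call. A minimal usage sketch (the threshold value and test-case strings are illustrative, not from this commit):

from deepeval.metrics import AnswerRelevancyMetric
from deepeval.test_case import LLMTestCase

# verbose_mode is now a constructor flag; measure() no longer accepts verbose.
metric = AnswerRelevancyMetric(threshold=0.7, verbose_mode=True)

test_case = LLMTestCase(
    input="What is the capital of France?",
    actual_output="The capital of France is Paris.",
)

score = metric.measure(test_case)  # prints statements and verdicts because verbose_mode=True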

deepeval/metrics/base_metric.py
Lines changed: 2 additions & 0 deletions

@@ -12,6 +12,7 @@ class BaseMetric:
     evaluation_model: Optional[str] = None
     strict_mode: bool = False
     async_mode: bool = True
+    verbose_mode: bool = False
     include_reason: bool = False
     evaluation_cost: Optional[float] = None
 
@@ -113,6 +114,7 @@ class BaseConversationalMetric:
     # Not changeable for now
     strict_mode: bool = False
     async_mode: bool = False
+    verbose_mode: bool = False
 
     def __init__(self):
         self._score = ContextVar(generate_uuid(), default=None)
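Because verbose_mode now has a class-level default on BaseMetric, custom metrics pick it up automatically. A rough sketch of a custom metric that honors the flag (the metric name and length-based scoring rule are invented for illustration; only the verbose_mode handling mirrors this commit):

from deepeval.metrics import BaseMetric
from deepeval.test_case import LLMTestCase

class AnswerLengthMetric(BaseMetric):
    # Toy metric: scores by output length, purely to demonstrate verbose_mode.
    def __init__(self, threshold: float = 0.5, verbose_mode: bool = False):
        super().__init__()
        self.threshold = threshold
        self.verbose_mode = verbose_mode  # overrides the BaseMetric default (False)

    def measure(self, test_case: LLMTestCase) -> float:
        self.score = min(len(test_case.actual_output) / 100, 1.0)
        self.success = self.score >= self.threshold
        if self.verbose_mode:
            print(f"score: {self.score}, success: {self.success}")
        return self.score

    def is_successful(self) -> bool:
        return self.success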

deepeval/metrics/bias/bias.py
Lines changed: 7 additions & 11 deletions

@@ -56,6 +56,7 @@ def __init__(
         include_reason: bool = True,
         async_mode: bool = True,
         strict_mode: bool = False,
+        verbose_mode: bool = False,
     ):
         super().__init__()
         self._opinions: ContextVar[Optional[List[str]]] = ContextVar(
@@ -70,11 +71,11 @@ def __init__(
         self.include_reason = include_reason
         self.async_mode = async_mode
         self.strict_mode = strict_mode
+        self.verbose_mode = verbose_mode
 
     def measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool = True,
     ) -> float:
         if isinstance(test_case, ConversationalTestCase):
             test_case = validate_conversational_test_case(test_case, self)
@@ -90,9 +91,7 @@ def measure(
                     self.score,
                     self.reason,
                     self.success,
-                ) = loop.run_until_complete(
-                    self._measure_async(test_case, verbose)
-                )
+                ) = loop.run_until_complete(self._measure_async(test_case))
             else:
                 self.opinions: List[str] = self._generate_opinions(
                     test_case.actual_output
@@ -101,7 +100,7 @@ def measure(
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason()
                 self.success = self.score <= self.threshold
-                if verbose:
+                if self.verbose_mode:
                     print(
                         f"opinions: {self.opinions}\nverdicts: {self.verdicts}\n"
                     )
@@ -111,7 +110,6 @@ async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
         _show_indicator: bool = True,
-        verbose: bool = True,
     ) -> float:
         if isinstance(test_case, ConversationalTestCase):
             test_case = validate_conversational_test_case(test_case, self)
@@ -130,16 +128,14 @@ async def a_measure(
             self.score = self._calculate_score()
             self.reason = await self._a_generate_reason()
             self.success = self.score <= self.threshold
-            if verbose:
+            if self.verbose_mode:
                 print(f"opinions: {self.opinions}\nverdicts: {self.verdicts}\n")
             return self.score
 
     async def _measure_async(
-        self,
-        test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
+        self, test_case: Union[LLMTestCase, ConversationalTestCase]
    ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        await self.a_measure(test_case, _show_indicator=False)
         return (
             self.opinions,
             self.verdicts,

deepeval/metrics/contextual_precision/contextual_precision.py
Lines changed: 6 additions & 9 deletions

@@ -51,6 +51,7 @@ def __init__(
         include_reason: bool = True,
         async_mode: bool = True,
         strict_mode: bool = False,
+        verbose_mode: bool = False,
     ):
         super().__init__()
         self._verdicts: ContextVar[
@@ -62,11 +63,11 @@ def __init__(
         self.evaluation_model = self.model.get_model_name()
         self.async_mode = async_mode
         self.strict_mode = strict_mode
+        self.verbose_mode = verbose_mode
 
     def measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool = True,
     ) -> float:
         if isinstance(test_case, ConversationalTestCase):
             test_case = validate_conversational_test_case(test_case, self)
@@ -77,9 +78,7 @@ def measure(
             if self.async_mode:
                 loop = get_or_create_event_loop()
                 (self.verdicts, self.score, self.reason, self.success) = (
-                    loop.run_until_complete(
-                        self._measure_async(test_case, verbose)
-                    )
+                    loop.run_until_complete(self._measure_async(test_case))
                 )
             else:
                 self.verdicts: List[ContextualPrecisionVerdict] = (
@@ -92,15 +91,14 @@ def measure(
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason(test_case.input)
                 self.success = self.score >= self.threshold
-                if verbose:
+                if self.verbose_mode:
                     print(f"verdicts: {self.verdicts}\n")
             return self.score
 
     async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
         _show_indicator: bool = True,
-        verbose: bool = True,
     ) -> float:
         if isinstance(test_case, ConversationalTestCase):
             test_case = validate_conversational_test_case(test_case, self)
@@ -122,16 +120,15 @@ async def a_measure(
             self.score = self._calculate_score()
             self.reason = await self._a_generate_reason(test_case.input)
             self.success = self.score >= self.threshold
-            if verbose:
+            if self.verbose_mode:
                 print(f"verdicts: {self.verdicts}\n")
             return self.score
 
     async def _measure_async(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
     ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        await self.a_measure(test_case, _show_indicator=False)
         return (self.verdicts, self.score, self.reason, self.success)
 
     async def _a_generate_reason(self, input: str):

deepeval/metrics/contextual_recall/contextual_recall.py
Lines changed: 6 additions & 9 deletions

@@ -48,6 +48,7 @@ def __init__(
         include_reason: bool = True,
         async_mode: bool = True,
         strict_mode: bool = False,
+        verbose_mode: bool = False,
     ):
         super().__init__()
         self._verdicts: ContextVar[Optional[List[ContextualRecallVerdict]]] = (
@@ -59,11 +60,11 @@ def __init__(
         self.include_reason = include_reason
         self.async_mode = async_mode
         self.strict_mode = strict_mode
+        self.verbose_mode = verbose_mode
 
     def measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool = True,
     ) -> float:
         if isinstance(test_case, ConversationalTestCase):
             test_case = validate_conversational_test_case(test_case, self)
@@ -74,9 +75,7 @@ def measure(
             if self.async_mode:
                 loop = get_or_create_event_loop()
                 (self.verdicts, self.score, self.reason, self.success) = (
-                    loop.run_until_complete(
-                        self._measure_async(test_case, verbose)
-                    )
+                    loop.run_until_complete(self._measure_async(test_case))
                 )
             else:
                 self.verdicts: List[ContextualRecallVerdict] = (
@@ -87,15 +86,14 @@ def measure(
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason(test_case.input)
                 self.success = self.score >= self.threshold
-                if verbose:
+                if self.verbose_mode:
                     print(f"verdicts: {self.verdicts}\n")
             return self.score
 
     async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
         _show_indicator: bool = True,
-        verbose: bool = True,
     ) -> float:
         if isinstance(test_case, ConversationalTestCase):
             test_case = validate_conversational_test_case(test_case, self)
@@ -115,16 +113,15 @@ async def a_measure(
             self.score = self._calculate_score()
             self.reason = await self._a_generate_reason(test_case.input)
             self.success = self.score >= self.threshold
-            if verbose:
+            if self.verbose_mode:
                 print(f"verdicts: {self.verdicts}\n")
             return self.score
 
     async def _measure_async(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
     ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        await self.a_measure(test_case, _show_indicator=False)
         return (self.verdicts, self.score, self.reason, self.success)
 
     async def _a_generate_reason(self, expected_output: str):

deepeval/metrics/contextual_relevancy/contextual_relevancy.py
Lines changed: 6 additions & 9 deletions

@@ -50,6 +50,7 @@ def __init__(
         include_reason: bool = True,
         async_mode: bool = True,
         strict_mode: bool = False,
+        verbose_mode: bool = False,
     ):
         super().__init__()
         self._verdicts: ContextVar[
@@ -61,11 +62,11 @@ def __init__(
         self.include_reason = include_reason
         self.async_mode = async_mode
         self.strict_mode = strict_mode
+        self.verbose_mode = verbose_mode
 
     def measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool = True,
     ) -> float:
         if isinstance(test_case, ConversationalTestCase):
             test_case = validate_conversational_test_case(test_case, self)
@@ -76,9 +77,7 @@ def measure(
             if self.async_mode:
                 loop = get_or_create_event_loop()
                 (self.verdicts, self.score, self.reason, self.success) = (
-                    loop.run_until_complete(
-                        self._measure_async(test_case, verbose)
-                    )
+                    loop.run_until_complete(self._measure_async(test_case))
                 )
             else:
                 self.verdicts: List[ContextualRelevancyVerdict] = (
@@ -89,15 +88,14 @@ def measure(
                 self.score = self._calculate_score()
                 self.reason = self._generate_reason(test_case.input)
                 self.success = self.score >= self.threshold
-                if verbose:
+                if self.verbose_mode:
                     print(f"verdicts: {self.verdicts}\n")
             return self.score
 
     async def a_measure(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
         _show_indicator: bool = True,
-        verbose: bool = True,
     ) -> float:
         if isinstance(test_case, ConversationalTestCase):
             test_case = validate_conversational_test_case(test_case, self)
@@ -117,16 +115,15 @@ async def a_measure(
             self.score = self._calculate_score()
             self.reason = await self._a_generate_reason(test_case.input)
             self.success = self.score >= self.threshold
-            if verbose:
+            if self.verbose_mode:
                 print(f"verdicts: {self.verdicts}\n")
             return self.score
 
     async def _measure_async(
         self,
         test_case: Union[LLMTestCase, ConversationalTestCase],
-        verbose: bool,
     ):
-        await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
+        await self.a_measure(test_case, _show_indicator=False)
         return (self.verdicts, self.score, self.reason, self.success)
 
     async def _a_generate_reason(self, input: str):
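The async path changes the same way across all of these metrics: a_measure() drops its verbose parameter, and output is controlled by the metric's verbose_mode flag. A hedged sketch of calling it directly (the threshold and test-case field values are illustrative, not from this commit):

import asyncio

from deepeval.metrics import ContextualRelevancyMetric
from deepeval.test_case import LLMTestCase

async def main():
    metric = ContextualRelevancyMetric(threshold=0.7, verbose_mode=True)
    test_case = LLMTestCase(
        input="What is the capital of France?",
        actual_output="Paris is the capital of France.",
        retrieval_context=["Paris is the capital and most populous city of France."],
    )
    # a_measure() no longer takes verbose; verdicts print because verbose_mode=True.
    score = await metric.a_measure(test_case)
    print(score)

asyncio.run(main())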
