Skip to content

Commit 5dfc777

Browse files
committed
Changed to UUID
1 parent 700a1b8 commit 5dfc777

File tree

19 files changed

+484
-517
lines changed

19 files changed

+484
-517
lines changed

deepeval/evaluate.py

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,12 @@
2323
ConversationalApiTestCase,
2424
MetricMetadata,
2525
)
26-
from deepeval.utils import get_is_running_deepeval, set_indicator, capture_contextvars, update_contextvars
26+
from deepeval.utils import (
27+
get_is_running_deepeval,
28+
set_indicator,
29+
capture_contextvars,
30+
update_contextvars,
31+
)
2732
from deepeval.test_run.cache import (
2833
test_run_cache_manager,
2934
Cache,
@@ -300,7 +305,7 @@ async def a_execute_test_cases(
300305
await measure_metrics_with_indicator(
301306
metrics, test_case, cached_test_case, ignore_errors
302307
)
303-
308+
304309
for metric in metrics:
305310
metric_metadata = create_metric_metadata(metric)
306311
if isinstance(test_case, ConversationalTestCase):

deepeval/metrics/answer_relevancy/answer_relevancy.py

Lines changed: 38 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
from typing import Optional, List, Union
33
from pydantic import BaseModel, Field
44

5-
from deepeval.utils import get_or_create_event_loop
5+
from deepeval.utils import get_or_create_event_loop, generate_uuid
66
from deepeval.metrics.utils import (
77
validate_conversational_test_case,
88
trimAndLoadJson,
@@ -41,32 +41,38 @@ def __init__(
4141
strict_mode: bool = False,
4242
):
4343
super().__init__()
44-
self._statements: ContextVar[Optional[List[str]]] = ContextVar(f'{self.__class__.__name__}_statements', default=None)
45-
self._verdicts: ContextVar[Optional[List[AnswerRelvancyVerdict]]] = ContextVar(f'{self.__class__.__name__}_verdicts', default=None)
44+
self._statements: ContextVar[Optional[List[str]]] = ContextVar(
45+
generate_uuid(), default=None
46+
)
47+
self._verdicts: ContextVar[Optional[List[AnswerRelvancyVerdict]]] = (
48+
ContextVar(generate_uuid(), default=None)
49+
)
4650
self.threshold = 1 if strict_mode else threshold
4751
self.model, self.using_native_model = initialize_model(model)
4852
self.evaluation_model = self.model.get_model_name()
4953
self.include_reason = include_reason
5054
self.async_mode = async_mode
5155
self.strict_mode = strict_mode
52-
56+
5357
@property
5458
def statements(self) -> Optional[List[str]]:
5559
return self._statements.get()
60+
5661
@statements.setter
5762
def statements(self, value: Optional[List[str]]):
5863
self._statements.set(value)
59-
64+
6065
@property
6166
def verdicts(self) -> Optional[List[AnswerRelvancyVerdict]]:
6267
return self._verdicts.get()
68+
6369
@verdicts.setter
6470
def verdicts(self, value: Optional[List[AnswerRelvancyVerdict]]):
6571
self._verdicts.set(value)
6672

6773
def measure(
68-
self,
69-
test_case: Union[LLMTestCase, ConversationalTestCase],
74+
self,
75+
test_case: Union[LLMTestCase, ConversationalTestCase],
7076
verbose: bool = True,
7177
) -> float:
7278
if isinstance(test_case, ConversationalTestCase):
@@ -78,11 +84,11 @@ def measure(
7884
if self.async_mode:
7985
loop = get_or_create_event_loop()
8086
(
81-
self.statements,
82-
self.verdicts,
83-
self.score,
84-
self.reason,
85-
self.success
87+
self.statements,
88+
self.verdicts,
89+
self.score,
90+
self.reason,
91+
self.success,
8692
) = loop.run_until_complete(
8793
self._measure_async(test_case, verbose)
8894
)
@@ -95,27 +101,30 @@ def measure(
95101
self.reason = self._generate_reason(test_case.input)
96102
self.success = self.score >= self.threshold
97103
if verbose:
98-
print(f"statements: {self.statements}\nverdicts: {self.verdicts}\n")
104+
print(
105+
f"statements: {self.statements}\nverdicts: {self.verdicts}\n"
106+
)
99107
return self.score
100-
108+
101109
async def _measure_async(
102-
self,
103-
test_case: Union[LLMTestCase, ConversationalTestCase],
104-
verbose: bool):
110+
self,
111+
test_case: Union[LLMTestCase, ConversationalTestCase],
112+
verbose: bool,
113+
):
105114
await self.a_measure(test_case, _show_indicator=False, verbose=verbose)
106115
return (
107-
self.statements,
108-
self.verdicts,
109-
self.score,
110-
self.reason,
111-
self.success
112-
)
116+
self.statements,
117+
self.verdicts,
118+
self.score,
119+
self.reason,
120+
self.success,
121+
)
113122

114123
async def a_measure(
115124
self,
116125
test_case: Union[LLMTestCase, ConversationalTestCase],
117126
_show_indicator: bool = True,
118-
verbose: bool = True
127+
verbose: bool = True,
119128
) -> float:
120129
if isinstance(test_case, ConversationalTestCase):
121130
test_case = validate_conversational_test_case(test_case, self)
@@ -128,14 +137,14 @@ async def a_measure(
128137
self.statements = await self._a_generate_statements(
129138
test_case.actual_output
130139
)
131-
self.verdicts = (
132-
await self._a_generate_verdicts(test_case.input)
133-
)
140+
self.verdicts = await self._a_generate_verdicts(test_case.input)
134141
self.score = self._calculate_score()
135142
self.reason = await self._a_generate_reason(test_case.input)
136143
self.success = self.score >= self.threshold
137144
if verbose:
138-
print(f"statements: {self.statements}\nverdicts: {self.verdicts}\nscore: {self.score}, success: {self.success}\n")
145+
print(
146+
f"statements: {self.statements}\nverdicts: {self.verdicts}\nscore: {self.score}, success: {self.success}\n"
147+
)
139148
return self.score
140149

141150
async def _a_generate_reason(self, input: str) -> str:
@@ -275,4 +284,4 @@ def is_successful(self) -> bool:
275284

276285
@property
277286
def __name__(self):
278-
return "Answer Relevancy"
287+
return "Answer Relevancy"

deepeval/metrics/base_metric.py

Lines changed: 34 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,11 @@
11
from abc import abstractmethod
22
from contextvars import ContextVar
33
from typing import Optional, Dict
4+
import uuid
45

56
from deepeval.test_case import LLMTestCase, ConversationalTestCase
7+
from deepeval.utils import generate_uuid
8+
69

710
class BaseMetric:
811

@@ -13,57 +16,74 @@ class BaseMetric:
1316
evaluation_cost: Optional[float] = None
1417

1518
def __init__(self):
16-
self._score: ContextVar[Optional[float]] = ContextVar(f'{self.__class__.__name__}_score', default=None)
17-
self._score_breakdown: ContextVar[Optional[Dict]] = ContextVar(f'{self.__class__.__name__}_score_breakdown', default=None)
18-
self._reason: ContextVar[Optional[str]] = ContextVar(f'{self.__class__.__name__}_reason', default=None)
19-
self._success: ContextVar[Optional[bool]] = ContextVar(f'{self.__class__.__name__}_success', default=None)
20-
self._error: ContextVar[Optional[str]] = ContextVar(f'{self.__class__.__name__}_error', default=None)
19+
self._score: ContextVar[Optional[float]] = ContextVar(
20+
generate_uuid(), default=None
21+
)
22+
self._score_breakdown: ContextVar[Optional[Dict]] = ContextVar(
23+
generate_uuid(), default=None
24+
)
25+
self._reason: ContextVar[Optional[str]] = ContextVar(
26+
generate_uuid(), default=None
27+
)
28+
self._success: ContextVar[Optional[bool]] = ContextVar(
29+
generate_uuid(), default=None
30+
)
31+
self._error: ContextVar[Optional[str]] = ContextVar(
32+
generate_uuid(), default=None
33+
)
2134

2235
@property
2336
def score(self) -> Optional[float]:
2437
return self._score.get()
38+
2539
@score.setter
2640
def score(self, value: Optional[float]) -> None:
2741
self._score.set(value)
2842

2943
@property
3044
def score_breakdown(self) -> Optional[Dict]:
3145
return self._score_breakdown.get()
46+
3247
@score_breakdown.setter
3348
def score_breakdown(self, value: Optional[Dict]) -> None:
3449
self._score_breakdown.set(value)
3550

3651
@property
3752
def reason(self) -> Optional[str]:
3853
return self._reason.get()
54+
3955
@reason.setter
4056
def reason(self, value: Optional[str]) -> None:
4157
self._reason.set(value)
4258

4359
@property
4460
def success(self) -> Optional[bool]:
4561
return self._success.get()
62+
4663
@success.setter
4764
def success(self, value: Optional[bool]) -> None:
4865
self._success.set(value)
4966

5067
@property
5168
def error(self) -> Optional[str]:
5269
return self._error.get()
70+
5371
@error.setter
5472
def error(self, value: Optional[str]) -> None:
5573
self._error.set(value)
5674

5775
@property
5876
def error(self) -> Optional[str]:
5977
return self._error.get()
78+
6079
@error.setter
6180
def error(self, value: Optional[str]) -> None:
6281
self._error.set(value)
6382

6483
@property
6584
def threshold(self) -> float:
6685
return self._threshold
86+
6787
@threshold.setter
6888
def threshold(self, value: float):
6989
self._threshold = value
@@ -95,42 +115,47 @@ class BaseConversationalMetric:
95115
async_mode: bool = False
96116

97117
def __init__(self):
98-
self._score = ContextVar(f'{self.__class__.__name__}_score', default=None)
99-
self._score_breakdown = ContextVar(f'{self.__class__.__name__}_score_breakdown', default=None)
100-
self._reason = ContextVar(f'{self.__class__.__name__}_reason', default=None)
101-
self._error = ContextVar(f'{self.__class__.__name__}_error', default=None)
118+
self._score = ContextVar(generate_uuid(), default=None)
119+
self._score_breakdown = ContextVar(generate_uuid(), default=None)
120+
self._reason = ContextVar(generate_uuid(), default=None)
121+
self._error = ContextVar(generate_uuid(), default=None)
102122

103123
@property
104124
def score(self) -> Optional[float]:
105125
return self._score.get()
126+
106127
@score.setter
107128
def score(self, value: Optional[float]) -> None:
108129
self._score.set(value)
109130

110131
@property
111132
def score_breakdown(self) -> Optional[Dict]:
112133
return self._score_breakdown.get()
134+
113135
@score_breakdown.setter
114136
def score_breakdown(self, value: Optional[Dict]) -> None:
115137
self._score_breakdown.set(value)
116138

117139
@property
118140
def reason(self) -> Optional[str]:
119141
return self._reason.get()
142+
120143
@reason.setter
121144
def reason(self, value: Optional[str]) -> None:
122145
self._reason.set(value)
123146

124147
@property
125148
def error(self) -> Optional[str]:
126149
return self._error.get()
150+
127151
@error.setter
128152
def error(self, value: Optional[str]) -> None:
129153
self._error.set(value)
130154

131155
@property
132156
def threshold(self) -> float:
133157
return self._threshold
158+
134159
@threshold.setter
135160
def threshold(self, value: float):
136161
self._threshold = value

0 commit comments

Comments
 (0)