.

penguine-ip · penguine-ip · commit 90ddcd9e250f · 2024-06-20T19:15:08.000+08:00
diff --git a/deepeval/metrics/answer_relevancy/answer_relevancy.py b/deepeval/metrics/answer_relevancy/answer_relevancy.py
@@ -61,6 +61,7 @@ def __init__(
         strict_mode: bool = False,
         verbose_mode: bool = False,
     ):
+        super().__init__()
         self._statements: ContextVar[Optional[List[str]]] = ContextVar(
             generate_uuid(), default=None
         )
diff --git a/deepeval/metrics/base_metric.py b/deepeval/metrics/base_metric.py
@@ -7,21 +7,7 @@
 from deepeval.utils import generate_uuid
 
 
-class MetricMeta(type):
-    def __new__(cls, name, bases, attrs):
-        # Create the new class
-        new_class = super().__new__(cls, name, bases, attrs)
-        # Assign new ContextVar instances with unique identifiers
-        new_class._score = ContextVar(str(uuid.uuid4()), default=None)
-        new_class._score_breakdown = ContextVar(str(uuid.uuid4()), default=None)
-        new_class._reason = ContextVar(str(uuid.uuid4()), default=None)
-        new_class._success = ContextVar(str(uuid.uuid4()), default=None)
-        new_class._error = ContextVar(str(uuid.uuid4()), default=None)
-        return new_class
-
-
-class BaseMetric(metaclass=MetricMeta):
-
+class BaseMetric:
     evaluation_model: Optional[str] = None
     strict_mode: bool = False
     async_mode: bool = True
@@ -85,6 +71,13 @@ def threshold(self) -> float:
     def threshold(self, value: float):
         self._threshold = value
 
+    def __init__(self):
+        self._score = ContextVar(str(uuid.uuid4()), default=None)
+        self._score_breakdown = ContextVar(str(uuid.uuid4()), default=None)
+        self._reason = ContextVar(str(uuid.uuid4()), default=None)
+        self._success = ContextVar(str(uuid.uuid4()), default=None)
+        self._error = ContextVar(str(uuid.uuid4()), default=None)
+
     @abstractmethod
     def measure(self, test_case: LLMTestCase, *args, **kwargs) -> float:
         raise NotImplementedError
diff --git a/deepeval/metrics/bias/bias.py b/deepeval/metrics/bias/bias.py
@@ -63,6 +63,7 @@ def __init__(
         strict_mode: bool = False,
         verbose_mode: bool = False,
     ):
+        super().__init__()
         self._opinions: ContextVar[Optional[List[str]]] = ContextVar(
             generate_uuid(), default=None
         )
diff --git a/deepeval/metrics/contextual_precision/contextual_precision.py b/deepeval/metrics/contextual_precision/contextual_precision.py
@@ -58,6 +58,7 @@ def __init__(
         strict_mode: bool = False,
         verbose_mode: bool = False,
     ):
+        super().__init__()
         self._verdicts: ContextVar[
             Optional[List[ContextualPrecisionVerdict]]
         ] = ContextVar(generate_uuid(), default=None)
diff --git a/deepeval/metrics/contextual_recall/contextual_recall.py b/deepeval/metrics/contextual_recall/contextual_recall.py
@@ -55,6 +55,7 @@ def __init__(
         strict_mode: bool = False,
         verbose_mode: bool = False,
     ):
+        super().__init__()
         self._verdicts: ContextVar[Optional[List[ContextualRecallVerdict]]] = (
             ContextVar(generate_uuid(), default=None)
         )
diff --git a/deepeval/metrics/contextual_relevancy/contextual_relevancy.py b/deepeval/metrics/contextual_relevancy/contextual_relevancy.py
@@ -57,6 +57,7 @@ def __init__(
         strict_mode: bool = False,
         verbose_mode: bool = False,
     ):
+        super().__init__()
         self._verdicts: ContextVar[
             Optional[List[ContextualRelevancyVerdict]]
         ] = ContextVar(generate_uuid(), default=None)
diff --git a/deepeval/metrics/faithfulness/faithfulness.py b/deepeval/metrics/faithfulness/faithfulness.py
@@ -72,6 +72,7 @@ def __init__(
         strict_mode: bool = False,
         verbose_mode: bool = False,
     ):
+        super().__init__()
         self._truths: ContextVar[Optional[List[str]]] = ContextVar(
             generate_uuid(), default=None
         )
diff --git a/deepeval/metrics/g_eval/g_eval.py b/deepeval/metrics/g_eval/g_eval.py
@@ -80,6 +80,7 @@ def __init__(
         strict_mode: bool = False,
         verbose_mode: bool = False,
     ):
+        super().__init__()
         self._evaluation_steps: ContextVar[Optional[List[str]]] = ContextVar(
             generate_uuid(), default=None
         )
diff --git a/deepeval/metrics/hallucination/hallucination.py b/deepeval/metrics/hallucination/hallucination.py
@@ -55,6 +55,7 @@ def __init__(
         strict_mode: bool = False,
         verbose_mode: bool = False,
     ):
+        super().__init__()
         self._verdicts: ContextVar[Optional[List[HallucinationVerdict]]] = (
             ContextVar(generate_uuid(), default=None)
         )
diff --git a/deepeval/metrics/summarization/summarization.py b/deepeval/metrics/summarization/summarization.py
@@ -117,6 +117,7 @@ def __init__(
         strict_mode: bool = False,
         verbose_mode: bool = False,
     ):
+        super().__init__()
         self._truths: ContextVar[Optional[List[str]]] = ContextVar(
             generate_uuid(), default=None
         )
diff --git a/deepeval/metrics/toxicity/toxicity.py b/deepeval/metrics/toxicity/toxicity.py
@@ -63,6 +63,7 @@ def __init__(
         strict_mode: bool = False,
         verbose_mode: bool = False,
     ):
+        super().__init__()
         self._opinions: ContextVar[Optional[List[str]]] = ContextVar(
             generate_uuid(), default=None
         )
diff --git a/tests/test_everything.py b/tests/test_everything.py
@@ -128,6 +128,18 @@ def test_everything():
         verbose_mode=verbose_mode,
     )
 
+    metric11 = GEval(
+        name="Relevancy",
+        criteria="Relevancy - determine if the actual output is relevant with the input.",
+        evaluation_params=[
+            LLMTestCaseParams.INPUT,
+            LLMTestCaseParams.ACTUAL_OUTPUT,
+        ],
+        strict_mode=strict_mode,
+        model="gpt-4-0125-preview",
+        verbose_mode=verbose_mode,
+    )
+
     test_case = LLMTestCase(
         input="What is this",
         actual_output="this is a latte",
@@ -140,15 +152,16 @@ def test_everything():
         test_case,
         [
             metric1,
-            metric2,
-            metric3,
-            metric4,
-            metric5,
+            # metric2,
+            # metric3,
+            # metric4,
+            # metric5,
             # metric6,
             # metric7,
             # metric8,
             # metric9,
-            # metric10,
+            metric10,
+            metric11,
         ],
         # run_async=False,
     )

Original file line number	Diff line number	Diff line change
`@@ -61,6 +61,7 @@ def __init__(`
`61`	`61`	`strict_mode: bool = False,`
`62`	`62`	`verbose_mode: bool = False,`
`63`	`63`	`):`
	`64`	`+ super().__init__()`
`64`	`65`	`self._statements: ContextVar[Optional[List[str]]] = ContextVar(`
`65`	`66`	`generate_uuid(), default=None`
`66`	`67`	`)`
Original file line number	Diff line number	Diff line change
`@@ -63,6 +63,7 @@ def __init__(`
`63`	`63`	`strict_mode: bool = False,`
`64`	`64`	`verbose_mode: bool = False,`
`65`	`65`	`):`
	`66`	`+ super().__init__()`
`66`	`67`	`self._opinions: ContextVar[Optional[List[str]]] = ContextVar(`
`67`	`68`	`generate_uuid(), default=None`
`68`	`69`	`)`
Original file line number	Diff line number	Diff line change
`@@ -55,6 +55,7 @@ def __init__(`
`55`	`55`	`strict_mode: bool = False,`
`56`	`56`	`verbose_mode: bool = False,`
`57`	`57`	`):`
	`58`	`+ super().__init__()`
`58`	`59`	`self._verdicts: ContextVar[Optional[List[ContextualRecallVerdict]]] = (`
`59`	`60`	`ContextVar(generate_uuid(), default=None)`
`60`	`61`	`)`
Original file line number	Diff line number	Diff line change
`@@ -72,6 +72,7 @@ def __init__(`
`72`	`72`	`strict_mode: bool = False,`
`73`	`73`	`verbose_mode: bool = False,`
`74`	`74`	`):`
	`75`	`+ super().__init__()`
`75`	`76`	`self._truths: ContextVar[Optional[List[str]]] = ContextVar(`
`76`	`77`	`generate_uuid(), default=None`
`77`	`78`	`)`
Original file line number	Diff line number	Diff line change
`@@ -80,6 +80,7 @@ def __init__(`
`80`	`80`	`strict_mode: bool = False,`
`81`	`81`	`verbose_mode: bool = False,`
`82`	`82`	`):`
	`83`	`+ super().__init__()`
`83`	`84`	`self._evaluation_steps: ContextVar[Optional[List[str]]] = ContextVar(`
`84`	`85`	`generate_uuid(), default=None`
`85`	`86`	`)`
Original file line number	Diff line number	Diff line change
`@@ -117,6 +117,7 @@ def __init__(`
`117`	`117`	`strict_mode: bool = False,`
`118`	`118`	`verbose_mode: bool = False,`
`119`	`119`	`):`
	`120`	`+ super().__init__()`
`120`	`121`	`self._truths: ContextVar[Optional[List[str]]] = ContextVar(`
`121`	`122`	`generate_uuid(), default=None`
`122`	`123`	`)`