confident-ai
diff --git a/‎deepeval/metrics/dag/nodes.py
Lines changed: 125 additions & 75 deletions b/‎deepeval/metrics/dag/nodes.py
Lines changed: 125 additions & 75 deletions
@@ -22,7 +22,8 @@
 
 
 class BaseNode:
-    _indegree: Optional[int] = None
+    _indegree: int = 0
+    _depth: int = 0
 
     def set_parent(self, parent: "BaseNode"):
         if hasattr(self, "_parent"):
@@ -46,38 +47,32 @@ async def _a_execute(
 
 
 def increment_indegree(node: BaseNode):
-    if node._indegree is None:
-        node._indegree = 1
-    else:
-        node._indegree += 1
+    node._indegree += 1
 
 
 def decrement_indegree(node: BaseNode):
-    if node._indegree is None:
-        node._indegree = 0
-    else:
-        node._indegree -= 1
+    node._indegree -= 1
 
 
 @dataclass
 class VerdictNode(BaseNode):
     verdict: Union[str, bool]
     score: Optional[int] = None
-    g_eval: Optional[GEval] = None
+    child: Optional[BaseNode] = None
     _parent: Optional[BaseNode] = None
 
     def __hash__(self):
         return id(self)
 
     def __post_init__(self):
         # Ensure either `score` or `g_eval` is set, but not both
-        if self.score is not None and self.g_eval is not None:
+        if self.score is not None and self.child is not None:
             raise ValueError(
-                "A VerdictNode can have either a 'score' or a 'g_eval', but not both."
+                "A VerdictNode can have either a 'score' or a 'child', but not both."
             )
-        if self.score is None and self.g_eval is None:
+        if self.score is None and self.child is None:
             raise ValueError(
-                "A VerdictNode must have either a 'score' or a 'g_eval'."
+                "A VerdictNode must have either a 'score' or a 'child'."
             )
 
         if self.score is not None:
@@ -88,7 +83,7 @@ def __post_init__(self):
 
     def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
         decrement_indegree(self)
-        if self._indegree != 0:
+        if self._indegree > 0:
             return
 
         if isinstance(self._parent, NonBinaryJudgementNode) or isinstance(
@@ -97,26 +92,35 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
             if self._parent._verdict.verdict != self.verdict:
                 return
 
-        if self.g_eval is not None:
-            g_eval_args = {
-                "name": self.g_eval.name,
-                "evaluation_params": self.g_eval.evaluation_params,
-                "model": metric.model,
-                "verbose_mode": metric.verbose_mode,
-            }
-            if self.g_eval.criteria:
-                g_eval_args["criteria"] = self.g_eval.criteria
+        if self.child is not None:
+            if isinstance(self.child, GEval):
+                g_eval_args = {
+                    "name": self.child.name,
+                    "evaluation_params": self.child.evaluation_params,
+                    "model": metric.model,
+                    "verbose_mode": metric.verbose_mode,
+                }
+                if self.child.criteria:
+                    g_eval_args["criteria"] = self.child.criteria
+                else:
+                    g_eval_args["evaluation_steps"] = (
+                        self.child.evaluation_steps
+                    )
+                copied_g_eval = GEval(**g_eval_args)
+
+                copied_g_eval.measure(
+                    test_case=test_case, _show_indicator=False
+                )
+                metric._verbose_steps.append(
+                    construct_node_verbose_log(self, depth, copied_g_eval)
+                )
+                metric.score = copied_g_eval.score
+                if metric.include_reason:
+                    metric.reason = copied_g_eval.reason
             else:
-                g_eval_args["evaluation_steps"] = self.g_eval.evaluation_steps
-            copied_g_eval = GEval(**g_eval_args)
-
-            copied_g_eval.measure(test_case=test_case, _show_indicator=False)
-            metric._verbose_steps.append(
-                construct_node_verbose_log(self, depth, copied_g_eval)
-            )
-            metric.score = copied_g_eval.score
-            if metric.include_reason:
-                metric.reason = copied_g_eval.reason
+                self.child._execute(
+                    metric=metric, test_case=test_case, depth=depth
+                )
         else:
             metric._verbose_steps.append(
                 construct_node_verbose_log(self, depth)
@@ -129,7 +133,7 @@ async def _a_execute(
         self, metric: BaseMetric, test_case: LLMTestCase, depth: int
     ):
         decrement_indegree(self)
-        if self._indegree != 0:
+        if self._indegree > 0:
             return
 
         if isinstance(self._parent, NonBinaryJudgementNode) or isinstance(
@@ -138,28 +142,35 @@ async def _a_execute(
             if self._parent._verdict.verdict != self.verdict:
                 return
 
-        if self.g_eval is not None:
-            g_eval_args = {
-                "name": self.g_eval.name,
-                "evaluation_params": self.g_eval.evaluation_params,
-                "model": metric.model,
-                "verbose_mode": metric.verbose_mode,
-            }
-            if self.g_eval.criteria:
-                g_eval_args["criteria"] = self.g_eval.criteria
+        if self.child is not None:
+            if isinstance(self.child, GEval):
+                g_eval_args = {
+                    "name": self.child.name,
+                    "evaluation_params": self.child.evaluation_params,
+                    "model": metric.model,
+                    "verbose_mode": metric.verbose_mode,
+                }
+                if self.child.criteria:
+                    g_eval_args["criteria"] = self.child.criteria
+                else:
+                    g_eval_args["evaluation_steps"] = (
+                        self.child.evaluation_steps
+                    )
+                copied_g_eval = GEval(**g_eval_args)
+
+                await copied_g_eval.a_measure(
+                    test_case=test_case, _show_indicator=False
+                )
+                metric._verbose_steps.append(
+                    construct_node_verbose_log(self, depth, copied_g_eval)
+                )
+                metric.score = copied_g_eval.score
+                if metric.include_reason:
+                    metric.reason = copied_g_eval.reason
             else:
-                g_eval_args["evaluation_steps"] = self.g_eval.evaluation_steps
-            copied_g_eval = GEval(**g_eval_args)
-
-            await copied_g_eval.a_measure(
-                test_case=test_case, _show_indicator=False
-            )
-            metric._verbose_steps.append(
-                construct_node_verbose_log(self, depth, copied_g_eval)
-            )
-            metric.score = copied_g_eval.score
-            if metric.include_reason:
-                metric.reason = copied_g_eval.reason
+                await self.child._a_execute(
+                    metric=metric, test_case=test_case, depth=depth
+                )
         else:
             metric._verbose_steps.append(
                 construct_node_verbose_log(self, depth)
@@ -229,13 +240,17 @@ def __post_init__(self):
                     "A TaskNode must not have a VerdictNode as one of their 'children'."
                 )
 
+        # print("-------")
         for child in self.children:
             child.set_parent(self)
             increment_indegree(child)
+        #     print("task node", child.__class__.__name__, id(child), child._indegree)
+        # print("-------")
 
     def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
+        self._depth = max(0, self._depth, depth)
         decrement_indegree(self)
-        if self._indegree != 0:
+        if self._indegree > 0:
             return
 
         text = """"""
@@ -262,17 +277,20 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
             res = metric.model.generate(prompt=prompt)
             self._output = res
 
-        metric._verbose_steps.append(construct_node_verbose_log(self, depth))
+        metric._verbose_steps.append(
+            construct_node_verbose_log(self, self._depth)
+        )
         for children in self.children:
             children._execute(
-                metric=metric, test_case=test_case, depth=depth + 1
+                metric=metric, test_case=test_case, depth=self._depth + 1
             )
 
     async def _a_execute(
         self, metric: BaseMetric, test_case: LLMTestCase, depth: int
     ):
+        self._depth = max(0, self._depth, depth)
         decrement_indegree(self)
-        if self._indegree != 0:
+        if self._indegree > 0:
             return
 
         text = """"""
@@ -300,11 +318,13 @@ async def _a_execute(
             res = await metric.model.a_generate(prompt=prompt)
             self._output = res
 
-        metric._verbose_steps.append(construct_node_verbose_log(self, depth))
+        metric._verbose_steps.append(
+            construct_node_verbose_log(self, self._depth)
+        )
         await asyncio.gather(
             *(
                 child._a_execute(
-                    metric=metric, test_case=test_case, depth=depth + 1
+                    metric=metric, test_case=test_case, depth=self._depth + 1
                 )
                 for child in self.children
             )
@@ -331,6 +351,9 @@ def __post_init__(self):
 
         # Check if all children are ClassificationResultNode and their classifications are boolean
         for child in self.children:
+            if not isinstance(child, VerdictNode):
+                raise TypeError("All children must be of type VerdictNode.")
+
             if not isinstance(child.verdict, bool):
                 raise ValueError(
                     "All children BinaryJudgementNode must have a boolean vedict."
@@ -343,13 +366,20 @@ def __post_init__(self):
                 "BinaryJudgementNode must have one True and one False VerdictNode child."
             )
 
+        # print("-------")
         for child in self.children:
             child.set_parent(self)
             increment_indegree(child)
+            if child.child is not None:
+                increment_indegree(child.child)
+        #         print("binary node nested", child.child.__class__.__name__, id(child.child), child.child._indegree)
+        #     print("binary node", child.__class__.__name__, id(child), child._indegree)
+        # print("-------")
 
     def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
+        self._depth = max(0, self._depth, depth)
         decrement_indegree(self)
-        if self._indegree != 0:
+        if self._indegree > 0:
             return
 
         text = """"""
@@ -385,17 +415,20 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
                 data = trimAndLoadJson(res, self)
                 self._verdict = BinaryJudgementVerdict(**data)
 
-        metric._verbose_steps.append(construct_node_verbose_log(self, depth))
+        metric._verbose_steps.append(
+            construct_node_verbose_log(self, self._depth)
+        )
         for children in self.children:
             children._execute(
-                metric=metric, test_case=test_case, depth=depth + 1
+                metric=metric, test_case=test_case, depth=self._depth + 1
             )
 
     async def _a_execute(
         self, metric: BaseMetric, test_case: LLMTestCase, depth: int
     ):
+        self._depth = max(0, self._depth, depth)
         decrement_indegree(self)
-        if self._indegree != 0:
+        if self._indegree > 0:
             return
 
         text = """"""
@@ -431,11 +464,13 @@ async def _a_execute(
                 data = trimAndLoadJson(res, self)
                 self._verdict = BinaryJudgementVerdict(**data)
 
-        metric._verbose_steps.append(construct_node_verbose_log(self, depth))
+        metric._verbose_steps.append(
+            construct_node_verbose_log(self, self._depth)
+        )
         await asyncio.gather(
             *(
                 child._a_execute(
-                    metric=metric, test_case=test_case, depth=depth + 1
+                    metric=metric, test_case=test_case, depth=self._depth + 1
                 )
                 for child in self.children
             )
@@ -463,6 +498,9 @@ def __post_init__(self):
 
         verdicts_set = set()
         for child in self.children:
+            if not isinstance(child, VerdictNode):
+                raise TypeError("All children must be of type VerdictNode.")
+
             # Check if the verdict attribute of each child is a string
             if not isinstance(child.verdict, str):
                 raise ValueError(
@@ -485,13 +523,20 @@ def __post_init__(self):
             reason=(str, ...),
         )
 
+        # print("-------")
         for child in self.children:
             child.set_parent(self)
             increment_indegree(child)
+            if child.child is not None:
+                increment_indegree(child.child)
+        #         print("non binary node nested", child.child.__class__.__name__, id(child.child), child.child._indegree)
+        #     print("non binary node", child.__class__.__name__, id(child), child._indegree)
+        # print("-------")
 
     def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
+        self._depth = max(0, self._depth, depth)
         decrement_indegree(self)
-        if self._indegree != 0:
+        if self._indegree > 0:
             return
 
         text = """"""
@@ -526,17 +571,20 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
                 data = trimAndLoadJson(res, self)
                 self._verdict = self._verdict_schema(**data)
 
-        metric._verbose_steps.append(construct_node_verbose_log(self, depth))
+        metric._verbose_steps.append(
+            construct_node_verbose_log(self, self._depth)
+        )
         for children in self.children:
             children._execute(
-                metric=metric, test_case=test_case, depth=depth + 1
+                metric=metric, test_case=test_case, depth=self._depth + 1
             )
 
     async def _a_execute(
         self, metric: BaseMetric, test_case: LLMTestCase, depth: int
     ):
+        self._depth = max(0, self._depth, depth)
         decrement_indegree(self)
-        if self._indegree != 0:
+        if self._indegree > 0:
             return
 
         text = """"""
@@ -571,11 +619,13 @@ async def _a_execute(
                 data = trimAndLoadJson(res, self)
                 self._verdict = self._verdict_schema(**data)
 
-        metric._verbose_steps.append(construct_node_verbose_log(self, depth))
+        metric._verbose_steps.append(
+            construct_node_verbose_log(self, self._depth)
+        )
         await asyncio.gather(
             *(
                 child._a_execute(
-                    metric=metric, test_case=test_case, depth=depth + 1
+                    metric=metric, test_case=test_case, depth=self._depth + 1
                 )
                 for child in self.children
             )
@@ -615,7 +665,7 @@ def construct_node_verbose_log(
             f"{node.output_label}:\n{node._output}\n"
         )
     elif isinstance(node, VerdictNode):
-        is_g_eval = node.g_eval is not None
+        is_g_eval = node.child is not None
         type = "GEval" if is_g_eval else "Deterministic"
         verbose_log = (
             "________________________\n"