Skip to content

Commit 8cb2ccd

Browse files
committed
fix dag
1 parent fd5f9d1 commit 8cb2ccd

File tree

4 files changed

+215
-110
lines changed

4 files changed

+215
-110
lines changed

deepeval/metrics/dag/nodes.py

Lines changed: 125 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222

2323

2424
class BaseNode:
25-
_indegree: Optional[int] = None
25+
_indegree: int = 0
26+
_depth: int = 0
2627

2728
def set_parent(self, parent: "BaseNode"):
2829
if hasattr(self, "_parent"):
@@ -46,38 +47,32 @@ async def _a_execute(
4647

4748

4849
def increment_indegree(node: BaseNode):
49-
if node._indegree is None:
50-
node._indegree = 1
51-
else:
52-
node._indegree += 1
50+
node._indegree += 1
5351

5452

5553
def decrement_indegree(node: BaseNode):
56-
if node._indegree is None:
57-
node._indegree = 0
58-
else:
59-
node._indegree -= 1
54+
node._indegree -= 1
6055

6156

6257
@dataclass
6358
class VerdictNode(BaseNode):
6459
verdict: Union[str, bool]
6560
score: Optional[int] = None
66-
g_eval: Optional[GEval] = None
61+
child: Optional[BaseNode] = None
6762
_parent: Optional[BaseNode] = None
6863

6964
def __hash__(self):
7065
return id(self)
7166

7267
def __post_init__(self):
7368
# Ensure exactly one of `score` or `child` is set (never both, never neither)
74-
if self.score is not None and self.g_eval is not None:
69+
if self.score is not None and self.child is not None:
7570
raise ValueError(
76-
"A VerdictNode can have either a 'score' or a 'g_eval', but not both."
71+
"A VerdictNode can have either a 'score' or a 'child', but not both."
7772
)
78-
if self.score is None and self.g_eval is None:
73+
if self.score is None and self.child is None:
7974
raise ValueError(
80-
"A VerdictNode must have either a 'score' or a 'g_eval'."
75+
"A VerdictNode must have either a 'score' or a 'child'."
8176
)
8277

8378
if self.score is not None:
@@ -88,7 +83,7 @@ def __post_init__(self):
8883

8984
def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
9085
decrement_indegree(self)
91-
if self._indegree != 0:
86+
if self._indegree > 0:
9287
return
9388

9489
if isinstance(self._parent, NonBinaryJudgementNode) or isinstance(
@@ -97,26 +92,35 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
9792
if self._parent._verdict.verdict != self.verdict:
9893
return
9994

100-
if self.g_eval is not None:
101-
g_eval_args = {
102-
"name": self.g_eval.name,
103-
"evaluation_params": self.g_eval.evaluation_params,
104-
"model": metric.model,
105-
"verbose_mode": metric.verbose_mode,
106-
}
107-
if self.g_eval.criteria:
108-
g_eval_args["criteria"] = self.g_eval.criteria
95+
if self.child is not None:
96+
if isinstance(self.child, GEval):
97+
g_eval_args = {
98+
"name": self.child.name,
99+
"evaluation_params": self.child.evaluation_params,
100+
"model": metric.model,
101+
"verbose_mode": metric.verbose_mode,
102+
}
103+
if self.child.criteria:
104+
g_eval_args["criteria"] = self.child.criteria
105+
else:
106+
g_eval_args["evaluation_steps"] = (
107+
self.child.evaluation_steps
108+
)
109+
copied_g_eval = GEval(**g_eval_args)
110+
111+
copied_g_eval.measure(
112+
test_case=test_case, _show_indicator=False
113+
)
114+
metric._verbose_steps.append(
115+
construct_node_verbose_log(self, depth, copied_g_eval)
116+
)
117+
metric.score = copied_g_eval.score
118+
if metric.include_reason:
119+
metric.reason = copied_g_eval.reason
109120
else:
110-
g_eval_args["evaluation_steps"] = self.g_eval.evaluation_steps
111-
copied_g_eval = GEval(**g_eval_args)
112-
113-
copied_g_eval.measure(test_case=test_case, _show_indicator=False)
114-
metric._verbose_steps.append(
115-
construct_node_verbose_log(self, depth, copied_g_eval)
116-
)
117-
metric.score = copied_g_eval.score
118-
if metric.include_reason:
119-
metric.reason = copied_g_eval.reason
121+
self.child._execute(
122+
metric=metric, test_case=test_case, depth=depth
123+
)
120124
else:
121125
metric._verbose_steps.append(
122126
construct_node_verbose_log(self, depth)
@@ -129,7 +133,7 @@ async def _a_execute(
129133
self, metric: BaseMetric, test_case: LLMTestCase, depth: int
130134
):
131135
decrement_indegree(self)
132-
if self._indegree != 0:
136+
if self._indegree > 0:
133137
return
134138

135139
if isinstance(self._parent, NonBinaryJudgementNode) or isinstance(
@@ -138,28 +142,35 @@ async def _a_execute(
138142
if self._parent._verdict.verdict != self.verdict:
139143
return
140144

141-
if self.g_eval is not None:
142-
g_eval_args = {
143-
"name": self.g_eval.name,
144-
"evaluation_params": self.g_eval.evaluation_params,
145-
"model": metric.model,
146-
"verbose_mode": metric.verbose_mode,
147-
}
148-
if self.g_eval.criteria:
149-
g_eval_args["criteria"] = self.g_eval.criteria
145+
if self.child is not None:
146+
if isinstance(self.child, GEval):
147+
g_eval_args = {
148+
"name": self.child.name,
149+
"evaluation_params": self.child.evaluation_params,
150+
"model": metric.model,
151+
"verbose_mode": metric.verbose_mode,
152+
}
153+
if self.child.criteria:
154+
g_eval_args["criteria"] = self.child.criteria
155+
else:
156+
g_eval_args["evaluation_steps"] = (
157+
self.child.evaluation_steps
158+
)
159+
copied_g_eval = GEval(**g_eval_args)
160+
161+
await copied_g_eval.a_measure(
162+
test_case=test_case, _show_indicator=False
163+
)
164+
metric._verbose_steps.append(
165+
construct_node_verbose_log(self, depth, copied_g_eval)
166+
)
167+
metric.score = copied_g_eval.score
168+
if metric.include_reason:
169+
metric.reason = copied_g_eval.reason
150170
else:
151-
g_eval_args["evaluation_steps"] = self.g_eval.evaluation_steps
152-
copied_g_eval = GEval(**g_eval_args)
153-
154-
await copied_g_eval.a_measure(
155-
test_case=test_case, _show_indicator=False
156-
)
157-
metric._verbose_steps.append(
158-
construct_node_verbose_log(self, depth, copied_g_eval)
159-
)
160-
metric.score = copied_g_eval.score
161-
if metric.include_reason:
162-
metric.reason = copied_g_eval.reason
171+
await self.child._a_execute(
172+
metric=metric, test_case=test_case, depth=depth
173+
)
163174
else:
164175
metric._verbose_steps.append(
165176
construct_node_verbose_log(self, depth)
@@ -229,13 +240,17 @@ def __post_init__(self):
229240
"A TaskNode must not have a VerdictNode as one of their 'children'."
230241
)
231242

243+
# print("-------")
232244
for child in self.children:
233245
child.set_parent(self)
234246
increment_indegree(child)
247+
# print("task node", child.__class__.__name__, id(child), child._indegree)
248+
# print("-------")
235249

236250
def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
251+
self._depth = max(0, self._depth, depth)
237252
decrement_indegree(self)
238-
if self._indegree != 0:
253+
if self._indegree > 0:
239254
return
240255

241256
text = """"""
@@ -262,17 +277,20 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
262277
res = metric.model.generate(prompt=prompt)
263278
self._output = res
264279

265-
metric._verbose_steps.append(construct_node_verbose_log(self, depth))
280+
metric._verbose_steps.append(
281+
construct_node_verbose_log(self, self._depth)
282+
)
266283
for children in self.children:
267284
children._execute(
268-
metric=metric, test_case=test_case, depth=depth + 1
285+
metric=metric, test_case=test_case, depth=self._depth + 1
269286
)
270287

271288
async def _a_execute(
272289
self, metric: BaseMetric, test_case: LLMTestCase, depth: int
273290
):
291+
self._depth = max(0, self._depth, depth)
274292
decrement_indegree(self)
275-
if self._indegree != 0:
293+
if self._indegree > 0:
276294
return
277295

278296
text = """"""
@@ -300,11 +318,13 @@ async def _a_execute(
300318
res = await metric.model.a_generate(prompt=prompt)
301319
self._output = res
302320

303-
metric._verbose_steps.append(construct_node_verbose_log(self, depth))
321+
metric._verbose_steps.append(
322+
construct_node_verbose_log(self, self._depth)
323+
)
304324
await asyncio.gather(
305325
*(
306326
child._a_execute(
307-
metric=metric, test_case=test_case, depth=depth + 1
327+
metric=metric, test_case=test_case, depth=self._depth + 1
308328
)
309329
for child in self.children
310330
)
@@ -331,6 +351,9 @@ def __post_init__(self):
331351

332352
# Check that every child is a VerdictNode and that its verdict is a boolean
333353
for child in self.children:
354+
if not isinstance(child, VerdictNode):
355+
raise TypeError("All children must be of type VerdictNode.")
356+
334357
if not isinstance(child.verdict, bool):
335358
raise ValueError(
336359
"All children BinaryJudgementNode must have a boolean vedict."
@@ -343,13 +366,20 @@ def __post_init__(self):
343366
"BinaryJudgementNode must have one True and one False VerdictNode child."
344367
)
345368

369+
# print("-------")
346370
for child in self.children:
347371
child.set_parent(self)
348372
increment_indegree(child)
373+
if child.child is not None:
374+
increment_indegree(child.child)
375+
# print("binary node nested", child.child.__class__.__name__, id(child.child), child.child._indegree)
376+
# print("binary node", child.__class__.__name__, id(child), child._indegree)
377+
# print("-------")
349378

350379
def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
380+
self._depth = max(0, self._depth, depth)
351381
decrement_indegree(self)
352-
if self._indegree != 0:
382+
if self._indegree > 0:
353383
return
354384

355385
text = """"""
@@ -385,17 +415,20 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
385415
data = trimAndLoadJson(res, self)
386416
self._verdict = BinaryJudgementVerdict(**data)
387417

388-
metric._verbose_steps.append(construct_node_verbose_log(self, depth))
418+
metric._verbose_steps.append(
419+
construct_node_verbose_log(self, self._depth)
420+
)
389421
for children in self.children:
390422
children._execute(
391-
metric=metric, test_case=test_case, depth=depth + 1
423+
metric=metric, test_case=test_case, depth=self._depth + 1
392424
)
393425

394426
async def _a_execute(
395427
self, metric: BaseMetric, test_case: LLMTestCase, depth: int
396428
):
429+
self._depth = max(0, self._depth, depth)
397430
decrement_indegree(self)
398-
if self._indegree != 0:
431+
if self._indegree > 0:
399432
return
400433

401434
text = """"""
@@ -431,11 +464,13 @@ async def _a_execute(
431464
data = trimAndLoadJson(res, self)
432465
self._verdict = BinaryJudgementVerdict(**data)
433466

434-
metric._verbose_steps.append(construct_node_verbose_log(self, depth))
467+
metric._verbose_steps.append(
468+
construct_node_verbose_log(self, self._depth)
469+
)
435470
await asyncio.gather(
436471
*(
437472
child._a_execute(
438-
metric=metric, test_case=test_case, depth=depth + 1
473+
metric=metric, test_case=test_case, depth=self._depth + 1
439474
)
440475
for child in self.children
441476
)
@@ -463,6 +498,9 @@ def __post_init__(self):
463498

464499
verdicts_set = set()
465500
for child in self.children:
501+
if not isinstance(child, VerdictNode):
502+
raise TypeError("All children must be of type VerdictNode.")
503+
466504
# Check if the verdict attribute of each child is a string
467505
if not isinstance(child.verdict, str):
468506
raise ValueError(
@@ -485,13 +523,20 @@ def __post_init__(self):
485523
reason=(str, ...),
486524
)
487525

526+
# print("-------")
488527
for child in self.children:
489528
child.set_parent(self)
490529
increment_indegree(child)
530+
if child.child is not None:
531+
increment_indegree(child.child)
532+
# print("non binary node nested", child.child.__class__.__name__, id(child.child), child.child._indegree)
533+
# print("non binary node", child.__class__.__name__, id(child), child._indegree)
534+
# print("-------")
491535

492536
def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
537+
self._depth = max(0, self._depth, depth)
493538
decrement_indegree(self)
494-
if self._indegree != 0:
539+
if self._indegree > 0:
495540
return
496541

497542
text = """"""
@@ -526,17 +571,20 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
526571
data = trimAndLoadJson(res, self)
527572
self._verdict = self._verdict_schema(**data)
528573

529-
metric._verbose_steps.append(construct_node_verbose_log(self, depth))
574+
metric._verbose_steps.append(
575+
construct_node_verbose_log(self, self._depth)
576+
)
530577
for children in self.children:
531578
children._execute(
532-
metric=metric, test_case=test_case, depth=depth + 1
579+
metric=metric, test_case=test_case, depth=self._depth + 1
533580
)
534581

535582
async def _a_execute(
536583
self, metric: BaseMetric, test_case: LLMTestCase, depth: int
537584
):
585+
self._depth = max(0, self._depth, depth)
538586
decrement_indegree(self)
539-
if self._indegree != 0:
587+
if self._indegree > 0:
540588
return
541589

542590
text = """"""
@@ -571,11 +619,13 @@ async def _a_execute(
571619
data = trimAndLoadJson(res, self)
572620
self._verdict = self._verdict_schema(**data)
573621

574-
metric._verbose_steps.append(construct_node_verbose_log(self, depth))
622+
metric._verbose_steps.append(
623+
construct_node_verbose_log(self, self._depth)
624+
)
575625
await asyncio.gather(
576626
*(
577627
child._a_execute(
578-
metric=metric, test_case=test_case, depth=depth + 1
628+
metric=metric, test_case=test_case, depth=self._depth + 1
579629
)
580630
for child in self.children
581631
)
@@ -615,7 +665,7 @@ def construct_node_verbose_log(
615665
f"{node.output_label}:\n{node._output}\n"
616666
)
617667
elif isinstance(node, VerdictNode):
618-
is_g_eval = node.g_eval is not None
668+
is_g_eval = node.child is not None
619669
type = "GEval" if is_g_eval else "Deterministic"
620670
verbose_log = (
621671
"________________________\n"

0 commit comments

Comments
 (0)