22
22
23
23
24
24
class BaseNode :
25
- _indegree : Optional [int ] = None
25
+ _indegree : int = 0
26
+ _depth : int = 0
26
27
27
28
def set_parent (self , parent : "BaseNode" ):
28
29
if hasattr (self , "_parent" ):
@@ -46,38 +47,32 @@ async def _a_execute(
46
47
47
48
48
49
def increment_indegree (node : BaseNode ):
49
- if node ._indegree is None :
50
- node ._indegree = 1
51
- else :
52
- node ._indegree += 1
50
+ node ._indegree += 1
53
51
54
52
55
53
def decrement_indegree (node : BaseNode ):
56
- if node ._indegree is None :
57
- node ._indegree = 0
58
- else :
59
- node ._indegree -= 1
54
+ node ._indegree -= 1
60
55
61
56
62
57
@dataclass
63
58
class VerdictNode (BaseNode ):
64
59
verdict : Union [str , bool ]
65
60
score : Optional [int ] = None
66
- g_eval : Optional [GEval ] = None
61
+ child : Optional [BaseNode ] = None
67
62
_parent : Optional [BaseNode ] = None
68
63
69
64
def __hash__ (self ):
70
65
return id (self )
71
66
72
67
def __post_init__ (self ):
73
68
# Ensure exactly one of `score` or `child` is set (one is required, but not both)
74
- if self .score is not None and self .g_eval is not None :
69
+ if self .score is not None and self .child is not None :
75
70
raise ValueError (
76
- "A VerdictNode can have either a 'score' or a 'g_eval ', but not both."
71
+ "A VerdictNode can have either a 'score' or a 'child ', but not both."
77
72
)
78
- if self .score is None and self .g_eval is None :
73
+ if self .score is None and self .child is None :
79
74
raise ValueError (
80
- "A VerdictNode must have either a 'score' or a 'g_eval '."
75
+ "A VerdictNode must have either a 'score' or a 'child '."
81
76
)
82
77
83
78
if self .score is not None :
@@ -88,7 +83,7 @@ def __post_init__(self):
88
83
89
84
def _execute (self , metric : BaseMetric , test_case : LLMTestCase , depth : int ):
90
85
decrement_indegree (self )
91
- if self ._indegree != 0 :
86
+ if self ._indegree > 0 :
92
87
return
93
88
94
89
if isinstance (self ._parent , NonBinaryJudgementNode ) or isinstance (
@@ -97,26 +92,35 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
97
92
if self ._parent ._verdict .verdict != self .verdict :
98
93
return
99
94
100
- if self .g_eval is not None :
101
- g_eval_args = {
102
- "name" : self .g_eval .name ,
103
- "evaluation_params" : self .g_eval .evaluation_params ,
104
- "model" : metric .model ,
105
- "verbose_mode" : metric .verbose_mode ,
106
- }
107
- if self .g_eval .criteria :
108
- g_eval_args ["criteria" ] = self .g_eval .criteria
95
+ if self .child is not None :
96
+ if isinstance (self .child , GEval ):
97
+ g_eval_args = {
98
+ "name" : self .child .name ,
99
+ "evaluation_params" : self .child .evaluation_params ,
100
+ "model" : metric .model ,
101
+ "verbose_mode" : metric .verbose_mode ,
102
+ }
103
+ if self .child .criteria :
104
+ g_eval_args ["criteria" ] = self .child .criteria
105
+ else :
106
+ g_eval_args ["evaluation_steps" ] = (
107
+ self .child .evaluation_steps
108
+ )
109
+ copied_g_eval = GEval (** g_eval_args )
110
+
111
+ copied_g_eval .measure (
112
+ test_case = test_case , _show_indicator = False
113
+ )
114
+ metric ._verbose_steps .append (
115
+ construct_node_verbose_log (self , depth , copied_g_eval )
116
+ )
117
+ metric .score = copied_g_eval .score
118
+ if metric .include_reason :
119
+ metric .reason = copied_g_eval .reason
109
120
else :
110
- g_eval_args ["evaluation_steps" ] = self .g_eval .evaluation_steps
111
- copied_g_eval = GEval (** g_eval_args )
112
-
113
- copied_g_eval .measure (test_case = test_case , _show_indicator = False )
114
- metric ._verbose_steps .append (
115
- construct_node_verbose_log (self , depth , copied_g_eval )
116
- )
117
- metric .score = copied_g_eval .score
118
- if metric .include_reason :
119
- metric .reason = copied_g_eval .reason
121
+ self .child ._execute (
122
+ metric = metric , test_case = test_case , depth = depth
123
+ )
120
124
else :
121
125
metric ._verbose_steps .append (
122
126
construct_node_verbose_log (self , depth )
@@ -129,7 +133,7 @@ async def _a_execute(
129
133
self , metric : BaseMetric , test_case : LLMTestCase , depth : int
130
134
):
131
135
decrement_indegree (self )
132
- if self ._indegree != 0 :
136
+ if self ._indegree > 0 :
133
137
return
134
138
135
139
if isinstance (self ._parent , NonBinaryJudgementNode ) or isinstance (
@@ -138,28 +142,35 @@ async def _a_execute(
138
142
if self ._parent ._verdict .verdict != self .verdict :
139
143
return
140
144
141
- if self .g_eval is not None :
142
- g_eval_args = {
143
- "name" : self .g_eval .name ,
144
- "evaluation_params" : self .g_eval .evaluation_params ,
145
- "model" : metric .model ,
146
- "verbose_mode" : metric .verbose_mode ,
147
- }
148
- if self .g_eval .criteria :
149
- g_eval_args ["criteria" ] = self .g_eval .criteria
145
+ if self .child is not None :
146
+ if isinstance (self .child , GEval ):
147
+ g_eval_args = {
148
+ "name" : self .child .name ,
149
+ "evaluation_params" : self .child .evaluation_params ,
150
+ "model" : metric .model ,
151
+ "verbose_mode" : metric .verbose_mode ,
152
+ }
153
+ if self .child .criteria :
154
+ g_eval_args ["criteria" ] = self .child .criteria
155
+ else :
156
+ g_eval_args ["evaluation_steps" ] = (
157
+ self .child .evaluation_steps
158
+ )
159
+ copied_g_eval = GEval (** g_eval_args )
160
+
161
+ await copied_g_eval .a_measure (
162
+ test_case = test_case , _show_indicator = False
163
+ )
164
+ metric ._verbose_steps .append (
165
+ construct_node_verbose_log (self , depth , copied_g_eval )
166
+ )
167
+ metric .score = copied_g_eval .score
168
+ if metric .include_reason :
169
+ metric .reason = copied_g_eval .reason
150
170
else :
151
- g_eval_args ["evaluation_steps" ] = self .g_eval .evaluation_steps
152
- copied_g_eval = GEval (** g_eval_args )
153
-
154
- await copied_g_eval .a_measure (
155
- test_case = test_case , _show_indicator = False
156
- )
157
- metric ._verbose_steps .append (
158
- construct_node_verbose_log (self , depth , copied_g_eval )
159
- )
160
- metric .score = copied_g_eval .score
161
- if metric .include_reason :
162
- metric .reason = copied_g_eval .reason
171
+ await self .child ._a_execute (
172
+ metric = metric , test_case = test_case , depth = depth
173
+ )
163
174
else :
164
175
metric ._verbose_steps .append (
165
176
construct_node_verbose_log (self , depth )
@@ -229,13 +240,17 @@ def __post_init__(self):
229
240
"A TaskNode must not have a VerdictNode as one of their 'children'."
230
241
)
231
242
243
+ # print("-------")
232
244
for child in self .children :
233
245
child .set_parent (self )
234
246
increment_indegree (child )
247
+ # print("task node", child.__class__.__name__, id(child), child._indegree)
248
+ # print("-------")
235
249
236
250
def _execute (self , metric : BaseMetric , test_case : LLMTestCase , depth : int ):
251
+ self ._depth = max (0 , self ._depth , depth )
237
252
decrement_indegree (self )
238
- if self ._indegree != 0 :
253
+ if self ._indegree > 0 :
239
254
return
240
255
241
256
text = """"""
@@ -262,17 +277,20 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
262
277
res = metric .model .generate (prompt = prompt )
263
278
self ._output = res
264
279
265
- metric ._verbose_steps .append (construct_node_verbose_log (self , depth ))
280
+ metric ._verbose_steps .append (
281
+ construct_node_verbose_log (self , self ._depth )
282
+ )
266
283
for children in self .children :
267
284
children ._execute (
268
- metric = metric , test_case = test_case , depth = depth + 1
285
+ metric = metric , test_case = test_case , depth = self . _depth + 1
269
286
)
270
287
271
288
async def _a_execute (
272
289
self , metric : BaseMetric , test_case : LLMTestCase , depth : int
273
290
):
291
+ self ._depth = max (0 , self ._depth , depth )
274
292
decrement_indegree (self )
275
- if self ._indegree != 0 :
293
+ if self ._indegree > 0 :
276
294
return
277
295
278
296
text = """"""
@@ -300,11 +318,13 @@ async def _a_execute(
300
318
res = await metric .model .a_generate (prompt = prompt )
301
319
self ._output = res
302
320
303
- metric ._verbose_steps .append (construct_node_verbose_log (self , depth ))
321
+ metric ._verbose_steps .append (
322
+ construct_node_verbose_log (self , self ._depth )
323
+ )
304
324
await asyncio .gather (
305
325
* (
306
326
child ._a_execute (
307
- metric = metric , test_case = test_case , depth = depth + 1
327
+ metric = metric , test_case = test_case , depth = self . _depth + 1
308
328
)
309
329
for child in self .children
310
330
)
@@ -331,6 +351,9 @@ def __post_init__(self):
331
351
332
352
# Check that all children are VerdictNode instances and that their verdicts are boolean
333
353
for child in self .children :
354
+ if not isinstance (child , VerdictNode ):
355
+ raise TypeError ("All children must be of type VerdictNode." )
356
+
334
357
if not isinstance (child .verdict , bool ):
335
358
raise ValueError (
336
359
"All children BinaryJudgementNode must have a boolean vedict."
@@ -343,13 +366,20 @@ def __post_init__(self):
343
366
"BinaryJudgementNode must have one True and one False VerdictNode child."
344
367
)
345
368
369
+ # print("-------")
346
370
for child in self .children :
347
371
child .set_parent (self )
348
372
increment_indegree (child )
373
+ if child .child is not None :
374
+ increment_indegree (child .child )
375
+ # print("binary node nested", child.child.__class__.__name__, id(child.child), child.child._indegree)
376
+ # print("binary node", child.__class__.__name__, id(child), child._indegree)
377
+ # print("-------")
349
378
350
379
def _execute (self , metric : BaseMetric , test_case : LLMTestCase , depth : int ):
380
+ self ._depth = max (0 , self ._depth , depth )
351
381
decrement_indegree (self )
352
- if self ._indegree != 0 :
382
+ if self ._indegree > 0 :
353
383
return
354
384
355
385
text = """"""
@@ -385,17 +415,20 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
385
415
data = trimAndLoadJson (res , self )
386
416
self ._verdict = BinaryJudgementVerdict (** data )
387
417
388
- metric ._verbose_steps .append (construct_node_verbose_log (self , depth ))
418
+ metric ._verbose_steps .append (
419
+ construct_node_verbose_log (self , self ._depth )
420
+ )
389
421
for children in self .children :
390
422
children ._execute (
391
- metric = metric , test_case = test_case , depth = depth + 1
423
+ metric = metric , test_case = test_case , depth = self . _depth + 1
392
424
)
393
425
394
426
async def _a_execute (
395
427
self , metric : BaseMetric , test_case : LLMTestCase , depth : int
396
428
):
429
+ self ._depth = max (0 , self ._depth , depth )
397
430
decrement_indegree (self )
398
- if self ._indegree != 0 :
431
+ if self ._indegree > 0 :
399
432
return
400
433
401
434
text = """"""
@@ -431,11 +464,13 @@ async def _a_execute(
431
464
data = trimAndLoadJson (res , self )
432
465
self ._verdict = BinaryJudgementVerdict (** data )
433
466
434
- metric ._verbose_steps .append (construct_node_verbose_log (self , depth ))
467
+ metric ._verbose_steps .append (
468
+ construct_node_verbose_log (self , self ._depth )
469
+ )
435
470
await asyncio .gather (
436
471
* (
437
472
child ._a_execute (
438
- metric = metric , test_case = test_case , depth = depth + 1
473
+ metric = metric , test_case = test_case , depth = self . _depth + 1
439
474
)
440
475
for child in self .children
441
476
)
@@ -463,6 +498,9 @@ def __post_init__(self):
463
498
464
499
verdicts_set = set ()
465
500
for child in self .children :
501
+ if not isinstance (child , VerdictNode ):
502
+ raise TypeError ("All children must be of type VerdictNode." )
503
+
466
504
# Check if the verdict attribute of each child is a string
467
505
if not isinstance (child .verdict , str ):
468
506
raise ValueError (
@@ -485,13 +523,20 @@ def __post_init__(self):
485
523
reason = (str , ...),
486
524
)
487
525
526
+ # print("-------")
488
527
for child in self .children :
489
528
child .set_parent (self )
490
529
increment_indegree (child )
530
+ if child .child is not None :
531
+ increment_indegree (child .child )
532
+ # print("non binary node nested", child.child.__class__.__name__, id(child.child), child.child._indegree)
533
+ # print("non binary node", child.__class__.__name__, id(child), child._indegree)
534
+ # print("-------")
491
535
492
536
def _execute (self , metric : BaseMetric , test_case : LLMTestCase , depth : int ):
537
+ self ._depth = max (0 , self ._depth , depth )
493
538
decrement_indegree (self )
494
- if self ._indegree != 0 :
539
+ if self ._indegree > 0 :
495
540
return
496
541
497
542
text = """"""
@@ -526,17 +571,20 @@ def _execute(self, metric: BaseMetric, test_case: LLMTestCase, depth: int):
526
571
data = trimAndLoadJson (res , self )
527
572
self ._verdict = self ._verdict_schema (** data )
528
573
529
- metric ._verbose_steps .append (construct_node_verbose_log (self , depth ))
574
+ metric ._verbose_steps .append (
575
+ construct_node_verbose_log (self , self ._depth )
576
+ )
530
577
for children in self .children :
531
578
children ._execute (
532
- metric = metric , test_case = test_case , depth = depth + 1
579
+ metric = metric , test_case = test_case , depth = self . _depth + 1
533
580
)
534
581
535
582
async def _a_execute (
536
583
self , metric : BaseMetric , test_case : LLMTestCase , depth : int
537
584
):
585
+ self ._depth = max (0 , self ._depth , depth )
538
586
decrement_indegree (self )
539
- if self ._indegree != 0 :
587
+ if self ._indegree > 0 :
540
588
return
541
589
542
590
text = """"""
@@ -571,11 +619,13 @@ async def _a_execute(
571
619
data = trimAndLoadJson (res , self )
572
620
self ._verdict = self ._verdict_schema (** data )
573
621
574
- metric ._verbose_steps .append (construct_node_verbose_log (self , depth ))
622
+ metric ._verbose_steps .append (
623
+ construct_node_verbose_log (self , self ._depth )
624
+ )
575
625
await asyncio .gather (
576
626
* (
577
627
child ._a_execute (
578
- metric = metric , test_case = test_case , depth = depth + 1
628
+ metric = metric , test_case = test_case , depth = self . _depth + 1
579
629
)
580
630
for child in self .children
581
631
)
@@ -615,7 +665,7 @@ def construct_node_verbose_log(
615
665
f"{ node .output_label } :\n { node ._output } \n "
616
666
)
617
667
elif isinstance (node , VerdictNode ):
618
- is_g_eval = node .g_eval is not None
668
+ is_g_eval = node .child is not None
619
669
type = "GEval" if is_g_eval else "Deterministic"
620
670
verbose_log = (
621
671
"________________________\n "
0 commit comments