Skip to content

Commit 2418b9f

Browse files
authored
Merge branch 'confident-ai:main' into main
2 parents a0d3d7b + ee0f7bf commit 2418b9f

File tree

12 files changed

+571
-552
lines changed

12 files changed

+571
-552
lines changed

CITATION.cff

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ authors:
66
- family-names: Vongthongsri
77
given-names: Kritin
88
title: deepeval
9-
version: 3.0.8
9+
version: 3.1.0
1010
date-released: "2025-06-08"
1111
url: https://confident-ai.com
1212
repository-code: https://github.com/confident-ai/deepeval

README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,18 @@
3434
</a>
3535
</p>
3636

37+
<p align="center">
38+
<!-- Keep these links. Translations will automatically update with the README. -->
39+
<a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=de">Deutsch</a> |
40+
<a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=es">Español</a> |
41+
<a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=fr">français</a> |
42+
<a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=ja">日本語</a> |
43+
<a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=ko">한국어</a> |
44+
<a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=pt">Português</a> |
45+
<a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=ru">Русский</a> |
46+
<a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=zh">中文</a>
47+
</p>
48+
3749
**DeepEval** is a simple-to-use, open-source LLM evaluation framework for evaluating and testing large language model systems. It is similar to Pytest but specialized for unit testing LLM outputs. DeepEval incorporates the latest research to evaluate LLM outputs based on metrics such as G-Eval, hallucination, answer relevancy, RAGAS, etc., which use LLMs and various other NLP models that run **locally on your machine** for evaluation.
3850

3951
Whether your LLM applications are RAG pipelines, chatbots, or AI agents, implemented via LangChain or LlamaIndex, DeepEval has you covered. With it, you can easily determine the optimal models, prompts, and architecture to improve your RAG pipelines and agentic workflows, prevent prompt drift, or even transition from OpenAI to hosting your own DeepSeek R1 with confidence.

bb.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# Conversation 1: Initial contact and address collection
66
test_case_1 = ConversationalTestCase(
7-
chatbot_role="A humble and doubtful wizard",
7+
# chatbot_role="A humble and doubtful wizard",
88
turns=[
99
Turn(
1010
role="assistant",
@@ -34,7 +34,7 @@
3434

3535
# Conversation 2: Account number correction and personal details
3636
test_case_2 = ConversationalTestCase(
37-
chatbot_role="A humble and doubtful wizard",
37+
# chatbot_role="A humble and doubtful wizard",
3838
turns=[
3939
Turn(
4040
role="assistant",
@@ -62,7 +62,7 @@
6262

6363
# Conversation 3: Phone details and final confirmation with tools
6464
test_case_3 = ConversationalTestCase(
65-
chatbot_role="A humble and doubtful wizard",
65+
# chatbot_role="A humble and doubtful wizard",
6666
turns=[
6767
Turn(role="user", content="555-0102"),
6868
Turn(

deepeval/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,5 +69,9 @@ def update_warning_opt_in():
6969
return os.getenv("DEEPEVAL_UPDATE_WARNING_OPT_IN") == "YES"
7070

7171

72+
def is_read_only_env():
73+
return os.getenv("DEEPEVAL_FILE_SYSTEM") == "READ_ONLY"
74+
75+
7276
if update_warning_opt_in():
7377
check_for_update()

deepeval/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__: str = "3.0.8"
1+
__version__: str = "3.1.0"

deepeval/metrics/conversational_g_eval/conversational_g_eval.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,9 +84,7 @@ def measure(
8484
_show_indicator: bool = True,
8585
_in_component: bool = False,
8686
) -> float:
87-
check_conversational_test_case_params(
88-
test_case, self.evaluation_params, self
89-
)
87+
check_conversational_test_case_params(test_case, self)
9088

9189
self.evaluation_cost = 0 if self.using_native_model else None
9290
with metric_progress_indicator(
@@ -132,9 +130,7 @@ async def a_measure(
132130
_show_indicator: bool = True,
133131
_in_component: bool = False,
134132
) -> float:
135-
check_conversational_test_case_params(
136-
test_case, self.evaluation_params, self
137-
)
133+
check_conversational_test_case_params(test_case, self)
138134

139135
self.evaluation_cost = 0 if self.using_native_model else None
140136
with metric_progress_indicator(

deepeval/synthesizer/synthesizer.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,7 @@ def generate_goldens_from_docs(
214214
_send_data=False,
215215
_reset_cost=False,
216216
)
217+
self.synthetic_goldens.extend(goldens)
217218
if self.cost_tracking and self.using_native_model:
218219
print(f"💰 API cost: {self.synthesis_cost:.6f}")
219220
if _send_data == True:
@@ -506,12 +507,11 @@ def generate_goldens_from_contexts(
506507
# Remove pbar if not from docs
507508
remove_pbars(progress, [pbar_id]) if _progress is None else None
508509

509-
# Wrap-up Synthesis
510-
self.synthetic_goldens.extend(goldens)
511510
if _send_data == True:
512511
pass
513512
if _reset_cost and self.cost_tracking and self.using_native_model:
514513
print(f"💰 API cost: {self.synthesis_cost:.6f}")
514+
self.synthetic_goldens.extend(goldens)
515515
return goldens
516516

517517
async def a_generate_goldens_from_contexts(
@@ -544,7 +544,6 @@ async def a_generate_goldens_from_contexts(
544544
) as (progress, pbar_id), (
545545
progress if _progress is None else nullcontext()
546546
):
547-
548547
tasks = [
549548
self.task_wrapper(
550549
semaphore,
@@ -567,6 +566,7 @@ async def a_generate_goldens_from_contexts(
567566

568567
if _reset_cost and self.cost_tracking and self.using_native_model:
569568
print(f"💰 API cost: {self.synthesis_cost:.6f}")
569+
self.synthetic_goldens.extend(goldens)
570570
return goldens
571571

572572
async def _a_generate_from_context(
@@ -722,6 +722,7 @@ async def process_input(
722722
+ [pbar_generate_inputs_id, pbar_generate_goldens_id],
723723
)
724724
goldens.extend(results)
725+
self.synthetic_goldens.extend(goldens)
725726

726727
async def _a_generate_text_to_sql_from_context(
727728
self,
@@ -769,7 +770,6 @@ async def a_generate_goldens_from_scratch(
769770
self,
770771
num_goldens: int,
771772
) -> List[Golden]:
772-
773773
if (
774774
self.styling_config.scenario is None
775775
or self.styling_config.task is None
@@ -838,7 +838,9 @@ async def evolve_input(i, data: SyntheticData):
838838
)
839839
for evolved_prompt, evolutions in evolved_prompts_list
840840
]
841-
return goldens
841+
842+
self.synthetic_goldens.extend(goldens)
843+
return goldens
842844

843845
def generate_goldens_from_scratch(
844846
self,
@@ -917,6 +919,7 @@ def generate_goldens_from_scratch(
917919
self.synthetic_goldens.extend(goldens)
918920
if _send_data == True:
919921
pass
922+
self.synthetic_goldens.extend(goldens)
920923
return goldens
921924

922925
def transform_distribution(
@@ -987,7 +990,6 @@ def generate_goldens_from_goldens(
987990
**styles_json, expected_output_format=None
988991
)
989992
self.styling_config = styling_config
990-
991993
# Generate goldens from scratch or from contexts if available
992994
if len(contexts) == 0:
993995
return self.generate_goldens_from_scratch(

deepeval/telemetry.py

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -28,30 +28,7 @@ class Feature(Enum):
2828
TELEMETRY_PATH = os.path.join(HIDDEN_DIR, TELEMETRY_DATA_FILE)
2929

3030
#########################################################
31-
### Move Folders ########################################
32-
#########################################################
33-
34-
if os.path.exists(KEY_FILE) and not os.path.isdir(HIDDEN_DIR):
35-
temp_deepeval_file_name = ".deepeval_temp"
36-
os.rename(KEY_FILE, temp_deepeval_file_name)
37-
os.makedirs(HIDDEN_DIR, exist_ok=True)
38-
os.rename(temp_deepeval_file_name, os.path.join(HIDDEN_DIR, KEY_FILE))
39-
40-
os.makedirs(HIDDEN_DIR, exist_ok=True)
41-
42-
if os.path.exists(TELEMETRY_DATA_FILE):
43-
os.rename(TELEMETRY_DATA_FILE, TELEMETRY_PATH)
44-
45-
if os.path.exists(".deepeval-cache.json"):
46-
os.rename(".deepeval-cache.json", f"{HIDDEN_DIR}/.deepeval-cache.json")
47-
48-
if os.path.exists(".temp_test_run_data.json"):
49-
os.rename(
50-
".temp_test_run_data.json", f"{HIDDEN_DIR}/.temp_test_run_data.json"
51-
)
52-
53-
#########################################################
54-
### Telemetry Config ####################################
31+
### Telemetry Helpers ###################################
5532
#########################################################
5633

5734

@@ -77,6 +54,33 @@ def get_anonymous_public_ip():
7754
return None
7855

7956

57+
#########################################################
58+
### Move Folders ########################################
59+
#########################################################
60+
if not telemetry_opt_out():
61+
if os.path.exists(KEY_FILE) and not os.path.isdir(HIDDEN_DIR):
62+
temp_deepeval_file_name = ".deepeval_temp"
63+
os.rename(KEY_FILE, temp_deepeval_file_name)
64+
os.makedirs(HIDDEN_DIR, exist_ok=True)
65+
os.rename(temp_deepeval_file_name, os.path.join(HIDDEN_DIR, KEY_FILE))
66+
67+
os.makedirs(HIDDEN_DIR, exist_ok=True)
68+
69+
if os.path.exists(TELEMETRY_DATA_FILE):
70+
os.rename(TELEMETRY_DATA_FILE, TELEMETRY_PATH)
71+
72+
if os.path.exists(".deepeval-cache.json"):
73+
os.rename(".deepeval-cache.json", f"{HIDDEN_DIR}/.deepeval-cache.json")
74+
75+
if os.path.exists(".temp_test_run_data.json"):
76+
os.rename(
77+
".temp_test_run_data.json", f"{HIDDEN_DIR}/.temp_test_run_data.json"
78+
)
79+
80+
#########################################################
81+
### Telemetry Config ####################################
82+
#########################################################
83+
8084
anonymous_public_ip = None
8185

8286
if not telemetry_opt_out():

deepeval/tracing/tracing.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ def __init__(self):
9090

9191
self.sampling_rate = os.environ.get(CONFIDENT_SAMPLE_RATE, 1)
9292
validate_sampling_rate(self.sampling_rate)
93+
self.openai_client = None
9394

9495
# Register an exit handler to warn about unprocessed traces
9596
atexit.register(self._warn_on_exit)
@@ -130,6 +131,7 @@ def configure(
130131
if confident_api_key is not None:
131132
self.confident_api_key = confident_api_key
132133
if openai_client is not None:
134+
self.openai_client = openai_client
133135
patch_openai_client(openai_client)
134136

135137
def start_new_trace(self) -> Trace:
@@ -655,7 +657,6 @@ def __init__(
655657
],
656658
func_name: str,
657659
metrics: Optional[Union[List[str], List[BaseMetric]]] = None,
658-
client: Optional[Any] = None,
659660
_progress: Optional[Progress] = None,
660661
_pbar_callback_id: Optional[int] = None,
661662
**kwargs,
@@ -680,7 +681,6 @@ def __init__(
680681
self.span_type: SpanType | str = (
681682
self.name if span_type is None else span_type
682683
)
683-
self.client = client
684684
self._progress = _progress
685685
self._pbar_callback_id = _pbar_callback_id
686686

@@ -816,9 +816,10 @@ def create_span_instance(self):
816816
)
817817
elif self.span_type == SpanType.LLM.value:
818818
model = self.observe_kwargs.get("model", None)
819-
if model is None and self.client is None:
820-
raise ValueError("model or client is required for LlmSpan")
821-
819+
if model is None and not trace_manager.openai_client:
820+
raise ValueError(
821+
"Either provide a model in observe or configure an openai_client in trace_manager. For more information on openai_client, see https://documentation.confident-ai.com/llm-tracing/integrations/openai"
822+
)
822823
return LlmSpan(**span_kwargs, attributes=None, model=model)
823824
elif self.span_type == SpanType.RETRIEVER.value:
824825
embedder = self.observe_kwargs.get("embedder", None)
@@ -905,7 +906,6 @@ def observe(
905906
type: Optional[
906907
Union[Literal["agent", "llm", "retriever", "tool"], str]
907908
] = None,
908-
client: Optional[Any] = None,
909909
**observe_kwargs,
910910
):
911911
"""
@@ -941,7 +941,6 @@ async def async_wrapper(*args, **func_kwargs):
941941
type,
942942
metrics=metrics,
943943
func_name=func_name,
944-
client=client,
945944
**observer_kwargs,
946945
) as observer:
947946
# Call the original function
@@ -970,7 +969,6 @@ def wrapper(*args, **func_kwargs):
970969
type,
971970
metrics=metrics,
972971
func_name=func_name,
973-
client=client,
974972
**observer_kwargs,
975973
) as observer:
976974
# Call the original function

0 commit comments

Comments
 (0)