Merge branch 'confident-ai:main' into main

A-Vamshi · web-flow · commit 2418b9f841f3 · 2025-06-12T22:54:49.000+05:30
diff --git a/CITATION.cff b/CITATION.cff
@@ -6,7 +6,7 @@ authors:
   - family-names: Vongthongsri
     given-names: Kritin
 title: deepeval
-version: 3.0.8
+version: 3.1.0
 date-released: "2025-06-08"
 url: https://confident-ai.com
 repository-code: https://github.com/confident-ai/deepeval
diff --git a/README.md b/README.md
@@ -34,6 +34,18 @@
     </a>
 </p>
 
+<p align="center">
+    <!-- Keep these links. Translations will automatically update with the README. -->
+    <a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=de">Deutsch</a> | 
+    <a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=es">Español</a> | 
+    <a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=fr">français</a> | 
+    <a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=ja">日本語</a> | 
+    <a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=ko">한국어</a> | 
+    <a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=pt">Português</a> | 
+    <a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=ru">Русский</a> | 
+    <a href="https://www.readme-i18n.com/confident-ai/deepeval?lang=zh">中文</a>
+</p>
+
 **DeepEval** is a simple-to-use, open-source LLM evaluation framework, for evaluating and testing large language model systems. It is similar to Pytest but specialized for unit testing LLM outputs. DeepEval incorporates the latest research to evaluate LLM outputs based on metrics such as G-Eval, hallucination, answer relevancy, RAGAS, etc., which use LLMs and various other NLP models that run **locally on your machine** for evaluation.
 
 Whether your LLM applications are RAG pipelines, chatbots, or AI agents, implemented via LangChain or LlamaIndex, DeepEval has you covered. With it, you can easily determine the optimal models, prompts, and architecture to improve your RAG pipeline and agentic workflows, prevent prompt drift, or even transition from OpenAI to hosting your own DeepSeek R1 with confidence.
diff --git a/bb.py b/bb.py
@@ -4,7 +4,7 @@
 
 # Conversation 1: Initial contact and address collection
 test_case_1 = ConversationalTestCase(
-    chatbot_role="A humble and doubtful wizard",
+    # chatbot_role="A humble and doubtful wizard",
     turns=[
         Turn(
             role="assistant",
@@ -34,7 +34,7 @@
 
 # Conversation 2: Account number correction and personal details
 test_case_2 = ConversationalTestCase(
-    chatbot_role="A humble and doubtful wizard",
+    # chatbot_role="A humble and doubtful wizard",
     turns=[
         Turn(
             role="assistant",
@@ -62,7 +62,7 @@
 
 # Conversation 3: Phone details and final confirmation with tools
 test_case_3 = ConversationalTestCase(
-    chatbot_role="A humble and doubtful wizard",
+    # chatbot_role="A humble and doubtful wizard",
     turns=[
         Turn(role="user", content="555-0102"),
         Turn(
diff --git a/deepeval/__init__.py b/deepeval/__init__.py
@@ -69,5 +69,9 @@ def update_warning_opt_in():
     return os.getenv("DEEPEVAL_UPDATE_WARNING_OPT_IN") == "YES"
 
 
+def is_read_only_env():
+    return os.getenv("DEEPEVAL_FILE_SYSTEM") == "READ_ONLY"
+
+
 if update_warning_opt_in():
     check_for_update()
diff --git a/deepeval/_version.py b/deepeval/_version.py
@@ -1 +1 @@
-__version__: str = "3.0.8"
+__version__: str = "3.1.0"
diff --git a/deepeval/metrics/conversational_g_eval/conversational_g_eval.py b/deepeval/metrics/conversational_g_eval/conversational_g_eval.py
@@ -84,9 +84,7 @@ def measure(
         _show_indicator: bool = True,
         _in_component: bool = False,
     ) -> float:
-        check_conversational_test_case_params(
-            test_case, self.evaluation_params, self
-        )
+        check_conversational_test_case_params(test_case, self)
 
         self.evaluation_cost = 0 if self.using_native_model else None
         with metric_progress_indicator(
@@ -132,9 +130,7 @@ async def a_measure(
         _show_indicator: bool = True,
         _in_component: bool = False,
     ) -> float:
-        check_conversational_test_case_params(
-            test_case, self.evaluation_params, self
-        )
+        check_conversational_test_case_params(test_case, self)
 
         self.evaluation_cost = 0 if self.using_native_model else None
         with metric_progress_indicator(
diff --git a/deepeval/synthesizer/synthesizer.py b/deepeval/synthesizer/synthesizer.py
@@ -214,6 +214,7 @@ def generate_goldens_from_docs(
                     _send_data=False,
                     _reset_cost=False,
                 )
+                self.synthetic_goldens.extend(goldens)
                 if self.cost_tracking and self.using_native_model:
                     print(f"💰 API cost: {self.synthesis_cost:.6f}")
                 if _send_data == True:
@@ -506,12 +507,11 @@ def generate_goldens_from_contexts(
                 # Remove pbar if not from docs
                 remove_pbars(progress, [pbar_id]) if _progress is None else None
 
-        # Wrap-up Synthesis
-        self.synthetic_goldens.extend(goldens)
         if _send_data == True:
             pass
         if _reset_cost and self.cost_tracking and self.using_native_model:
             print(f"💰 API cost: {self.synthesis_cost:.6f}")
+        self.synthetic_goldens.extend(goldens)
         return goldens
 
     async def a_generate_goldens_from_contexts(
@@ -544,7 +544,6 @@ async def a_generate_goldens_from_contexts(
         ) as (progress, pbar_id), (
             progress if _progress is None else nullcontext()
         ):
-
             tasks = [
                 self.task_wrapper(
                     semaphore,
@@ -567,6 +566,7 @@ async def a_generate_goldens_from_contexts(
 
         if _reset_cost and self.cost_tracking and self.using_native_model:
             print(f"💰 API cost: {self.synthesis_cost:.6f}")
+        self.synthetic_goldens.extend(goldens)
         return goldens
 
     async def _a_generate_from_context(
@@ -722,6 +722,7 @@ async def process_input(
             + [pbar_generate_inputs_id, pbar_generate_goldens_id],
         )
         goldens.extend(results)
+        self.synthetic_goldens.extend(goldens)
 
     async def _a_generate_text_to_sql_from_context(
         self,
@@ -769,7 +770,6 @@ async def a_generate_goldens_from_scratch(
         self,
         num_goldens: int,
     ) -> List[Golden]:
-
         if (
             self.styling_config.scenario is None
             or self.styling_config.task is None
@@ -838,7 +838,9 @@ async def evolve_input(i, data: SyntheticData):
                 )
                 for evolved_prompt, evolutions in evolved_prompts_list
             ]
-            return goldens
+
+        self.synthetic_goldens.extend(goldens)
+        return goldens
 
     def generate_goldens_from_scratch(
         self,
@@ -917,6 +919,7 @@ def generate_goldens_from_scratch(
         self.synthetic_goldens.extend(goldens)
         if _send_data == True:
             pass
+        self.synthetic_goldens.extend(goldens)
         return goldens
 
     def transform_distribution(
@@ -987,7 +990,6 @@ def generate_goldens_from_goldens(
                     **styles_json, expected_output_format=None
                 )
                 self.styling_config = styling_config
-
             # Generate goldens from scratch or from contexts if available
             if len(contexts) == 0:
                 return self.generate_goldens_from_scratch(
diff --git a/deepeval/telemetry.py b/deepeval/telemetry.py
@@ -28,30 +28,7 @@ class Feature(Enum):
 TELEMETRY_PATH = os.path.join(HIDDEN_DIR, TELEMETRY_DATA_FILE)
 
 #########################################################
-### Move Folders ########################################
-#########################################################
-
-if os.path.exists(KEY_FILE) and not os.path.isdir(HIDDEN_DIR):
-    temp_deepeval_file_name = ".deepeval_temp"
-    os.rename(KEY_FILE, temp_deepeval_file_name)
-    os.makedirs(HIDDEN_DIR, exist_ok=True)
-    os.rename(temp_deepeval_file_name, os.path.join(HIDDEN_DIR, KEY_FILE))
-
-os.makedirs(HIDDEN_DIR, exist_ok=True)
-
-if os.path.exists(TELEMETRY_DATA_FILE):
-    os.rename(TELEMETRY_DATA_FILE, TELEMETRY_PATH)
-
-if os.path.exists(".deepeval-cache.json"):
-    os.rename(".deepeval-cache.json", f"{HIDDEN_DIR}/.deepeval-cache.json")
-
-if os.path.exists(".temp_test_run_data.json"):
-    os.rename(
-        ".temp_test_run_data.json", f"{HIDDEN_DIR}/.temp_test_run_data.json"
-    )
-
-#########################################################
-### Telemetry Config ####################################
+### Telemetry Helpers ###################################
 #########################################################
 
 
@@ -77,6 +54,33 @@ def get_anonymous_public_ip():
     return None
 
 
+#########################################################
+### Move Folders ########################################
+#########################################################
+if not telemetry_opt_out():
+    if os.path.exists(KEY_FILE) and not os.path.isdir(HIDDEN_DIR):
+        temp_deepeval_file_name = ".deepeval_temp"
+        os.rename(KEY_FILE, temp_deepeval_file_name)
+        os.makedirs(HIDDEN_DIR, exist_ok=True)
+        os.rename(temp_deepeval_file_name, os.path.join(HIDDEN_DIR, KEY_FILE))
+
+    os.makedirs(HIDDEN_DIR, exist_ok=True)
+
+    if os.path.exists(TELEMETRY_DATA_FILE):
+        os.rename(TELEMETRY_DATA_FILE, TELEMETRY_PATH)
+
+    if os.path.exists(".deepeval-cache.json"):
+        os.rename(".deepeval-cache.json", f"{HIDDEN_DIR}/.deepeval-cache.json")
+
+    if os.path.exists(".temp_test_run_data.json"):
+        os.rename(
+            ".temp_test_run_data.json", f"{HIDDEN_DIR}/.temp_test_run_data.json"
+        )
+
+#########################################################
+### Telemetry Config ####################################
+#########################################################
+
 anonymous_public_ip = None
 
 if not telemetry_opt_out():
diff --git a/deepeval/tracing/tracing.py b/deepeval/tracing/tracing.py
@@ -90,6 +90,7 @@ def __init__(self):
 
         self.sampling_rate = os.environ.get(CONFIDENT_SAMPLE_RATE, 1)
         validate_sampling_rate(self.sampling_rate)
+        self.openai_client = None
 
         # Register an exit handler to warn about unprocessed traces
         atexit.register(self._warn_on_exit)
@@ -130,6 +131,7 @@ def configure(
         if confident_api_key is not None:
             self.confident_api_key = confident_api_key
         if openai_client is not None:
+            self.openai_client = openai_client
             patch_openai_client(openai_client)
 
     def start_new_trace(self) -> Trace:
@@ -655,7 +657,6 @@ def __init__(
         ],
         func_name: str,
         metrics: Optional[Union[List[str], List[BaseMetric]]] = None,
-        client: Optional[Any] = None,
         _progress: Optional[Progress] = None,
         _pbar_callback_id: Optional[int] = None,
         **kwargs,
@@ -680,7 +681,6 @@ def __init__(
         self.span_type: SpanType | str = (
             self.name if span_type is None else span_type
         )
-        self.client = client
         self._progress = _progress
         self._pbar_callback_id = _pbar_callback_id
 
@@ -816,9 +816,10 @@ def create_span_instance(self):
             )
         elif self.span_type == SpanType.LLM.value:
             model = self.observe_kwargs.get("model", None)
-            if model is None and self.client is None:
-                raise ValueError("model or client is required for LlmSpan")
-
+            if model is None and not trace_manager.openai_client:
+                raise ValueError(
+                    "Either provide a model in observe or configure an openai_client in trace_manager. For more information on openai_client, see https://documentation.confident-ai.com/llm-tracing/integrations/openai"
+                )
             return LlmSpan(**span_kwargs, attributes=None, model=model)
         elif self.span_type == SpanType.RETRIEVER.value:
             embedder = self.observe_kwargs.get("embedder", None)
@@ -905,7 +906,6 @@ def observe(
     type: Optional[
         Union[Literal["agent", "llm", "retriever", "tool"], str]
     ] = None,
-    client: Optional[Any] = None,
     **observe_kwargs,
 ):
     """
@@ -941,7 +941,6 @@ async def async_wrapper(*args, **func_kwargs):
                     type,
                     metrics=metrics,
                     func_name=func_name,
-                    client=client,
                     **observer_kwargs,
                 ) as observer:
                     # Call the original function
@@ -970,7 +969,6 @@ def wrapper(*args, **func_kwargs):
                     type,
                     metrics=metrics,
                     func_name=func_name,
-                    client=client,
                     **observer_kwargs,
                 ) as observer:
                     # Call the original function
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
diff --git a/test_openai_patch.py b/test_openai_patch.py

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__: str = "3.0.8"`
	`1`	`+__version__: str = "3.1.0"`