Skip to content

Cleanup synthesizer #839

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
1 commit merged on
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions deepeval/progress_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@ def synthesizer_progress_context(
evaluation_model: str,
embedder: str = None,
max_generations: str = None,
use_case: str = "QA",
_show_indicator: bool = True,
):
with capture_synthesizer_run(max_generations):
if embedder is None:
description = f"✨ 🍰 ✨ You're generating up to {max_generations} goldens using DeepEval's latest Synthesizer (using {evaluation_model})! This may take a while..."
description = f"✨ 🍰 ✨ You're generating up to {max_generations} goldens using DeepEval's latest Synthesizer (using {evaluation_model}, use case={use_case})! This may take a while..."
else:
description = f"✨ 🍰 ✨ You're generating up to {max_generations} goldens using DeepEval's latest Synthesizer (using {evaluation_model} and {embedder})! This may take a while..."
description = f"✨ 🍰 ✨ You're generating up to {max_generations} goldens using DeepEval's latest Synthesizer (using {evaluation_model} and {embedder}, use case={use_case})! This may take a while..."
console = Console(file=sys.stderr) # Direct output to standard error
if _show_indicator:
with Progress(
Expand Down
2 changes: 1 addition & 1 deletion deepeval/synthesizer/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .synthesizer import Synthesizer
from .synthesizer import Synthesizer, UseCase
58 changes: 4 additions & 54 deletions deepeval/synthesizer/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,11 @@
import random
import math

sys.path.append(r"C:\Users\bombk\OneDrive\Documents\GitHub\deepeval")

from deepeval.synthesizer.template import EvolutionTemplate, SynthesizerTemplate
from deepeval.synthesizer.template_prompt import (
PromptEvolutionTemplate,
PromptSynthesizerTemplate,
)

from deepeval.synthesizer.context_generator import ContextGenerator
from deepeval.synthesizer.utils import initialize_embedding_model
from deepeval.models import DeepEvalBaseLLM
Expand Down Expand Up @@ -299,6 +296,7 @@ def generate_goldens_from_scratch(
self.model.get_model_name(),
None,
(num_initial_goldens + 1) * num_evolutions,
None,
_show_indicator,
):
goldens: List[Golden] = []
Expand Down Expand Up @@ -356,6 +354,7 @@ def generate_goldens_from_prompts(
self.model.get_model_name(),
None,
len(prompts) * num_evolutions,
None,
_show_indicator,
):
goldens: List[Golden] = []
Expand Down Expand Up @@ -421,6 +420,7 @@ def generate_goldens(
self.model.get_model_name(),
None,
len(contexts) * max_goldens_per_context,
use_case.value,
_show_indicator,
):
goldens: List[Golden] = []
Expand Down Expand Up @@ -506,6 +506,7 @@ def generate_goldens(
self.model.get_model_name(),
None,
len(contexts) * max_goldens_per_context,
use_case.value,
_show_indicator,
):

Expand Down Expand Up @@ -684,54 +685,3 @@ def save_as(self, file_type: str, directory: str) -> str:

print(f"Synthetic goldens saved at {full_file_path}!")
return full_file_path


if __name__ == "__main__":
table1 = """CREATE TABLE Students (
StudentID INT PRIMARY KEY,
FirstName VARCHAR(50),
LastName VARCHAR(50),
Email VARCHAR(100) UNIQUE,
DateOfBirth DATE,
Gender CHAR(1),
Address VARCHAR(200),
PhoneNumber VARCHAR(15)
);"""

table2 = """CREATE TABLE Courses (
CourseID INT PRIMARY KEY,
CourseName VARCHAR(100),
TeacherID INT,
Credits INT,
DepartmentID INT,
FOREIGN KEY (TeacherID) REFERENCES Teachers(TeacherID),
FOREIGN KEY (DepartmentID) REFERENCES Departments(DepartmentID)
);"""

table3 = """CREATE TABLE Enrollments (
EnrollmentID INT PRIMARY KEY,
StudentID INT,
CourseID INT,
EnrollmentDate DATE,
Grade CHAR(2),
FOREIGN KEY (StudentID) REFERENCES Students(StudentID),
FOREIGN KEY (CourseID) REFERENCES Courses(CourseID)
);"""

table4 = """CREATE TABLE Teachers (
TeacherID INT PRIMARY KEY,
FirstName VARCHAR(50),
LastName VARCHAR(50),
Email VARCHAR(100) UNIQUE,
DepartmentID INT,
FOREIGN KEY (DepartmentID) REFERENCES Departments(DepartmentID)
);"""

contexts = [[table1, table2, table3, table4]]
synthesizer = Synthesizer()
text_to_sql_goldens = synthesizer.generate_goldens(
max_goldens_per_context=15, contexts=contexts, use_case=UseCase.TEXT2SQL
)
for golden in text_to_sql_goldens:
print("Input : " + str(golden.input))
print("Expected Output : " + str(golden.expected_output))
52 changes: 51 additions & 1 deletion tests/test_synthesizer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
import pytest
from deepeval.synthesizer import Synthesizer
from deepeval.synthesizer import Synthesizer, UseCase
from deepeval.dataset import EvaluationDataset
from deepeval.models import OpenAIEmbeddingModel

Expand Down Expand Up @@ -44,3 +44,53 @@ def test_synthesizer():
# max_goldens_per_document=2,
# )
# dataset.save_as(file_type="json", directory="./results")


# table1 = """CREATE TABLE Students (
# StudentID INT PRIMARY KEY,
# FirstName VARCHAR(50),
# LastName VARCHAR(50),
# Email VARCHAR(100) UNIQUE,
# DateOfBirth DATE,
# Gender CHAR(1),
# Address VARCHAR(200),
# PhoneNumber VARCHAR(15)
# );"""

# table2 = """CREATE TABLE Courses (
# CourseID INT PRIMARY KEY,
# CourseName VARCHAR(100),
# TeacherID INT,
# Credits INT,
# DepartmentID INT,
# FOREIGN KEY (TeacherID) REFERENCES Teachers(TeacherID),
# FOREIGN KEY (DepartmentID) REFERENCES Departments(DepartmentID)
# );"""

# table3 = """CREATE TABLE Enrollments (
# EnrollmentID INT PRIMARY KEY,
# StudentID INT,
# CourseID INT,
# EnrollmentDate DATE,
# Grade CHAR(2),
# FOREIGN KEY (StudentID) REFERENCES Students(StudentID),
# FOREIGN KEY (CourseID) REFERENCES Courses(CourseID)
# );"""

# table4 = """CREATE TABLE Teachers (
# TeacherID INT PRIMARY KEY,
# FirstName VARCHAR(50),
# LastName VARCHAR(50),
# Email VARCHAR(100) UNIQUE,
# DepartmentID INT,
# FOREIGN KEY (DepartmentID) REFERENCES Departments(DepartmentID)
# );"""

# contexts = [[table1, table2, table3, table4]]
# synthesizer = Synthesizer()
# text_to_sql_goldens = synthesizer.generate_goldens(
# max_goldens_per_context=15, contexts=contexts, use_case=UseCase.TEXT2SQL
# )
# for golden in text_to_sql_goldens:
# print("Input : " + str(golden.input))
# print("Expected Output : " + str(golden.expected_output))
Loading