Skip to content

Commit

Permalink
customizations
Browse files Browse the repository at this point in the history
  • Loading branch information
kritinv committed Oct 15, 2024
1 parent 0f94fd3 commit 069f0e3
Show file tree
Hide file tree
Showing 5 changed files with 256 additions and 117 deletions.
120 changes: 94 additions & 26 deletions deepeval/synthesizer/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ def generate_goldens_from_docs(
Evolution.IN_BREADTH: 1 / 7,
},
use_case: UseCase = UseCase.QA,
scenario: Optional[str] = None,
task: Optional[str] = None,
input_format: Optional[str] = None,
expected_output_format: Optional[str] = None,
_send_data=True,
):
# Set Embedder if not defined
Expand All @@ -142,6 +146,10 @@ def generate_goldens_from_docs(
num_evolutions,
evolutions,
use_case,
scenario,
task,
input_format,
expected_output_format,
)
)
else:
Expand Down Expand Up @@ -183,6 +191,10 @@ def generate_goldens_from_docs(
source_files,
evolutions=evolutions,
use_case=use_case,
scenario=scenario,
task=task,
input_format=input_format,
expected_output_format=expected_output_format,
_context_scores=context_scores,
_progress_bar=progress_bar,
_send_data=False,
Expand Down Expand Up @@ -212,6 +224,10 @@ async def a_generate_goldens_from_docs(
Evolution.IN_BREADTH: 1 / 7,
},
use_case: UseCase = UseCase.QA,
scenario: Optional[str] = None,
task: Optional[str] = None,
input_format: Optional[str] = None,
expected_output_format: Optional[str] = None,
):
# Set Embedder if not defined
if self.embedder is None:
Expand Down Expand Up @@ -256,6 +272,10 @@ async def a_generate_goldens_from_docs(
source_files=source_files,
evolutions=evolutions,
use_case=use_case,
scenario=scenario,
task=task,
input_format=input_format,
expected_output_format=expected_output_format,
_context_scores=context_scores,
_progress_bar=progress_bar,
)
Expand Down Expand Up @@ -283,6 +303,10 @@ def generate_goldens(
Evolution.IN_BREADTH: 1 / 7,
},
use_case: UseCase = UseCase.QA,
scenario: Optional[str] = None,
task: Optional[str] = None,
input_format: Optional[str] = None,
expected_output_format: Optional[str] = None,
_context_scores: Optional[List[float]] = None,
_progress_bar: Optional[tqdm.std.tqdm] = None,
_send_data: bool = True,
Expand All @@ -302,6 +326,10 @@ def generate_goldens(
source_files=source_files,
evolutions=evolutions,
use_case=use_case,
scenario=scenario,
task=task,
input_format=input_format,
expected_output_format=expected_output_format
)
)
)
Expand All @@ -322,6 +350,9 @@ def generate_goldens(
prompt = SynthesizerTemplate.generate_synthetic_inputs(
context=context,
max_goldens_per_context=max_goldens_per_context,
scenario=scenario,
task=task,
input_format=input_format,
)
synthetic_inputs = self._generate_inputs(prompt)

Expand Down Expand Up @@ -367,6 +398,7 @@ def generate_goldens(
prompt = SynthesizerTemplate.generate_synthetic_expected_output(
input=golden.input,
context="\n".join(golden.context),
expected_output_format=expected_output_format
)
res = self._generate(prompt)
golden.expected_output = res
Expand Down Expand Up @@ -441,6 +473,10 @@ async def a_generate_goldens(
Evolution.IN_BREADTH: 1 / 7,
},
use_case: UseCase = UseCase.QA,
scenario: Optional[str] = None,
task: Optional[str] = None,
input_format: Optional[str] = None,
expected_output_format: Optional[str] = None,
_context_scores: Optional[List[float]] = None,
_progress_bar: Optional[tqdm.std.tqdm] = None,
) -> List[Golden]:
Expand All @@ -467,6 +503,10 @@ async def a_generate_goldens(
evolutions=evolutions,
progress_bar=progress_bar,
context_scores=_context_scores,
scenario=scenario,
task=task,
input_format=input_format,
expected_output_format=expected_output_format
)
for index, context in enumerate(contexts)
]
Expand Down Expand Up @@ -508,10 +548,18 @@ async def _a_generate_from_context(
evolutions: List[Evolution],
progress_bar: tqdm.std.tqdm,
context_scores: Optional[List[float]] = None,
scenario: Optional[str] = None,
task: Optional[str] = None,
input_format: Optional[str] = None,
expected_output_format: Optional[str] = None,
):
# Generate inputs
prompt = SynthesizerTemplate.generate_synthetic_inputs(
context=context, max_goldens_per_context=max_goldens_per_context
context=context,
max_goldens_per_context=max_goldens_per_context,
scenario=scenario,
task=task,
input_format=input_format,
)
synthetic_inputs: List[SyntheticData] = await self._a_generate_inputs(
prompt
Expand All @@ -538,7 +586,9 @@ async def _a_generate_from_context(
if include_expected_output:
expected_output_prompt = (
SynthesizerTemplate.generate_synthetic_expected_output(
input=evolved_input, context="\n".join(context)
input=evolved_input,
context="\n".join(context),
expected_output_format=expected_output_format
)
)
expected_output = await self._a_generate(expected_output_prompt)
Expand Down Expand Up @@ -612,20 +662,21 @@ async def _a_generate_text_to_sql_from_context(

async def a_generate_goldens_from_scratch(
self,
subject: str,
scenario: str,
task: str,
output_format: str,
input_format: str,
num_initial_goldens: int,
num_evolutions: int = 1,
evolutions: Dict[PromptEvolution, float] = {
PromptEvolution.REASONING: 1 / 6,
PromptEvolution.CONCRETIZING: 1 / 6,
PromptEvolution.CONSTRAINED: 1 / 6,
PromptEvolution.COMPARATIVE: 1 / 6,
PromptEvolution.HYPOTHETICAL: 1 / 6,
PromptEvolution.IN_BREADTH: 1 / 6,
evolutions: Dict[Evolution, float] = {
Evolution.REASONING: 1 / 6,
Evolution.CONCRETIZING: 1 / 6,
Evolution.CONSTRAINED: 1 / 6,
Evolution.COMPARATIVE: 1 / 6,
Evolution.HYPOTHETICAL: 1 / 6,
Evolution.IN_BREADTH: 1 / 6,
},
) -> List[Golden]:
evolutions = self.transform_distribution(evolutions)
goldens: List[Golden] = []
with synthesizer_progress_context(
method="Scratch",
Expand All @@ -639,9 +690,9 @@ async def a_generate_goldens_from_scratch(

# Generate inputs
prompt: List = PromptSynthesizerTemplate.generate_synthetic_prompts(
subject=subject,
scenario=scenario,
task=task,
output_format=output_format,
input_format=input_format,
num_initial_goldens=num_initial_goldens,
)
synthetic_data = self._generate_inputs(prompt)
Expand Down Expand Up @@ -670,30 +721,31 @@ async def a_generate_goldens_from_scratch(

def generate_goldens_from_scratch(
self,
subject: str,
scenario: str,
task: str,
output_format: str,
input_format: str,
num_initial_goldens: int,
num_evolutions: int = 1,
evolutions: Dict[PromptEvolution, float] = {
PromptEvolution.REASONING: 1 / 6,
PromptEvolution.CONCRETIZING: 1 / 6,
PromptEvolution.CONSTRAINED: 1 / 6,
PromptEvolution.COMPARATIVE: 1 / 6,
PromptEvolution.HYPOTHETICAL: 1 / 6,
PromptEvolution.IN_BREADTH: 1 / 6,
evolutions: Dict[Evolution, float] = {
Evolution.REASONING: 1 / 6,
Evolution.CONCRETIZING: 1 / 6,
Evolution.CONSTRAINED: 1 / 6,
Evolution.COMPARATIVE: 1 / 6,
Evolution.HYPOTHETICAL: 1 / 6,
Evolution.IN_BREADTH: 1 / 6,
},
_send_data: bool = True,
) -> List[Golden]:
evolutions = self.transform_distribution(evolutions)
goldens: List[Golden] = []
if self.async_mode:
loop = get_or_create_event_loop()
goldens.extend(
loop.run_until_complete(
self.a_generate_goldens_from_scratch(
subject=subject,
scenario=scenario,
task=task,
output_format=output_format,
input_format=input_format,
num_evolutions=num_evolutions,
num_initial_goldens=num_initial_goldens,
evolutions=evolutions,
Expand All @@ -714,9 +766,9 @@ def generate_goldens_from_scratch(
# Generate inputs
prompt: List = (
PromptSynthesizerTemplate.generate_synthetic_prompts(
subject=subject,
scenario=scenario,
task=task,
output_format=output_format,
input_format=input_format,
num_initial_goldens=num_initial_goldens,
)
)
Expand All @@ -743,6 +795,22 @@ def generate_goldens_from_scratch(
if _send_data == True:
self._wrap_up_synthesis()
return goldens


def transform_distribution(self, evolutions: Dict[Evolution, float]) -> Dict[PromptEvolution, float]:
    """Translate an ``Evolution``-keyed weight distribution into the
    equivalent ``PromptEvolution``-keyed distribution.

    Each weight is carried over unchanged; only the keys are remapped
    via ``map_evolution_to_prompt_evolution``.
    """
    return {
        self.map_evolution_to_prompt_evolution(evolution): weight
        for evolution, weight in evolutions.items()
    }


def map_evolution_to_prompt_evolution(self, evolution: Evolution) -> PromptEvolution:
    """Map an ``Evolution`` member to the ``PromptEvolution`` member of
    the same name.

    Raises:
        KeyError: If ``PromptEvolution`` has no member with the same
            name as ``evolution``.
    """
    try:
        return PromptEvolution[evolution.name]
    except KeyError:
        # `from None` suppresses the implicit exception chain: the raw
        # enum-lookup KeyError adds no information beyond this message
        # and would otherwise clutter the traceback with a "During
        # handling of the above exception..." section.
        raise KeyError(
            f"Evolution '{evolution.name}' not available for this method."
        ) from None


#############################################################
# Helper Methods for Input Generation
Expand Down
34 changes: 29 additions & 5 deletions deepeval/synthesizer/templates/template.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional

class SynthesizerTemplate:

@staticmethod
Expand Down Expand Up @@ -78,12 +80,17 @@ def generate_text2sql_expected_output(input, context):
JSON:
"""

@staticmethod
def generate_synthetic_expected_output(input, context):
def generate_synthetic_expected_output(input: str, context: str, expected_output_format: Optional[str]):
important_section = (
f"IMPORTANT: Please ensure that the generated response strictly adheres to the following format: {expected_output_format}, and make sure it is concise and straight to the point, using supporting information in context."
if expected_output_format
else "IMPORTANT: Please make sure to generate a response that is concise and straight to the point, and uses supporting information in context."
)

return f"""Given the input, which may or may not be a question, generate a response using information presented in context.
**
IMPORTANT: Please make sure to generate a response that is concise and straight to the point, and uses supporting information in context.
{important_section}
**
Context:
Expand All @@ -95,9 +102,24 @@ def generate_synthetic_expected_output(input, context):
Generated Response:
"""


staticmethod

def generate_synthetic_inputs(context, max_goldens_per_context):
def generate_synthetic_inputs(
context: str,
max_goldens_per_context: str,
scenario: Optional[str],
task: Optional[str],
input_format: Optional[str]
):
input_format_section = (
f"`input` MUST strictly adhere to the following format: {input_format}."
if input_format
else "`input` MUST be a STRING."
)
scenario_section = f"`input`s MUST be relevant to this specific scenario: ```{scenario}``` (The scenario provides the situation in which the inputs should be interpreted or used)." if scenario else ""
task_section = f"`input`s MUST be framed in a way that aligns with the purpose of the following task: {task}" if task else ""

return f"""I want you act as a copywriter. Based on the given context, which is list of strings, please generate a list of JSON objects with a `input` key.
The `input` can either be a question or a statement that can be addressed by the given context.
Expand All @@ -122,7 +144,9 @@ def generate_synthetic_inputs(context, max_goldens_per_context):
You should NOT incorporate any prior knowledge you have and take each context at face value.
You MUST include at least one statement as the input.
`input` MUST be a STRING.
{input_format_section}
{scenario_section}
{task_section}
You MUST TRY to generate {max_goldens_per_context} data points, unless the generated `input` is getting reptitive.
**
Expand Down
16 changes: 8 additions & 8 deletions deepeval/synthesizer/templates/template_prompt.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
class PromptSynthesizerTemplate:
@staticmethod
def generate_synthetic_prompts(
subject: str, task: str, output_format: str, num_initial_goldens: int
scenario: str, task: str, input_format: str, num_initial_goldens: int
):
return f"""Generate a series of input prompts from scratch based on the provided subject, task, and output format.
The inputs must align with the given subject and task description, and conform to specified output format.
return f"""Generate a series of input prompts from scratch based on the provided scenario, task, and output format.
The inputs must align with the given scenario and task description, and conform to specified output format.
**
IMPORTANT: Please make sure to only return in JSON format, with the 'data' key as a list of JSON objects.
You MUST TRY to generate {num_initial_goldens} data points.
Example subject: SQL queries querying a database called FAST_FOOD_RESTAURANTS
Example scenario: SQL queries querying a database called FAST_FOOD_RESTAURANTS
Example task: Test all the SQL probable statements
Example output format: SQL String
Example input format: SQL String
Example num initial prompts: 2
Example JSON:
{{
Expand All @@ -26,13 +26,13 @@ def generate_synthetic_prompts(
]
}}
You MUST include at least one statement as the input. `input` MUST be of `{output_format}` format.
You MUST include at least one statement as the input. `input` MUST be of `{input_format}` format.
You MUST TRY to generate {num_initial_goldens} data points, unless the generated `input` is getting reptitive.
**
subject: {subject}
scenario: {scenario}
task: {task}
output format: {output_format}
input format: {input_format}
num initial prompts: {num_initial_goldens}
JSON:
"""
Expand Down
Loading

0 comments on commit 069f0e3

Please sign in to comment.