Skip to content

Commit

Permalink
customizations
Browse files Browse the repository at this point in the history
  • Loading branch information
kritinv committed Oct 15, 2024
1 parent 0f94fd3 commit 069f0e3
Show file tree
Hide file tree
Showing 5 changed files with 256 additions and 117 deletions.
120 changes: 94 additions & 26 deletions deepeval/synthesizer/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,10 @@ def generate_goldens_from_docs(
Evolution.IN_BREADTH: 1 / 7,
},
use_case: UseCase = UseCase.QA,
scenario: Optional[str] = None,
task: Optional[str] = None,
input_format: Optional[str] = None,
expected_output_format: Optional[str] = None,
_send_data=True,
):
# Set Embedder if not defined
Expand All @@ -142,6 +146,10 @@ def generate_goldens_from_docs(
num_evolutions,
evolutions,
use_case,
scenario,
task,
input_format,
expected_output_format,
)
)
else:
Expand Down Expand Up @@ -183,6 +191,10 @@ def generate_goldens_from_docs(
source_files,
evolutions=evolutions,
use_case=use_case,
scenario=scenario,
task=task,
input_format=input_format,
expected_output_format=expected_output_format,
_context_scores=context_scores,
_progress_bar=progress_bar,
_send_data=False,
Expand Down Expand Up @@ -212,6 +224,10 @@ async def a_generate_goldens_from_docs(
Evolution.IN_BREADTH: 1 / 7,
},
use_case: UseCase = UseCase.QA,
scenario: Optional[str] = None,
task: Optional[str] = None,
input_format: Optional[str] = None,
expected_output_format: Optional[str] = None,
):
# Set Embedder if not defined
if self.embedder is None:
Expand Down Expand Up @@ -256,6 +272,10 @@ async def a_generate_goldens_from_docs(
source_files=source_files,
evolutions=evolutions,
use_case=use_case,
scenario=scenario,
task=task,
input_format=input_format,
expected_output_format=expected_output_format,
_context_scores=context_scores,
_progress_bar=progress_bar,
)
Expand Down Expand Up @@ -283,6 +303,10 @@ def generate_goldens(
Evolution.IN_BREADTH: 1 / 7,
},
use_case: UseCase = UseCase.QA,
scenario: Optional[str] = None,
task: Optional[str] = None,
input_format: Optional[str] = None,
expected_output_format: Optional[str] = None,
_context_scores: Optional[List[float]] = None,
_progress_bar: Optional[tqdm.std.tqdm] = None,
_send_data: bool = True,
Expand All @@ -302,6 +326,10 @@ def generate_goldens(
source_files=source_files,
evolutions=evolutions,
use_case=use_case,
scenario=scenario,
task=task,
input_format=input_format,
expected_output_format=expected_output_format
)
)
)
Expand All @@ -322,6 +350,9 @@ def generate_goldens(
prompt = SynthesizerTemplate.generate_synthetic_inputs(
context=context,
max_goldens_per_context=max_goldens_per_context,
scenario=scenario,
task=task,
input_format=input_format,
)
synthetic_inputs = self._generate_inputs(prompt)

Expand Down Expand Up @@ -367,6 +398,7 @@ def generate_goldens(
prompt = SynthesizerTemplate.generate_synthetic_expected_output(
input=golden.input,
context="\n".join(golden.context),
expected_output_format=expected_output_format
)
res = self._generate(prompt)
golden.expected_output = res
Expand Down Expand Up @@ -441,6 +473,10 @@ async def a_generate_goldens(
Evolution.IN_BREADTH: 1 / 7,
},
use_case: UseCase = UseCase.QA,
scenario: Optional[str] = None,
task: Optional[str] = None,
input_format: Optional[str] = None,
expected_output_format: Optional[str] = None,
_context_scores: Optional[List[float]] = None,
_progress_bar: Optional[tqdm.std.tqdm] = None,
) -> List[Golden]:
Expand All @@ -467,6 +503,10 @@ async def a_generate_goldens(
evolutions=evolutions,
progress_bar=progress_bar,
context_scores=_context_scores,
scenario=scenario,
task=task,
input_format=input_format,
expected_output_format=expected_output_format
)
for index, context in enumerate(contexts)
]
Expand Down Expand Up @@ -508,10 +548,18 @@ async def _a_generate_from_context(
evolutions: List[Evolution],
progress_bar: tqdm.std.tqdm,
context_scores: Optional[List[float]] = None,
scenario: Optional[str] = None,
task: Optional[str] = None,
input_format: Optional[str] = None,
expected_output_format: Optional[str] = None,
):
# Generate inputs
prompt = SynthesizerTemplate.generate_synthetic_inputs(
context=context, max_goldens_per_context=max_goldens_per_context
context=context,
max_goldens_per_context=max_goldens_per_context,
scenario=scenario,
task=task,
input_format=input_format,
)
synthetic_inputs: List[SyntheticData] = await self._a_generate_inputs(
prompt
Expand All @@ -538,7 +586,9 @@ async def _a_generate_from_context(
if include_expected_output:
expected_output_prompt = (
SynthesizerTemplate.generate_synthetic_expected_output(
input=evolved_input, context="\n".join(context)
input=evolved_input,
context="\n".join(context),
expected_output_format=expected_output_format
)
)
expected_output = await self._a_generate(expected_output_prompt)
Expand Down Expand Up @@ -612,20 +662,21 @@ async def _a_generate_text_to_sql_from_context(

async def a_generate_goldens_from_scratch(
self,
subject: str,
scenario: str,
task: str,
output_format: str,
input_format: str,
num_initial_goldens: int,
num_evolutions: int = 1,
evolutions: Dict[PromptEvolution, float] = {
PromptEvolution.REASONING: 1 / 6,
PromptEvolution.CONCRETIZING: 1 / 6,
PromptEvolution.CONSTRAINED: 1 / 6,
PromptEvolution.COMPARATIVE: 1 / 6,
PromptEvolution.HYPOTHETICAL: 1 / 6,
PromptEvolution.IN_BREADTH: 1 / 6,
evolutions: Dict[Evolution, float] = {
Evolution.REASONING: 1 / 6,
Evolution.CONCRETIZING: 1 / 6,
Evolution.CONSTRAINED: 1 / 6,
Evolution.COMPARATIVE: 1 / 6,
Evolution.HYPOTHETICAL: 1 / 6,
Evolution.IN_BREADTH: 1 / 6,
},
) -> List[Golden]:
evolutions = self.transform_distribution(evolutions)
goldens: List[Golden] = []
with synthesizer_progress_context(
method="Scratch",
Expand All @@ -639,9 +690,9 @@ async def a_generate_goldens_from_scratch(

# Generate inputs
prompt: List = PromptSynthesizerTemplate.generate_synthetic_prompts(
subject=subject,
scenario=scenario,
task=task,
output_format=output_format,
input_format=input_format,
num_initial_goldens=num_initial_goldens,
)
synthetic_data = self._generate_inputs(prompt)
Expand Down Expand Up @@ -670,30 +721,31 @@ async def a_generate_goldens_from_scratch(

def generate_goldens_from_scratch(
self,
subject: str,
scenario: str,
task: str,
output_format: str,
input_format: str,
num_initial_goldens: int,
num_evolutions: int = 1,
evolutions: Dict[PromptEvolution, float] = {
PromptEvolution.REASONING: 1 / 6,
PromptEvolution.CONCRETIZING: 1 / 6,
PromptEvolution.CONSTRAINED: 1 / 6,
PromptEvolution.COMPARATIVE: 1 / 6,
PromptEvolution.HYPOTHETICAL: 1 / 6,
PromptEvolution.IN_BREADTH: 1 / 6,
evolutions: Dict[Evolution, float] = {
Evolution.REASONING: 1 / 6,
Evolution.CONCRETIZING: 1 / 6,
Evolution.CONSTRAINED: 1 / 6,
Evolution.COMPARATIVE: 1 / 6,
Evolution.HYPOTHETICAL: 1 / 6,
Evolution.IN_BREADTH: 1 / 6,
},
_send_data: bool = True,
) -> List[Golden]:
evolutions = self.transform_distribution(evolutions)
goldens: List[Golden] = []
if self.async_mode:
loop = get_or_create_event_loop()
goldens.extend(
loop.run_until_complete(
self.a_generate_goldens_from_scratch(
subject=subject,
scenario=scenario,
task=task,
output_format=output_format,
input_format=input_format,
num_evolutions=num_evolutions,
num_initial_goldens=num_initial_goldens,
evolutions=evolutions,
Expand All @@ -714,9 +766,9 @@ def generate_goldens_from_scratch(
# Generate inputs
prompt: List = (
PromptSynthesizerTemplate.generate_synthetic_prompts(
subject=subject,
scenario=scenario,
task=task,
output_format=output_format,
input_format=input_format,
num_initial_goldens=num_initial_goldens,
)
)
Expand All @@ -743,6 +795,22 @@ def generate_goldens_from_scratch(
if _send_data == True:
self._wrap_up_synthesis()
return goldens


def transform_distribution(self, evolutions: Dict[Evolution, float]) -> Dict[PromptEvolution, float]:
    """Translate an ``Evolution``-keyed weight distribution into the
    equivalent ``PromptEvolution``-keyed distribution.

    Each weight is carried over unchanged; only the keys are remapped
    via ``map_evolution_to_prompt_evolution``.
    """
    return {
        self.map_evolution_to_prompt_evolution(evolution): weight
        for evolution, weight in evolutions.items()
    }


def map_evolution_to_prompt_evolution(self, evolution: Evolution) -> PromptEvolution:
    """Map an ``Evolution`` member to the ``PromptEvolution`` member of
    the same name.

    Raises:
        KeyError: If ``PromptEvolution`` has no member with the same
            name as ``evolution``.
    """
    try:
        return PromptEvolution[evolution.name]
    except KeyError:
        # `from None` suppresses the implicit exception chain: the raw
        # enum-lookup KeyError adds no information beyond this message
        # and would otherwise clutter the traceback with a "During
        # handling of the above exception..." section.
        raise KeyError(
            f"Evolution '{evolution.name}' not available for this method."
        ) from None


#############################################################
# Helper Methods for Input Generation
Expand Down
34 changes: 29 additions & 5 deletions deepeval/synthesizer/templates/template.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional

class SynthesizerTemplate:

@staticmethod
Expand Down Expand Up @@ -78,12 +80,17 @@ def generate_text2sql_expected_output(input, context):
JSON:
"""

@staticmethod
def generate_synthetic_expected_output(input, context):
def generate_synthetic_expected_output(input: str, context: str, expected_output_format: Optional[str]):
important_section = (
f"IMPORTANT: Please ensure that the generated response strictly adheres to the following format: {expected_output_format}, and make sure it is concise and straight to the point, using supporting information in context."
if expected_output_format
else "IMPORTANT: Please make sure to generate a response that is concise and straight to the point, and uses supporting information in context."
)

return f"""Given the input, which may or may not be a question, generate a response using information presented in context.
**
IMPORTANT: Please make sure to generate a response that is concise and straight to the point, and uses supporting information in context.
{important_section}
**
Context:
Expand All @@ -95,9 +102,24 @@ def generate_synthetic_expected_output(input, context):
Generated Response:
"""


staticmethod

def generate_synthetic_inputs(context, max_goldens_per_context):
def generate_synthetic_inputs(
context: str,
max_goldens_per_context: str,
scenario: Optional[str],
task: Optional[str],
input_format: Optional[str]
):
input_format_section = (
f"`input` MUST strictly adhere to the following format: {input_format}."
if input_format
else "`input` MUST be a STRING."
)
scenario_section = f"`input`s MUST be relevant to this specific scenario: ```{scenario}``` (The scenario provides the situation in which the inputs should be interpreted or used)." if scenario else ""
task_section = f"`input`s MUST be framed in a way that aligns with the purpose of the following task: {task}" if task else ""

return f"""I want you act as a copywriter. Based on the given context, which is list of strings, please generate a list of JSON objects with a `input` key.
The `input` can either be a question or a statement that can be addressed by the given context.
Expand All @@ -122,7 +144,9 @@ def generate_synthetic_inputs(context, max_goldens_per_context):
You should NOT incorporate any prior knowledge you have and take each context at face value.
You MUST include at least one statement as the input.
`input` MUST be a STRING.
{input_format_section}
{scenario_section}
{task_section}
You MUST TRY to generate {max_goldens_per_context} data points, unless the generated `input` is getting reptitive.
**
Expand Down
16 changes: 8 additions & 8 deletions deepeval/synthesizer/templates/template_prompt.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
class PromptSynthesizerTemplate:
@staticmethod
def generate_synthetic_prompts(
subject: str, task: str, output_format: str, num_initial_goldens: int
scenario: str, task: str, input_format: str, num_initial_goldens: int
):
return f"""Generate a series of input prompts from scratch based on the provided subject, task, and output format.
The inputs must align with the given subject and task description, and conform to specified output format.
return f"""Generate a series of input prompts from scratch based on the provided scenario, task, and output format.
The inputs must align with the given scenario and task description, and conform to specified output format.
**
IMPORTANT: Please make sure to only return in JSON format, with the 'data' key as a list of JSON objects.
You MUST TRY to generate {num_initial_goldens} data points.
Example subject: SQL queries querying a database called FAST_FOOD_RESTAURANTS
Example scenario: SQL queries querying a database called FAST_FOOD_RESTAURANTS
Example task: Test all the SQL probable statements
Example output format: SQL String
Example input format: SQL String
Example num initial prompts: 2
Example JSON:
{{
Expand All @@ -26,13 +26,13 @@ def generate_synthetic_prompts(
]
}}
You MUST include at least one statement as the input. `input` MUST be of `{output_format}` format.
You MUST include at least one statement as the input. `input` MUST be of `{input_format}` format.
You MUST TRY to generate {num_initial_goldens} data points, unless the generated `input` is getting reptitive.
**
subject: {subject}
scenario: {scenario}
task: {task}
output format: {output_format}
input format: {input_format}
num initial prompts: {num_initial_goldens}
JSON:
"""
Expand Down
Loading

0 comments on commit 069f0e3

Please sign in to comment.