From 9bca3c5012a1838cb778d867beb671b1e8c7c111 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Tue, 23 Jul 2024 17:30:31 -0700 Subject: [PATCH 01/16] Implemented the first step in deep focus initiative. Now, when the user says there is a bug during the testing step, GPT Pilot adds logs before attempting to fix the issue --- core/agents/bug_hunter.py | 190 ++++++++++++++++++ core/agents/developer.py | 72 +++---- core/agents/error_handler.py | 3 +- core/agents/mixins.py | 49 ++++- core/agents/orchestrator.py | 26 ++- core/agents/problem_solver.py | 2 + core/agents/troubleshooter.py | 79 ++++++-- core/config/magic_words.py | 2 + core/db/models/project_state.py | 17 +- .../bug-hunter/bug_found_or_add_logs.prompt | 6 + .../get_bug_reproduction_instructions.prompt | 31 +++ core/prompts/bug-hunter/iteration.prompt | 43 ++++ core/prompts/bug-hunter/log_data.prompt | 12 ++ core/prompts/bug-hunter/parse_task.prompt | 43 ++++ core/prompts/bug-hunter/system.prompt | 0 15 files changed, 501 insertions(+), 74 deletions(-) create mode 100644 core/agents/bug_hunter.py create mode 100644 core/config/magic_words.py create mode 100644 core/prompts/bug-hunter/bug_found_or_add_logs.prompt create mode 100644 core/prompts/bug-hunter/get_bug_reproduction_instructions.prompt create mode 100644 core/prompts/bug-hunter/iteration.prompt create mode 100644 core/prompts/bug-hunter/log_data.prompt create mode 100644 core/prompts/bug-hunter/parse_task.prompt create mode 100644 core/prompts/bug-hunter/system.prompt diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py new file mode 100644 index 000000000..12c79012a --- /dev/null +++ b/core/agents/bug_hunter.py @@ -0,0 +1,190 @@ +from enum import Enum +from typing import Annotated, Literal, Union + +from pydantic import BaseModel, Field + +from core.agents.base import BaseAgent +from core.agents.convo import AgentConvo +from core.agents.response import AgentResponse +from core.config import magic_words +from 
core.db.models.project_state import IterationStatus +from core.llm.parser import JSONParser +from core.log import get_logger + +log = get_logger(__name__) + + +class StepType(str, Enum): + ADD_LOG = "add_log" + EXPLAIN_PROBLEM = "explain_problem" + GET_ADDITIONAL_FILES = "get_additional_files" + + +class Log(BaseModel): + filePath: str + referenceCodeSnippet: str = Field(description="Five lines of code before the line where the log needs to be added. Make sure that this contains **ONLY** the code that is currently written in the file. It must not contain the log that you want to add.") + log: str + + +class AddLog(BaseModel): + type: Literal[StepType.ADD_LOG] = StepType.ADD_LOG + logsToAdd: list[Log] + + +class ExplainProblem(BaseModel): + type: Literal[StepType.EXPLAIN_PROBLEM] = StepType.EXPLAIN_PROBLEM + problem_explanation: str + + +class GetAdditionalFiles(BaseModel): + type: Literal[StepType.GET_ADDITIONAL_FILES] = StepType.GET_ADDITIONAL_FILES + filePath: str + + +# TODO enable LLM to ask for more files +class LoggingOptions(BaseModel): + decision: Annotated[ + Union[AddLog, ExplainProblem, GetAdditionalFiles], + Field(discriminator="type"), + ] + + +class HuntConclusionType(str, Enum): + ADD_LOGS = magic_words.ADD_LOGS + PROBLEM_IDENTIFIED = magic_words.PROBLEM_IDENTIFIED + + +class HuntConclusionOptions(BaseModel): + conclusion: HuntConclusionType = Field(description=f"If more logs are needed to identify the problem, respond with '{magic_words.ADD_LOGS}'. If the problem is identified, respond with '{magic_words.PROBLEM_IDENTIFIED}'.") + + +class BugHunter(BaseAgent): + agent_type = "bug-hunter" + display_name = "Bug Hunter" + + async def run(self) -> AgentResponse: + current_iteration = self.current_state.current_iteration + + if "bug_reproduction_description" not in current_iteration: + await self.get_bug_reproduction_instructions() + if current_iteration["status"] == IterationStatus.HUNTING_FOR_BUG: + # TODO determine how to find a bug (eg. 
check in db, ask user a question, etc.) + return await self.check_logs() + elif current_iteration["status"] == IterationStatus.AWAITING_USER_TEST: + return await self.ask_user_to_test() + elif current_iteration["status"] == IterationStatus.AWAITING_BUG_REPRODUCTION: + return await self.ask_user_to_test() + + async def get_bug_reproduction_instructions(self): + llm = self.get_llm() + convo = ( + AgentConvo(self) + .template( + "get_bug_reproduction_instructions", + current_task=self.current_state.current_task, + user_feedback=self.current_state.current_iteration["user_feedback"], + user_feedback_qa=self.current_state.current_iteration["user_feedback_qa"], + docs=self.current_state.docs, + next_solution_to_try=None, + ) + ) + bug_reproduction_instructions = await llm(convo, temperature=0) + self.next_state.current_iteration["bug_reproduction_description"] = bug_reproduction_instructions + + async def check_logs(self, logs_message: str = None): + llm = self.get_llm() + convo = ( + AgentConvo(self) + .template( + "iteration", + current_task=self.current_state.current_task, + user_feedback=self.current_state.current_iteration["user_feedback"], + user_feedback_qa=self.current_state.current_iteration["user_feedback_qa"], + docs=self.current_state.docs, + magic_words=magic_words, + next_solution_to_try=None + ) + ) + + for hunting_cycle in self.current_state.current_iteration["bug_hunting_cycles"]: + convo = (convo + .assistant(hunting_cycle["human_readable_instructions"]) + .template( + "log_data", + backend_logs=hunting_cycle["backend_logs"], + frontend_logs=hunting_cycle["frontend_logs"], + fix_attempted=hunting_cycle["fix_attempted"] + )) + + human_readable_instructions = await llm(convo, temperature=0.5) + + convo = ( + AgentConvo(self) + .template( + "bug_found_or_add_logs", + hunt_conclusion=human_readable_instructions, + ) + .require_schema(HuntConclusionOptions) + ) + hunt_conclusion = await llm(convo, parser=JSONParser(HuntConclusionOptions), temperature=0) + + 
self.next_state.current_iteration["description"] = human_readable_instructions + self.next_state.current_iteration["bug_hunting_cycles"] += [{ + "human_readable_instructions": human_readable_instructions, + "fix_attempted": False + }] + + if False and hunt_conclusion.conclusion == magic_words.PROBLEM_IDENTIFIED: + # if no need for logs, implement iteration same as before + self.next_state.current_iteration["status"] = IterationStatus.AWAITING_BUG_FIX + await self.send_message("The bug is found - I'm attempting to fix it.") + else: + # if logs are needed, add logging steps + self.next_state.current_iteration["status"] = IterationStatus.AWAITING_LOGGING + await self.send_message("Adding more logs to identify the bug.") + + self.next_state.flag_iterations_as_modified() + return AgentResponse.done(self) + + async def ask_user_to_test(self): + + reproduce_bug_and_get_logs = self.current_state.current_iteration["status"] == IterationStatus.AWAITING_BUG_REPRODUCTION + + await self.send_message("You can reproduce the bug like this:\n\n" + self.current_state.current_iteration["bug_reproduction_description"]) + if self.current_state.current_iteration["status"] == IterationStatus.AWAITING_USER_TEST: + user_feedback = await self.ask_question( + "Is the bug you reported fixed now?", + buttons={"yes": "Yes, the issue is fixed", "no": "No"}, + default="continue", + buttons_only=True, + hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"] + ) + self.next_state.current_iteration["bug_hunting_cycles"][-1]["fix_attempted"] = True + + if user_feedback.button == "yes": + self.next_state.complete_iteration() + else: + reproduce_bug_and_get_logs = True + + if reproduce_bug_and_get_logs: + # TODO how can we get FE and BE logs automatically? 
+ backend_logs = await self.ask_question( + "Please do exactly what you did in the last iteration, paste **BACKEND** logs here and click CONTINUE.", + buttons={"continue": "Continue"}, + default="continue", + hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"] + ) + + frontend_logs = await self.ask_question( + "Please paste **frontend** logs here and click CONTINUE.", + buttons={"continue": "Continue"}, + default="continue", + hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"] + ) + + # TODO select only the logs that are new (with PYTHAGORA_DEBUGGING_LOG) + self.next_state.current_iteration["bug_hunting_cycles"][-1]["backend_logs"] = backend_logs.text + self.next_state.current_iteration["bug_hunting_cycles"][-1]["frontend_logs"] = frontend_logs.text + self.next_state.current_iteration["status"] = IterationStatus.HUNTING_FOR_BUG + + return AgentResponse.done(self) diff --git a/core/agents/developer.py b/core/agents/developer.py index 403530cce..d5e4a99cc 100644 --- a/core/agents/developer.py +++ b/core/agents/developer.py @@ -1,13 +1,13 @@ -from enum import Enum -from typing import Annotated, Literal, Optional, Union +from typing import Optional from uuid import uuid4 from pydantic import BaseModel, Field from core.agents.base import BaseAgent from core.agents.convo import AgentConvo +from core.agents.mixins import TaskSteps from core.agents.response import AgentResponse, ResponseType -from core.db.models.project_state import TaskStatus +from core.db.models.project_state import IterationStatus, TaskStatus from core.db.models.specification import Complexity from core.llm.parser import JSONParser from core.log import get_logger @@ -16,47 +16,6 @@ log = get_logger(__name__) -class StepType(str, Enum): - COMMAND = "command" - SAVE_FILE = "save_file" - HUMAN_INTERVENTION = "human_intervention" - - -class CommandOptions(BaseModel): - command: str = 
Field(description="Command to run") - timeout: int = Field(description="Timeout in seconds") - success_message: str = "" - - -class SaveFileOptions(BaseModel): - path: str - - -class SaveFileStep(BaseModel): - type: Literal[StepType.SAVE_FILE] = StepType.SAVE_FILE - save_file: SaveFileOptions - - -class CommandStep(BaseModel): - type: Literal[StepType.COMMAND] = StepType.COMMAND - command: CommandOptions - - -class HumanInterventionStep(BaseModel): - type: Literal[StepType.HUMAN_INTERVENTION] = StepType.HUMAN_INTERVENTION - human_intervention_description: str - - -Step = Annotated[ - Union[SaveFileStep, CommandStep, HumanInterventionStep], - Field(discriminator="type"), -] - - -class TaskSteps(BaseModel): - steps: list[Step] - - class RelevantFiles(BaseModel): relevant_files: list[str] = Field(description="List of relevant files for the current task.") @@ -109,6 +68,17 @@ async def breakdown_current_iteration(self, task_review_feedback: Optional[str] n_tasks = 1 log.debug(f"Breaking down the task review feedback {task_review_feedback}") await self.send_message("Breaking down the task review feedback...") + elif (self.current_state.current_iteration["status"] == IterationStatus.AWAITING_BUG_FIX or + self.current_state.current_iteration["status"] == IterationStatus.AWAITING_LOGGING): + iteration = self.current_state.current_iteration + current_task["task_review_feedback"] = None + + description = iteration["bug_hunting_cycles"][-1]["human_readable_instructions"] + user_feedback = iteration["user_feedback"] + source = "bug_hunt" + n_tasks = len(self.next_state.iterations) + log.debug(f"Breaking down the logging cycle {description}") + await self.send_message("Breaking down the current iteration logging cycle ...") else: iteration = self.current_state.current_iteration current_task["task_review_feedback"] = None @@ -156,8 +126,14 @@ async def breakdown_current_iteration(self, task_review_feedback: Optional[str] self.set_next_steps(response, source) if iteration: - 
self.next_state.complete_iteration() - self.next_state.action = f"Troubleshooting #{len(self.current_state.iterations)}" + # fixme please :cry: + if ("status" in iteration) and (iteration["status"] == IterationStatus.AWAITING_BUG_FIX or + iteration["status"] == IterationStatus.AWAITING_LOGGING): + self.next_state.current_iteration["status"] = IterationStatus.AWAITING_BUG_REPRODUCTION if ( + iteration["status"] == IterationStatus.AWAITING_LOGGING) else IterationStatus.AWAITING_USER_TEST + else: + self.next_state.complete_iteration() + self.next_state.action = f"Troubleshooting #{len(self.current_state.iterations)}" else: self.next_state.action = "Task review feedback" @@ -265,7 +241,9 @@ def set_next_steps(self, response: TaskSteps, source: str): } for step in response.steps ] - if len(self.next_state.unfinished_steps) > 0 and source != "review": + if (len(self.next_state.unfinished_steps) > 0 and + source != "review" and + self.next_state.current_iteration["status"] != IterationStatus.AWAITING_LOGGING): self.next_state.steps += [ # TODO: add refactor step here once we have the refactor agent { diff --git a/core/agents/error_handler.py b/core/agents/error_handler.py index c24e9c8aa..fd48b250d 100644 --- a/core/agents/error_handler.py +++ b/core/agents/error_handler.py @@ -3,6 +3,7 @@ from core.agents.base import BaseAgent from core.agents.convo import AgentConvo from core.agents.response import AgentResponse +from core.db.models.project_state import IterationStatus from core.log import get_logger log = get_logger(__name__) @@ -110,7 +111,7 @@ async def handle_command_error(self, message: str, details: dict) -> AgentRespon "description": llm_response, "alternative_solutions": [], "attempts": 1, - "completed": False, + "status": IterationStatus.HUNTING_FOR_BUG, } ] # TODO: maybe have ProjectState.finished_steps as well? 
would make the debug/ran_command prompts nicer too diff --git a/core/agents/mixins.py b/core/agents/mixins.py index 5ea0aae78..4533afdfe 100644 --- a/core/agents/mixins.py +++ b/core/agents/mixins.py @@ -1,8 +1,52 @@ -from typing import Optional +from enum import Enum +from typing import Annotated, Literal, Optional, Union + +from pydantic import BaseModel, Field from core.agents.convo import AgentConvo +class StepType(str, Enum): + COMMAND = "command" + SAVE_FILE = "save_file" + HUMAN_INTERVENTION = "human_intervention" + + +class CommandOptions(BaseModel): + command: str = Field(description="Command to run") + timeout: int = Field(description="Timeout in seconds") + success_message: str = "" + + +class SaveFileOptions(BaseModel): + path: str + + +class SaveFileStep(BaseModel): + type: Literal[StepType.SAVE_FILE] = StepType.SAVE_FILE + save_file: SaveFileOptions + + +class CommandStep(BaseModel): + type: Literal[StepType.COMMAND] = StepType.COMMAND + command: CommandOptions + + +class HumanInterventionStep(BaseModel): + type: Literal[StepType.HUMAN_INTERVENTION] = StepType.HUMAN_INTERVENTION + human_intervention_description: str + + +Step = Annotated[ + Union[SaveFileStep, CommandStep, HumanInterventionStep], + Field(discriminator="type"), +] + + +class TaskSteps(BaseModel): + steps: list[Step] + + class IterationPromptMixin: """ Provides a method to find a solution to a problem based on user feedback. @@ -16,6 +60,7 @@ async def find_solution( *, user_feedback_qa: Optional[list[str]] = None, next_solution_to_try: Optional[str] = None, + bug_hunting_cycles: Optional[dict] = None, ) -> str: """ Generate a new solution for the problem the user reported. @@ -23,6 +68,7 @@ async def find_solution( :param user_feedback: User feedback about the problem. :param user_feedback_qa: Additional q/a about the problem provided by the user (optional). :param next_solution_to_try: Hint from ProblemSolver on which solution to try (optional). 
+ :param bug_hunting_cycles: Data about logs that need to be added to the code (optional). :return: The generated solution to the problem. """ llm = self.get_llm() @@ -32,6 +78,7 @@ async def find_solution( user_feedback=user_feedback, user_feedback_qa=user_feedback_qa, next_solution_to_try=next_solution_to_try, + bug_hunting_cycles=bug_hunting_cycles, ) llm_solution: str = await llm(convo) return llm_solution diff --git a/core/agents/orchestrator.py b/core/agents/orchestrator.py index 14cc1521f..72317d0a8 100644 --- a/core/agents/orchestrator.py +++ b/core/agents/orchestrator.py @@ -2,6 +2,7 @@ from core.agents.architect import Architect from core.agents.base import BaseAgent +from core.agents.bug_hunter import BugHunter from core.agents.code_monkey import CodeMonkey from core.agents.code_reviewer import CodeReviewer from core.agents.developer import Developer @@ -18,7 +19,7 @@ from core.agents.tech_lead import TechLead from core.agents.tech_writer import TechnicalWriter from core.agents.troubleshooter import Troubleshooter -from core.db.models.project_state import TaskStatus +from core.db.models.project_state import IterationStatus, TaskStatus from core.log import get_logger from core.telemetry import telemetry from core.ui.base import ProjectStage @@ -226,12 +227,25 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: return self.create_agent_for_step(state.current_step) if state.unfinished_iterations: - if state.current_iteration["description"]: - # Break down the next iteration into steps + if state.current_iteration["status"] == IterationStatus.HUNTING_FOR_BUG: + # Ask the Logger to check if more logs in the code are needed + return BugHunter(self.state_manager, self.ui) + elif (state.current_iteration["status"] == IterationStatus.AWAITING_LOGGING or + state.current_iteration["status"] == IterationStatus.AWAITING_BUG_FIX): + # Ask the Logger to ask user to test new logs return Developer(self.state_manager, self.ui) - else: - # We 
need to iterate over the current task but there's no solution, as Pythagora - # is stuck in a loop, and ProblemSolver needs to find alternative solutions. + elif (state.current_iteration["status"] == IterationStatus.AWAITING_USER_TEST or + state.current_iteration["status"] == IterationStatus.AWAITING_BUG_REPRODUCTION): + # Ask the Logger to ask user to test new logs + return BugHunter(self.state_manager, self.ui) + elif state.current_iteration["status"] == IterationStatus.FIND_SOLUTION: + # Find solution to the iteration problem + return Troubleshooter(self.state_manager, self.ui) + # elif state.current_iteration["status"] == IterationStatus.AWAITING_BUG_FIX: + # # Break down the next iteration into steps + # return Developer(self.state_manager, self.ui) + elif state.current_iteration["status"] == IterationStatus.PROBLEM_SOLVER: + # Call Problem Solver if the user said "I'm stuck in a loop" return ProblemSolver(self.state_manager, self.ui) # We have just finished the task, call Troubleshooter to ask the user to review diff --git a/core/agents/problem_solver.py b/core/agents/problem_solver.py index 680a4215e..f4b00cbf3 100644 --- a/core/agents/problem_solver.py +++ b/core/agents/problem_solver.py @@ -6,6 +6,7 @@ from core.agents.convo import AgentConvo from core.agents.response import AgentResponse from core.agents.troubleshooter import IterationPromptMixin +from core.db.models.project_state import IterationStatus from core.llm.parser import JSONParser from core.log import get_logger @@ -98,6 +99,7 @@ async def try_alternative_solutions(self) -> AgentResponse: self.next_state_iteration["alternative_solutions"][index]["tried"] = True self.next_state_iteration["description"] = llm_solution self.next_state_iteration["attempts"] = self.iteration["attempts"] + 1 + self.next_state_iteration["status"] = IterationStatus.AWAITING_BUG_FIX self.next_state.flag_iterations_as_modified() return AgentResponse.done(self) diff --git a/core/agents/troubleshooter.py 
b/core/agents/troubleshooter.py index 4f097ed03..e9ac85172 100644 --- a/core/agents/troubleshooter.py +++ b/core/agents/troubleshooter.py @@ -9,7 +9,7 @@ from core.agents.response import AgentResponse from core.config import ROUTE_FILES_AGENT_NAME from core.db.models.file import File -from core.db.models.project_state import TaskStatus +from core.db.models.project_state import IterationStatus, TaskStatus from core.llm.parser import JSONParser, OptionalCodeBlockParser from core.log import get_logger from core.telemetry import telemetry @@ -33,7 +33,29 @@ class Troubleshooter(IterationPromptMixin, BaseAgent): agent_type = "troubleshooter" display_name = "Troubleshooter" - async def run(self) -> AgentResponse: + async def run(self): + if self.current_state.unfinished_iterations: + if self.current_state.current_iteration.get("status") == IterationStatus.FIND_SOLUTION: + return await self.propose_solution() + else: + raise ValueError("There is unfinished iteration but it's not in FIND_SOLUTION state.") + else: + return await self.create_iteration() + + async def propose_solution(self) -> AgentResponse: + user_feedback = self.current_state.current_iteration.get("user_feedback") + user_feedback_qa = self.current_state.current_iteration.get("user_feedback_qa") + bug_hunting_cycles = self.current_state.current_iteration.get("bug_hunting_cycles") + + llm_solution = await self.find_solution(user_feedback, user_feedback_qa=user_feedback_qa, bug_hunting_cycles=bug_hunting_cycles) + + self.next_state.current_iteration["description"] = llm_solution + self.next_state.current_iteration["status"] = IterationStatus.AWAITING_BUG_FIX + self.next_state.flag_iterations_as_modified() + + return AgentResponse.done(self) + + async def create_iteration(self) -> AgentResponse: run_command = await self.get_run_command() user_instructions = self.current_state.current_task.get("test_instructions") @@ -54,7 +76,7 @@ async def run(self) -> AgentResponse: # use "current_iteration" here 
last_iteration = self.current_state.iterations[-1] if len(self.current_state.iterations) >= 3 else None - should_iterate, is_loop, user_feedback = await self.get_user_feedback( + should_iterate, is_loop, bug_report, change_description = await self.get_user_feedback( run_command, user_instructions, last_iteration is not None, @@ -63,6 +85,7 @@ async def run(self) -> AgentResponse: # User tested and reported no problems, we're done with the task return await self.complete_task() + user_feedback = bug_report or change_description user_feedback_qa = await self.generate_bug_report(run_command, user_instructions, user_feedback) if is_loop: @@ -71,24 +94,30 @@ async def run(self) -> AgentResponse: return self.try_next_alternative_solution(user_feedback, user_feedback_qa) else: # Newly detected loop, set up an empty new iteration to trigger ProblemSolver - llm_solution = "" + iteration_status = IterationStatus.AWAITING_BUG_FIX await self.trace_loop("loop-feedback") - else: - llm_solution = await self.find_solution(user_feedback, user_feedback_qa=user_feedback_qa) + elif bug_report is not None: + iteration_status = IterationStatus.HUNTING_FOR_BUG + elif change_description is not None: + iteration_status = IterationStatus.FIND_SOLUTION + self.next_state.iterations = self.current_state.iterations + [ { "id": uuid4().hex, "user_feedback": user_feedback, "user_feedback_qa": user_feedback_qa, - "description": llm_solution, + "description": change_description, "alternative_solutions": [], # FIXME - this is incorrect if this is a new problem; otherwise we could # just count the iterations "attempts": 1, - "completed": False, + "status": iteration_status, + "bug_hunting_cycles": [], } ] + + self.next_state.flag_iterations_as_modified() if len(self.next_state.iterations) == LOOP_THRESHOLD: await self.trace_loop("loop-start") @@ -187,6 +216,11 @@ async def get_user_feedback( feedback (eg. if they just clicked on "Continue" or "I'm stuck in a loop"). 
""" + bug_report = None + change_description = None + is_loop = False + should_iterate = True + test_message = "Can you check if the app works please?" if user_instructions: hint = " Here is a description of what should be working:\n\n" + user_instructions @@ -194,15 +228,19 @@ async def get_user_feedback( if run_command: await self.ui.send_run_command(run_command) - buttons = {"continue": "continue"} + buttons = { + "continue": "Everything works", + "change": "I want to make a change", + "bug": "There is an issue" + } if last_iteration: buttons["loop"] = "I'm stuck in a loop" - user_response = await self.ask_question(test_message, buttons=buttons, default="continue", hint=hint) + user_response = await self.ask_question(test_message, buttons=buttons, default="continue", buttons_only=True, hint=hint) if user_response.button == "continue" or user_response.cancelled: - return False, False, "" + should_iterate = False - if user_response.button == "loop": + elif user_response.button == "loop": await telemetry.trace_code_event( "stuck-in-loop", { @@ -218,16 +256,23 @@ async def get_user_feedback( }, }, ) - return True, True, "" + is_loop = True + + elif user_response.button == "change": + user_description = await self.ask_question("Please describe the change you want to make (one at the time please)") + change_description = user_description.text + + elif user_response.button == "bug": + user_description = await self.ask_question("Please describe the issue you found (one at the time please)") + bug_report = user_description.text - return True, False, user_response.text + return should_iterate, is_loop, bug_report, change_description def try_next_alternative_solution(self, user_feedback: str, user_feedback_qa: list[str]) -> AgentResponse: """ Call the ProblemSolver to try an alternative solution. - Stores the user feedback and sets iteration state (not completed, no description) - so that ProblemSolver will be triggered. 
+ Stores the user feedback and sets iteration state so that ProblemSolver will be triggered. :param user_feedback: User feedback to store in the iteration state. :param user_feedback_qa: Additional questions/answers about the problem. @@ -238,7 +283,7 @@ def try_next_alternative_solution(self, user_feedback: str, user_feedback_qa: li next_state_iteration["user_feedback"] = user_feedback next_state_iteration["user_feedback_qa"] = user_feedback_qa next_state_iteration["attempts"] += 1 - next_state_iteration["completed"] = False + next_state_iteration["status"] = IterationStatus.PROBLEM_SOLVER self.next_state.flag_iterations_as_modified() self.next_state.action = f"Alternative solution (attempt #{next_state_iteration['attempts']})" return AgentResponse.done(self) diff --git a/core/config/magic_words.py b/core/config/magic_words.py new file mode 100644 index 000000000..8b91db82b --- /dev/null +++ b/core/config/magic_words.py @@ -0,0 +1,2 @@ +PROBLEM_IDENTIFIED = "PROBLEM_IDENTIFIED" +ADD_LOGS = "ADD_LOGS" diff --git a/core/db/models/project_state.py b/core/db/models/project_state.py index e415a9316..084150c9a 100644 --- a/core/db/models/project_state.py +++ b/core/db/models/project_state.py @@ -30,6 +30,19 @@ class TaskStatus: SKIPPED = "skipped" +class IterationStatus: + """Status of an iteration.""" + + HUNTING_FOR_BUG = "check_logs" + AWAITING_LOGGING = "awaiting_logging" + AWAITING_USER_TEST = "awaiting_user_test" + AWAITING_BUG_FIX = "awaiting_bug_fix" + AWAITING_BUG_REPRODUCTION = "awaiting_bug_reproduction" + FIND_SOLUTION = "find_solution" + PROBLEM_SOLVER = "problem_solver" + DONE = "done" + + class ProjectState(Base): __tablename__ = "project_states" __table_args__ = ( @@ -105,7 +118,7 @@ def unfinished_iterations(self) -> list[dict]: :return: List of unfinished iterations. 
""" - return [iteration for iteration in self.iterations if not iteration.get("completed")] + return [iteration for iteration in self.iterations if iteration.get("status") != IterationStatus.DONE] @property def current_iteration(self) -> Optional[dict]: @@ -285,7 +298,7 @@ def complete_iteration(self): raise ValueError("Current state is read-only (already has a next state).") log.debug(f"Completing iteration {self.unfinished_iterations[0]}") - self.unfinished_iterations[0]["completed"] = True + self.unfinished_iterations[0]["status"] = IterationStatus.DONE self.flag_iterations_as_modified() def flag_iterations_as_modified(self): diff --git a/core/prompts/bug-hunter/bug_found_or_add_logs.prompt b/core/prompts/bug-hunter/bug_found_or_add_logs.prompt new file mode 100644 index 000000000..6fd387041 --- /dev/null +++ b/core/prompts/bug-hunter/bug_found_or_add_logs.prompt @@ -0,0 +1,6 @@ +We are working on a solving a technical problem in a codebase and here is a conclusion from a team member: +--- TEAM_MEMBER_CONCLUSION --- +{{ hunt_conclusion }} +--- END_OF_TEAM_MEMBER_CONCLUSION --- + +Please tell me if the conclusion from the team member is to add more logs around the code or if the conclusion is that there are all information needed to fix the issue. diff --git a/core/prompts/bug-hunter/get_bug_reproduction_instructions.prompt b/core/prompts/bug-hunter/get_bug_reproduction_instructions.prompt new file mode 100644 index 000000000..14cc928de --- /dev/null +++ b/core/prompts/bug-hunter/get_bug_reproduction_instructions.prompt @@ -0,0 +1,31 @@ +You are working on an app called "{{ state.branch.project.name }}" and you need to write code for the entire application. + +{% include "partials/project_details.prompt" %} + +{% if state.tasks and state.current_task %} +Development process of this app was split into smaller tasks. Here is the list of all tasks: +```{% for task in state.tasks %} +{{ loop.index }}. 
{{ task.description }} +{% endfor %} +``` + +You are currently working on, and have to focus only on, this task: +``` +{{ current_task.description }} +``` + +{% endif %} +A part of the app is already finished. +{% include "partials/files_list.prompt" %} + +{% include "partials/user_feedback.prompt" %} + +{% if next_solution_to_try is not none %} +Focus on solving this issue in the following way: +``` +{{ next_solution_to_try }} +``` +{% endif %} +{#{% include "partials/doc_snippets.prompt" %}#} + +Based on this information, you need to tell me in 2-3 sentences how I can reproduce the issue that the user experienced. diff --git a/core/prompts/bug-hunter/iteration.prompt b/core/prompts/bug-hunter/iteration.prompt new file mode 100644 index 000000000..89ffdd84e --- /dev/null +++ b/core/prompts/bug-hunter/iteration.prompt @@ -0,0 +1,43 @@ +You are working on an app called "{{ state.branch.project.name }}" and you need to write code for the entire application. + +{% include "partials/project_details.prompt" %} + +{% if state.tasks and state.current_task %} +Development process of this app was split into smaller tasks. Here is the list of all tasks: +```{% for task in state.tasks %} +{{ loop.index }}. {{ task.description }} +{% endfor %} +``` + +You are currently working on, and have to focus only on, this task: +``` +{{ current_task.description }} +``` + +{% endif %} +A part of the app is already finished. +{% include "partials/files_list.prompt" %} + +{% include "partials/user_feedback.prompt" %} + +{% if next_solution_to_try is not none %} +Focus on solving this issue in the following way: +``` +{{ next_solution_to_try }} +``` +{% endif %} +{#{% include "partials/doc_snippets.prompt" %}#} + +Based on this information, you need to figure out where the problem that the user described is. You have 2 options - to tell me exactly where the problem is happening or to add more logs to better determine where the problem is. 
If you think we should add more logs around the code to better understand the problem, tell me code snippets in which we should add the logs. If you think you know where the issue is, don't add any new logs but explain which log prints point you to the problem, what the problem is, what the solution to this problem is and how the solution will fix the problem. What is your answer? + +{#**IMPORTANT** +If you decide to add logs, start the message with "{{ magic_words.ADD_LOGS }}". Make sure that the log starts with "PYTHAGORA_DEBUGGING_LOG". For example, if you want to add a log in Python code `print("Hello, World!")`, you should put `print("PYTHAGORA_DEBUGGING_LOG: Hello, World!")` instead. This refers to **ONLY** the logs you want to add that are not in the codebase at the moment. All logs that are in the codebase already, you must not change. When you write code, you **MUST NOT** add any new lines of code except the logs. + +Your message will be read by a developer so you don't have to write the entire files in which you want to add logs. Just write the lines of code where you want to add logs so that the developer reading your message understands where they should put the logs. + +**IMPORTANT** +If you have all the information you need to solve the problem, do not add any logs but explain what the problem is, where in the code it is and how it should be fixed, and end the response with "{{ magic_words.PROBLEM_IDENTIFIED }}". Remember, if you mentioned to add any logs, you **MUST NOT** say "{{ magic_words.PROBLEM_IDENTIFIED }}". 
+ +{% include "partials/file_naming.prompt" %} +{% include "partials/relative_paths.prompt" %} +#} diff --git a/core/prompts/bug-hunter/log_data.prompt b/core/prompts/bug-hunter/log_data.prompt new file mode 100644 index 000000000..a695d9668 --- /dev/null +++ b/core/prompts/bug-hunter/log_data.prompt @@ -0,0 +1,12 @@ +{% if backend_logs is not none %}Here are the logs we added to the backend: +``` +{{ backend_logs }} +``` +{% endif %}{% if frontend_logs is not none %} +Here are the logs we added to the frontend: +``` +{{ frontend_logs }} +``` +{% endif %}{% if fix_attempted %} +The problem wasn't solved with the last changes. You have 2 options - to tell me exactly where is the problem happening or to add more logs to better determine where is the problem. If you think we should add more logs around the code to better understand the problem, tell me code snippets in which we should add the logs. If you think you know where the issue is, don't add any new logs but explain what log print tell point you to the problem, what the problem is, what is the solution to this problem and how the solution will fix the problem. What is your answer? +{% endif %} diff --git a/core/prompts/bug-hunter/parse_task.prompt b/core/prompts/bug-hunter/parse_task.prompt new file mode 100644 index 000000000..68511ee02 --- /dev/null +++ b/core/prompts/bug-hunter/parse_task.prompt @@ -0,0 +1,43 @@ +Ok, now, take your response and convert it to a list of actionable steps that will be executed by a machine. +Analyze the entire message, think step by step and make sure that you don't omit any information +when converting this message to steps. 
+ +Each step can be either: + +* `command` - command to run (must be able to run on a {{ os }} machine, assume current working directory is project root folder) +* `save_file` - create or update ONE file (only provide file path, not contents) +* `human_intervention` - if you need the human to do something, use this type of step and explain in details what you want the human to do. NEVER use `human_intervention` for testing, as testing will be done separately by a dedicated QA after all the steps are done. Also you MUST NOT use `human_intervention` to ask the human to write or review code. + +**IMPORTANT**: If multiple changes are required for same file, you must provide single `save_file` step for each file. + +{% include "partials/file_naming.prompt" %} +{% include "partials/relative_paths.prompt" %} +{% include "partials/execution_order.prompt" %} +{% include "partials/human_intervention_explanation.prompt" %} + +**IMPORTANT**: Remember, NEVER output human intervention steps to do manual tests or coding tasks, even if the previous message asks for it! The testing will be done *after* these steps and you MUST NOT include testing in these steps. 
+ +Examples: +------------------------example_1--------------------------- +``` +{ + "tasks": [ + { + "type": "save_file", + "save_file": { + "path": "server.js" + }, + }, + { + "type": "command", + "command": { + "command": "mv index.js public/index.js"", + "timeout": 5, + "success_message": "", + "command_id": "move_index_file" + } + } + ] +} +``` +------------------------end_of_example_1--------------------------- diff --git a/core/prompts/bug-hunter/system.prompt b/core/prompts/bug-hunter/system.prompt new file mode 100644 index 000000000..e69de29bb From bb720de374a3f39197969827e8f5064f72603484 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Tue, 23 Jul 2024 21:22:34 -0700 Subject: [PATCH 02/16] Fixes --- core/agents/developer.py | 4 ++-- core/agents/orchestrator.py | 6 ++---- core/agents/problem_solver.py | 2 +- core/agents/troubleshooter.py | 6 +++--- core/db/models/project_state.py | 1 + 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/core/agents/developer.py b/core/agents/developer.py index d5e4a99cc..bbcffc1fa 100644 --- a/core/agents/developer.py +++ b/core/agents/developer.py @@ -242,8 +242,8 @@ def set_next_steps(self, response: TaskSteps, source: str): for step in response.steps ] if (len(self.next_state.unfinished_steps) > 0 and - source != "review" and - self.next_state.current_iteration["status"] != IterationStatus.AWAITING_LOGGING): + source != "review" and (self.next_state.current_iteration is None or + self.next_state.current_iteration["status"] != IterationStatus.AWAITING_LOGGING)): self.next_state.steps += [ # TODO: add refactor step here once we have the refactor agent { diff --git a/core/agents/orchestrator.py b/core/agents/orchestrator.py index 72317d0a8..888ec0716 100644 --- a/core/agents/orchestrator.py +++ b/core/agents/orchestrator.py @@ -231,7 +231,8 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: # Ask the Logger to check if more logs in the code are needed return 
BugHunter(self.state_manager, self.ui) elif (state.current_iteration["status"] == IterationStatus.AWAITING_LOGGING or - state.current_iteration["status"] == IterationStatus.AWAITING_BUG_FIX): + state.current_iteration["status"] == IterationStatus.AWAITING_BUG_FIX or + state.current_iteration["status"] == IterationStatus.IMPLEMENT_SOLUTION): # Ask the Logger to ask user to test new logs return Developer(self.state_manager, self.ui) elif (state.current_iteration["status"] == IterationStatus.AWAITING_USER_TEST or @@ -241,9 +242,6 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: elif state.current_iteration["status"] == IterationStatus.FIND_SOLUTION: # Find solution to the iteration problem return Troubleshooter(self.state_manager, self.ui) - # elif state.current_iteration["status"] == IterationStatus.AWAITING_BUG_FIX: - # # Break down the next iteration into steps - # return Developer(self.state_manager, self.ui) elif state.current_iteration["status"] == IterationStatus.PROBLEM_SOLVER: # Call Problem Solver if the user said "I'm stuck in a loop" return ProblemSolver(self.state_manager, self.ui) diff --git a/core/agents/problem_solver.py b/core/agents/problem_solver.py index f4b00cbf3..08de13a89 100644 --- a/core/agents/problem_solver.py +++ b/core/agents/problem_solver.py @@ -99,7 +99,7 @@ async def try_alternative_solutions(self) -> AgentResponse: self.next_state_iteration["alternative_solutions"][index]["tried"] = True self.next_state_iteration["description"] = llm_solution self.next_state_iteration["attempts"] = self.iteration["attempts"] + 1 - self.next_state_iteration["status"] = IterationStatus.AWAITING_BUG_FIX + self.next_state_iteration["status"] = IterationStatus.PROBLEM_SOLVER self.next_state.flag_iterations_as_modified() return AgentResponse.done(self) diff --git a/core/agents/troubleshooter.py b/core/agents/troubleshooter.py index e9ac85172..7410b45cf 100644 --- a/core/agents/troubleshooter.py +++ 
b/core/agents/troubleshooter.py @@ -50,7 +50,7 @@ async def propose_solution(self) -> AgentResponse: llm_solution = await self.find_solution(user_feedback, user_feedback_qa=user_feedback_qa, bug_hunting_cycles=bug_hunting_cycles) self.next_state.current_iteration["description"] = llm_solution - self.next_state.current_iteration["status"] = IterationStatus.AWAITING_BUG_FIX + self.next_state.current_iteration["status"] = IterationStatus.IMPLEMENT_SOLUTION self.next_state.flag_iterations_as_modified() return AgentResponse.done(self) @@ -93,8 +93,8 @@ async def create_iteration(self) -> AgentResponse: # If we already have alternative solutions, it means we were already in a loop. return self.try_next_alternative_solution(user_feedback, user_feedback_qa) else: - # Newly detected loop, set up an empty new iteration to trigger ProblemSolver - iteration_status = IterationStatus.AWAITING_BUG_FIX + # Newly detected loop + iteration_status = IterationStatus.PROBLEM_SOLVER await self.trace_loop("loop-feedback") elif bug_report is not None: iteration_status = IterationStatus.HUNTING_FOR_BUG diff --git a/core/db/models/project_state.py b/core/db/models/project_state.py index 084150c9a..cb3c86a76 100644 --- a/core/db/models/project_state.py +++ b/core/db/models/project_state.py @@ -38,6 +38,7 @@ class IterationStatus: AWAITING_USER_TEST = "awaiting_user_test" AWAITING_BUG_FIX = "awaiting_bug_fix" AWAITING_BUG_REPRODUCTION = "awaiting_bug_reproduction" + IMPLEMENT_SOLUTION = "implement_solution" FIND_SOLUTION = "find_solution" PROBLEM_SOLVER = "problem_solver" DONE = "done" From a6708d42443f0f1ff81a96f051cbafa34209d21b Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Wed, 24 Jul 2024 08:49:39 -0700 Subject: [PATCH 03/16] Removed dead code --- core/agents/bug_hunter.py | 29 ------------- .../get_bug_reproduction_instructions.prompt | 1 - core/prompts/bug-hunter/parse_task.prompt | 43 ------------------- 3 files changed, 73 deletions(-) delete mode 100644 
core/prompts/bug-hunter/parse_task.prompt diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 12c79012a..097ea870e 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -20,35 +20,6 @@ class StepType(str, Enum): GET_ADDITIONAL_FILES = "get_additional_files" -class Log(BaseModel): - filePath: str - referenceCodeSnippet: str = Field(description="Five lines of code before the line where the log needs to be added. Make sure that this contains **ONLY** the code that is currently written in the file. It must not contain the log that you want to add.") - log: str - - -class AddLog(BaseModel): - type: Literal[StepType.ADD_LOG] = StepType.ADD_LOG - logsToAdd: list[Log] - - -class ExplainProblem(BaseModel): - type: Literal[StepType.EXPLAIN_PROBLEM] = StepType.EXPLAIN_PROBLEM - problem_explanation: str - - -class GetAdditionalFiles(BaseModel): - type: Literal[StepType.GET_ADDITIONAL_FILES] = StepType.GET_ADDITIONAL_FILES - filePath: str - - -# TODO enable LLM to ask for more files -class LoggingOptions(BaseModel): - decision: Annotated[ - Union[AddLog, ExplainProblem, GetAdditionalFiles], - Field(discriminator="type"), - ] - - class HuntConclusionType(str, Enum): ADD_LOGS = magic_words.ADD_LOGS PROBLEM_IDENTIFIED = magic_words.PROBLEM_IDENTIFIED diff --git a/core/prompts/bug-hunter/get_bug_reproduction_instructions.prompt b/core/prompts/bug-hunter/get_bug_reproduction_instructions.prompt index 14cc928de..fcef8cf93 100644 --- a/core/prompts/bug-hunter/get_bug_reproduction_instructions.prompt +++ b/core/prompts/bug-hunter/get_bug_reproduction_instructions.prompt @@ -26,6 +26,5 @@ Focus on solving this issue in the following way: {{ next_solution_to_try }} ``` {% endif %} -{#{% include "partials/doc_snippets.prompt" %}#} Based on this information, you need to tell me in 2-3 sentences how can I reproduce the issue that the user experienced. 
diff --git a/core/prompts/bug-hunter/parse_task.prompt b/core/prompts/bug-hunter/parse_task.prompt deleted file mode 100644 index 68511ee02..000000000 --- a/core/prompts/bug-hunter/parse_task.prompt +++ /dev/null @@ -1,43 +0,0 @@ -Ok, now, take your response and convert it to a list of actionable steps that will be executed by a machine. -Analyze the entire message, think step by step and make sure that you don't omit any information -when converting this message to steps. - -Each step can be either: - -* `command` - command to run (must be able to run on a {{ os }} machine, assume current working directory is project root folder) -* `save_file` - create or update ONE file (only provide file path, not contents) -* `human_intervention` - if you need the human to do something, use this type of step and explain in details what you want the human to do. NEVER use `human_intervention` for testing, as testing will be done separately by a dedicated QA after all the steps are done. Also you MUST NOT use `human_intervention` to ask the human to write or review code. - -**IMPORTANT**: If multiple changes are required for same file, you must provide single `save_file` step for each file. - -{% include "partials/file_naming.prompt" %} -{% include "partials/relative_paths.prompt" %} -{% include "partials/execution_order.prompt" %} -{% include "partials/human_intervention_explanation.prompt" %} - -**IMPORTANT**: Remember, NEVER output human intervention steps to do manual tests or coding tasks, even if the previous message asks for it! The testing will be done *after* these steps and you MUST NOT include testing in these steps. 
- -Examples: -------------------------example_1--------------------------- -``` -{ - "tasks": [ - { - "type": "save_file", - "save_file": { - "path": "server.js" - }, - }, - { - "type": "command", - "command": { - "command": "mv index.js public/index.js"", - "timeout": 5, - "success_message": "", - "command_id": "move_index_file" - } - } - ] -} -``` -------------------------end_of_example_1--------------------------- From 8393eb46cd88cfb23c70409fd66f94c40292e681 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Wed, 24 Jul 2024 08:50:46 -0700 Subject: [PATCH 04/16] Refactoring and adding comments --- core/agents/bug_hunter.py | 10 +++++----- core/agents/developer.py | 17 +++++++++-------- core/agents/orchestrator.py | 29 ++++++++++++++++++----------- 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 097ea870e..430ba2080 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -42,9 +42,9 @@ async def run(self) -> AgentResponse: # TODO determine how to find a bug (eg. check in db, ask user a question, etc.) 
return await self.check_logs() elif current_iteration["status"] == IterationStatus.AWAITING_USER_TEST: - return await self.ask_user_to_test() + return await self.ask_user_to_test(False, True) elif current_iteration["status"] == IterationStatus.AWAITING_BUG_REPRODUCTION: - return await self.ask_user_to_test() + return await self.ask_user_to_test(True, False) async def get_bug_reproduction_instructions(self): llm = self.get_llm() @@ -117,12 +117,12 @@ async def check_logs(self, logs_message: str = None): self.next_state.flag_iterations_as_modified() return AgentResponse.done(self) - async def ask_user_to_test(self): + async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiting_user_test: bool = False): - reproduce_bug_and_get_logs = self.current_state.current_iteration["status"] == IterationStatus.AWAITING_BUG_REPRODUCTION + reproduce_bug_and_get_logs = awaiting_bug_reproduction await self.send_message("You can reproduce the bug like this:\n\n" + self.current_state.current_iteration["bug_reproduction_description"]) - if self.current_state.current_iteration["status"] == IterationStatus.AWAITING_USER_TEST: + if awaiting_user_test: user_feedback = await self.ask_question( "Is the bug you reported fixed now?", buttons={"yes": "Yes, the issue is fixed", "no": "No"}, diff --git a/core/agents/developer.py b/core/agents/developer.py index bbcffc1fa..ed7a52a58 100644 --- a/core/agents/developer.py +++ b/core/agents/developer.py @@ -68,8 +68,7 @@ async def breakdown_current_iteration(self, task_review_feedback: Optional[str] n_tasks = 1 log.debug(f"Breaking down the task review feedback {task_review_feedback}") await self.send_message("Breaking down the task review feedback...") - elif (self.current_state.current_iteration["status"] == IterationStatus.AWAITING_BUG_FIX or - self.current_state.current_iteration["status"] == IterationStatus.AWAITING_LOGGING): + elif self.current_state.current_iteration["status"] in (IterationStatus.AWAITING_BUG_FIX, 
IterationStatus.AWAITING_LOGGING): iteration = self.current_state.current_iteration current_task["task_review_feedback"] = None @@ -126,14 +125,16 @@ async def breakdown_current_iteration(self, task_review_feedback: Optional[str] self.set_next_steps(response, source) if iteration: - # fixme please :cry: - if ("status" in iteration) and (iteration["status"] == IterationStatus.AWAITING_BUG_FIX or - iteration["status"] == IterationStatus.AWAITING_LOGGING): - self.next_state.current_iteration["status"] = IterationStatus.AWAITING_BUG_REPRODUCTION if ( - iteration["status"] == IterationStatus.AWAITING_LOGGING) else IterationStatus.AWAITING_USER_TEST - else: + if "status" not in iteration or (iteration["status"] in (IterationStatus.AWAITING_USER_TEST, IterationStatus.AWAITING_BUG_REPRODUCTION)): + # This is just a support for old iterations that don't have status self.next_state.complete_iteration() self.next_state.action = f"Troubleshooting #{len(self.current_state.iterations)}" + elif iteration["status"] == IterationStatus.AWAITING_BUG_FIX: + # If bug fixing is done, ask user to test again + self.next_state.current_iteration["status"] = IterationStatus.AWAITING_USER_TEST + elif iteration["status"] == IterationStatus.AWAITING_LOGGING: + # If logging is done, ask user to reproduce the bug + self.next_state.current_iteration["status"] = IterationStatus.AWAITING_BUG_REPRODUCTION else: self.next_state.action = "Task review feedback" diff --git a/core/agents/orchestrator.py b/core/agents/orchestrator.py index 888ec0716..cb54ce608 100644 --- a/core/agents/orchestrator.py +++ b/core/agents/orchestrator.py @@ -227,22 +227,29 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: return self.create_agent_for_step(state.current_step) if state.unfinished_iterations: - if state.current_iteration["status"] == IterationStatus.HUNTING_FOR_BUG: - # Ask the Logger to check if more logs in the code are needed + current_iteration_status = 
state.current_iteration["status"] + if current_iteration_status == IterationStatus.HUNTING_FOR_BUG: + # Triggering the bug hunter to start the hunt return BugHunter(self.state_manager, self.ui) - elif (state.current_iteration["status"] == IterationStatus.AWAITING_LOGGING or - state.current_iteration["status"] == IterationStatus.AWAITING_BUG_FIX or - state.current_iteration["status"] == IterationStatus.IMPLEMENT_SOLUTION): - # Ask the Logger to ask user to test new logs + elif (current_iteration_status == IterationStatus.AWAITING_LOGGING): + # Get the developer to implement logs needed for debugging return Developer(self.state_manager, self.ui) - elif (state.current_iteration["status"] == IterationStatus.AWAITING_USER_TEST or - state.current_iteration["status"] == IterationStatus.AWAITING_BUG_REPRODUCTION): - # Ask the Logger to ask user to test new logs + elif (current_iteration_status == IterationStatus.AWAITING_BUG_FIX): + # Get the developer to implement the bug fix for debugging + return Developer(self.state_manager, self.ui) + elif (current_iteration_status == IterationStatus.IMPLEMENT_SOLUTION): + # Get the developer to implement the "change" requested by the user + return Developer(self.state_manager, self.ui) + elif (current_iteration_status == IterationStatus.AWAITING_USER_TEST): + # Getting the bug hunter to ask the human to test the bug fix + return BugHunter(self.state_manager, self.ui) + elif (current_iteration_status == IterationStatus.AWAITING_BUG_REPRODUCTION): + # Getting the bug hunter to ask the human to reproduce the bug return BugHunter(self.state_manager, self.ui) - elif state.current_iteration["status"] == IterationStatus.FIND_SOLUTION: + elif current_iteration_status == IterationStatus.FIND_SOLUTION: # Find solution to the iteration problem return Troubleshooter(self.state_manager, self.ui) - elif state.current_iteration["status"] == IterationStatus.PROBLEM_SOLVER: + elif current_iteration_status == IterationStatus.PROBLEM_SOLVER: # Call 
Problem Solver if the user said "I'm stuck in a loop" return ProblemSolver(self.state_manager, self.ui) From 0ebe872250e2448344c4430cbec590e4a1f84846 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Wed, 24 Jul 2024 08:51:10 -0700 Subject: [PATCH 05/16] Fixes --- core/agents/bug_hunter.py | 2 +- core/agents/troubleshooter.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 430ba2080..ffbcbc775 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -105,7 +105,7 @@ async def check_logs(self, logs_message: str = None): "fix_attempted": False }] - if False and hunt_conclusion.conclusion == magic_words.PROBLEM_IDENTIFIED: + if hunt_conclusion.conclusion == magic_words.PROBLEM_IDENTIFIED: # if no need for logs, implement iteration same as before self.next_state.current_iteration["status"] = IterationStatus.AWAITING_BUG_FIX await self.send_message("The bug is found - I'm attempting to fix it.") diff --git a/core/agents/troubleshooter.py b/core/agents/troubleshooter.py index 7410b45cf..f39199e0b 100644 --- a/core/agents/troubleshooter.py +++ b/core/agents/troubleshooter.py @@ -33,7 +33,7 @@ class Troubleshooter(IterationPromptMixin, BaseAgent): agent_type = "troubleshooter" display_name = "Troubleshooter" - async def run(self): + async def run(self) -> AgentResponse: if self.current_state.unfinished_iterations: if self.current_state.current_iteration.get("status") == IterationStatus.FIND_SOLUTION: return await self.propose_solution() @@ -98,7 +98,9 @@ async def create_iteration(self) -> AgentResponse: await self.trace_loop("loop-feedback") elif bug_report is not None: iteration_status = IterationStatus.HUNTING_FOR_BUG - elif change_description is not None: + else: + # should be - elif change_description is not None: - but to prevent bugs with the extension + # this might be caused if we show the input field instead of buttons iteration_status = 
IterationStatus.FIND_SOLUTION From e7ef931153ba5fc22759401d071f7165c8316505 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Wed, 24 Jul 2024 17:31:51 -0700 Subject: [PATCH 06/16] Fixes --- core/agents/bug_hunter.py | 2 +- core/agents/developer.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index ffbcbc775..a7b462f5a 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -102,7 +102,7 @@ async def check_logs(self, logs_message: str = None): self.next_state.current_iteration["description"] = human_readable_instructions self.next_state.current_iteration["bug_hunting_cycles"] += [{ "human_readable_instructions": human_readable_instructions, - "fix_attempted": False + "fix_attempted": any(c['fix_attempted'] for c in self.current_state.current_iteration["bug_hunting_cycles"]) }] if hunt_conclusion.conclusion == magic_words.PROBLEM_IDENTIFIED: diff --git a/core/agents/developer.py b/core/agents/developer.py index ed7a52a58..95e89fd33 100644 --- a/core/agents/developer.py +++ b/core/agents/developer.py @@ -129,6 +129,10 @@ async def breakdown_current_iteration(self, task_review_feedback: Optional[str] # This is just a support for old iterations that don't have status self.next_state.complete_iteration() self.next_state.action = f"Troubleshooting #{len(self.current_state.iterations)}" + elif iteration["status"] == IterationStatus.IMPLEMENT_SOLUTION: + # If the user requested a change, then, we'll implement it and go straight back to testing + self.next_state.complete_iteration() + self.next_state.action = f"Troubleshooting #{len(self.current_state.iterations)}" elif iteration["status"] == IterationStatus.AWAITING_BUG_FIX: # If bug fixing is done, ask user to test again self.next_state.current_iteration["status"] = IterationStatus.AWAITING_USER_TEST From 1623804a8b5e618b973a23d751e2679bbeaed745 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Wed, 24 Jul 2024 
17:32:33 -0700 Subject: [PATCH 07/16] Small refactors --- core/agents/bug_hunter.py | 1 - core/agents/orchestrator.py | 10 +++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index a7b462f5a..cbe4bd559 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -1,5 +1,4 @@ from enum import Enum -from typing import Annotated, Literal, Union from pydantic import BaseModel, Field diff --git a/core/agents/orchestrator.py b/core/agents/orchestrator.py index cb54ce608..0c31132ff 100644 --- a/core/agents/orchestrator.py +++ b/core/agents/orchestrator.py @@ -231,19 +231,19 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: if current_iteration_status == IterationStatus.HUNTING_FOR_BUG: # Triggering the bug hunter to start the hunt return BugHunter(self.state_manager, self.ui) - elif (current_iteration_status == IterationStatus.AWAITING_LOGGING): + elif current_iteration_status == IterationStatus.AWAITING_LOGGING: # Get the developer to implement logs needed for debugging return Developer(self.state_manager, self.ui) - elif (current_iteration_status == IterationStatus.AWAITING_BUG_FIX): + elif current_iteration_status == IterationStatus.AWAITING_BUG_FIX: # Get the developer to implement the bug fix for debugging return Developer(self.state_manager, self.ui) - elif (current_iteration_status == IterationStatus.IMPLEMENT_SOLUTION): + elif current_iteration_status == IterationStatus.IMPLEMENT_SOLUTION: # Get the developer to implement the "change" requested by the user return Developer(self.state_manager, self.ui) - elif (current_iteration_status == IterationStatus.AWAITING_USER_TEST): + elif current_iteration_status == IterationStatus.AWAITING_USER_TEST: # Getting the bug hunter to ask the human to test the bug fix return BugHunter(self.state_manager, self.ui) - elif (current_iteration_status == IterationStatus.AWAITING_BUG_REPRODUCTION): + elif 
current_iteration_status == IterationStatus.AWAITING_BUG_REPRODUCTION: # Getting the bug hunter to ask the human to reproduce the bug return BugHunter(self.state_manager, self.ui) elif current_iteration_status == IterationStatus.FIND_SOLUTION: From 0a25f6ea85190403d9beade23b04966c83d2a478 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Wed, 24 Jul 2024 17:33:07 -0700 Subject: [PATCH 08/16] Changes in the bug hunder prompt --- core/prompts/bug-hunter/iteration.prompt | 15 +++------------ core/prompts/bug-hunter/log_data.prompt | 2 +- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/core/prompts/bug-hunter/iteration.prompt b/core/prompts/bug-hunter/iteration.prompt index 89ffdd84e..a0cd528b2 100644 --- a/core/prompts/bug-hunter/iteration.prompt +++ b/core/prompts/bug-hunter/iteration.prompt @@ -26,18 +26,9 @@ Focus on solving this issue in the following way: {{ next_solution_to_try }} ``` {% endif %} -{#{% include "partials/doc_snippets.prompt" %}#} -Based on this information, you need to figure out where is the problem that the user described. You have 2 options - to tell me exactly where is the problem happening or to add more logs to better determine where is the problem. If you think we should add more logs around the code to better understand the problem, tell me code snippets in which we should add the logs. If you think you know where the issue is, don't add any new logs but explain what log print tell point you to the problem, what the problem is, what is the solution to this problem and how the solution will fix the problem. What is your answer? - -{#**IMPORTANT** -I you decide to add logs, start the message with "{{ magic_words.ADD_LOGS }}". Make sure that the log starts with "PYTHAGORA_DEBUGGING_LOG". For example, if you want to add a log in a Python code `print("Hello, World!")`, you should put `print("PYTHAGORA_DEBUGGING_LOG: Hello, World!")` instead. 
This refers to **ONLY** the logs you want to add that are not in the codebase at the moment. All logs that are in the codebase already, you must not change. When you write code, you **MUST NOT** add any new lines of code except the logs. - -Your message will be read by a developer so you don't have to write the entire files in which you want to add logs to. Just write the lines of code where you want to add logs so that the developer reading your message understands where should they put the logs to. +Based on this information, you need to figure out where is the problem that the user described. You have 2 options - to tell me exactly where is the problem happening or to add more logs to better determine where is the problem. +If you think we should add more logs around the code to better understand the problem, tell me code snippets in which we should add the logs. If you think you know where the issue is, don't add any new logs but explain what log print tell point you to the problem, what the problem is, what is the solution to this problem and how the solution will fix the problem. What is your answer? **IMPORTANT** -If you have all the information you need to solve the problem, do not add any logs but explain what is the problem, where in the code is it and how should it be fixed and end the response with with "{{ magic_words.PROBLEM_IDENTIFIED }}". Remember, if you mentioned to add any logs, you **MUST NOT** say "{{ magic_words.PROBLEM_IDENTIFIED }}". - -{% include "partials/file_naming.prompt" %} -{% include "partials/relative_paths.prompt" %} -#} +You cannot answer with "Ensure that...", "Make sure that...", etc. In these cases, explain how should the reader of your message ensure what you want them to ensure. In most cases, they will need to add some logs to ensure something in which case tell them where to add them. 
diff --git a/core/prompts/bug-hunter/log_data.prompt b/core/prompts/bug-hunter/log_data.prompt index a695d9668..d74ec13ea 100644 --- a/core/prompts/bug-hunter/log_data.prompt +++ b/core/prompts/bug-hunter/log_data.prompt @@ -8,5 +8,5 @@ Here are the logs we added to the frontend: {{ frontend_logs }} ``` {% endif %}{% if fix_attempted %} -The problem wasn't solved with the last changes. You have 2 options - to tell me exactly where is the problem happening or to add more logs to better determine where is the problem. If you think we should add more logs around the code to better understand the problem, tell me code snippets in which we should add the logs. If you think you know where the issue is, don't add any new logs but explain what log print tell point you to the problem, what the problem is, what is the solution to this problem and how the solution will fix the problem. What is your answer? +The problem wasn't solved with the last changes. You have 2 options - to tell me exactly where is the problem happening or to add more logs to better determine where is the problem. If you think we should add more logs around the code to better understand the problem, tell me code snippets in which we should add the logs. If you think you know where the issue is, don't add any new logs but explain what log print tell point you to the problem, what the problem is, what is the solution to this problem and how the solution will fix the problem. What is your answer? Make sure not to repeat mistakes from before that didn't work. 
{% endif %} From 60b772f004195cb76841d9d73225483428112124 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Wed, 24 Jul 2024 17:38:57 -0700 Subject: [PATCH 09/16] Formatting changes --- core/agents/bug_hunter.py | 81 ++++++++++++++++++++------------------- 1 file changed, 42 insertions(+), 39 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index cbe4bd559..656166b0d 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -25,7 +25,9 @@ class HuntConclusionType(str, Enum): class HuntConclusionOptions(BaseModel): - conclusion: HuntConclusionType = Field(description=f"If more logs are needed to identify the problem, respond with '{magic_words.ADD_LOGS}'. If the problem is identified, respond with '{magic_words.PROBLEM_IDENTIFIED}'.") + conclusion: HuntConclusionType = Field( + description=f"If more logs are needed to identify the problem, respond with '{magic_words.ADD_LOGS}'. If the problem is identified, respond with '{magic_words.PROBLEM_IDENTIFIED}'." 
+ ) class BugHunter(BaseAgent): @@ -47,44 +49,36 @@ async def run(self) -> AgentResponse: async def get_bug_reproduction_instructions(self): llm = self.get_llm() - convo = ( - AgentConvo(self) - .template( - "get_bug_reproduction_instructions", - current_task=self.current_state.current_task, - user_feedback=self.current_state.current_iteration["user_feedback"], - user_feedback_qa=self.current_state.current_iteration["user_feedback_qa"], - docs=self.current_state.docs, - next_solution_to_try=None, - ) + convo = AgentConvo(self).template( + "get_bug_reproduction_instructions", + current_task=self.current_state.current_task, + user_feedback=self.current_state.current_iteration["user_feedback"], + user_feedback_qa=self.current_state.current_iteration["user_feedback_qa"], + docs=self.current_state.docs, + next_solution_to_try=None, ) bug_reproduction_instructions = await llm(convo, temperature=0) self.next_state.current_iteration["bug_reproduction_description"] = bug_reproduction_instructions async def check_logs(self, logs_message: str = None): llm = self.get_llm() - convo = ( - AgentConvo(self) - .template( - "iteration", - current_task=self.current_state.current_task, - user_feedback=self.current_state.current_iteration["user_feedback"], - user_feedback_qa=self.current_state.current_iteration["user_feedback_qa"], - docs=self.current_state.docs, - magic_words=magic_words, - next_solution_to_try=None - ) + convo = AgentConvo(self).template( + "iteration", + current_task=self.current_state.current_task, + user_feedback=self.current_state.current_iteration["user_feedback"], + user_feedback_qa=self.current_state.current_iteration["user_feedback_qa"], + docs=self.current_state.docs, + magic_words=magic_words, + next_solution_to_try=None, ) for hunting_cycle in self.current_state.current_iteration["bug_hunting_cycles"]: - convo = (convo - .assistant(hunting_cycle["human_readable_instructions"]) - .template( - "log_data", - backend_logs=hunting_cycle["backend_logs"], - 
frontend_logs=hunting_cycle["frontend_logs"], - fix_attempted=hunting_cycle["fix_attempted"] - )) + convo = convo.assistant(hunting_cycle["human_readable_instructions"]).template( + "log_data", + backend_logs=hunting_cycle["backend_logs"], + frontend_logs=hunting_cycle["frontend_logs"], + fix_attempted=hunting_cycle["fix_attempted"], + ) human_readable_instructions = await llm(convo, temperature=0.5) @@ -99,10 +93,14 @@ async def check_logs(self, logs_message: str = None): hunt_conclusion = await llm(convo, parser=JSONParser(HuntConclusionOptions), temperature=0) self.next_state.current_iteration["description"] = human_readable_instructions - self.next_state.current_iteration["bug_hunting_cycles"] += [{ - "human_readable_instructions": human_readable_instructions, - "fix_attempted": any(c['fix_attempted'] for c in self.current_state.current_iteration["bug_hunting_cycles"]) - }] + self.next_state.current_iteration["bug_hunting_cycles"] += [ + { + "human_readable_instructions": human_readable_instructions, + "fix_attempted": any( + c["fix_attempted"] for c in self.current_state.current_iteration["bug_hunting_cycles"] + ), + } + ] if hunt_conclusion.conclusion == magic_words.PROBLEM_IDENTIFIED: # if no need for logs, implement iteration same as before @@ -117,17 +115,20 @@ async def check_logs(self, logs_message: str = None): return AgentResponse.done(self) async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiting_user_test: bool = False): - reproduce_bug_and_get_logs = awaiting_bug_reproduction - await self.send_message("You can reproduce the bug like this:\n\n" + self.current_state.current_iteration["bug_reproduction_description"]) + await self.send_message( + "You can reproduce the bug like this:\n\n" + + self.current_state.current_iteration["bug_reproduction_description"] + ) if awaiting_user_test: user_feedback = await self.ask_question( "Is the bug you reported fixed now?", buttons={"yes": "Yes, the issue is fixed", "no": "No"}, 
default="continue", buttons_only=True, - hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"] + hint="Instructions for testing:\n\n" + + self.current_state.current_iteration["bug_reproduction_description"], ) self.next_state.current_iteration["bug_hunting_cycles"][-1]["fix_attempted"] = True @@ -142,14 +143,16 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti "Please do exactly what you did in the last iteration, paste **BACKEND** logs here and click CONTINUE.", buttons={"continue": "Continue"}, default="continue", - hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"] + hint="Instructions for testing:\n\n" + + self.current_state.current_iteration["bug_reproduction_description"], ) frontend_logs = await self.ask_question( "Please paste **frontend** logs here and click CONTINUE.", buttons={"continue": "Continue"}, default="continue", - hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"] + hint="Instructions for testing:\n\n" + + self.current_state.current_iteration["bug_reproduction_description"], ) # TODO select only the logs that are new (with PYTHAGORA_DEBUGGING_LOG) From 7ada62e3812a9f39b55fd9d890c0ac76bbfb6416 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Wed, 24 Jul 2024 17:53:55 -0700 Subject: [PATCH 10/16] Use Sonnet for Bug hunter iteration --- core/agents/bug_hunter.py | 5 +++-- core/config/__init__.py | 2 ++ core/llm/anthropic_client.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 656166b0d..ca81cf22b 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -5,7 +5,7 @@ from core.agents.base import BaseAgent from core.agents.convo import AgentConvo from core.agents.response import AgentResponse -from core.config import magic_words +from core.config 
import BUG_HUNT_AGENT_NAME, magic_words from core.db.models.project_state import IterationStatus from core.llm.parser import JSONParser from core.log import get_logger @@ -61,7 +61,7 @@ async def get_bug_reproduction_instructions(self): self.next_state.current_iteration["bug_reproduction_description"] = bug_reproduction_instructions async def check_logs(self, logs_message: str = None): - llm = self.get_llm() + llm = self.get_llm(BUG_HUNT_AGENT_NAME) convo = AgentConvo(self).template( "iteration", current_task=self.current_state.current_task, @@ -90,6 +90,7 @@ async def check_logs(self, logs_message: str = None): ) .require_schema(HuntConclusionOptions) ) + llm = self.get_llm() hunt_conclusion = await llm(convo, parser=JSONParser(HuntConclusionOptions), temperature=0) self.next_state.current_iteration["description"] = human_readable_instructions diff --git a/core/config/__init__.py b/core/config/__init__.py index 20152e0fb..5fae2f21c 100644 --- a/core/config/__init__.py +++ b/core/config/__init__.py @@ -36,6 +36,7 @@ DEFAULT_AGENT_NAME = "default" DESCRIBE_FILES_AGENT_NAME = "CodeMonkey.describe_files" ROUTE_FILES_AGENT_NAME = "Troubleshooter.get_route_files" +BUG_HUNT_AGENT_NAME = "BugHunter.logs_or_fix" # Endpoint for the external documentation EXTERNAL_DOCUMENTATION_API = "http://docs-pythagora-io-439719575.us-east-1.elb.amazonaws.com" @@ -310,6 +311,7 @@ class Config(_StrictModel): default={ DEFAULT_AGENT_NAME: AgentLLMConfig(), DESCRIBE_FILES_AGENT_NAME: AgentLLMConfig(model="gpt-3.5-turbo", temperature=0.0), + BUG_HUNT_AGENT_NAME: AgentLLMConfig(model="claude-3-5-sonnet-20240620", temperature=0.0), ROUTE_FILES_AGENT_NAME: AgentLLMConfig(model="gpt-4o", temperature=0.0), } ) diff --git a/core/llm/anthropic_client.py b/core/llm/anthropic_client.py index f458a7b2b..a834079a4 100644 --- a/core/llm/anthropic_client.py +++ b/core/llm/anthropic_client.py @@ -23,7 +23,7 @@ class AnthropicClient(BaseLLMClient): def _init_client(self): self.client = AsyncAnthropic( 
api_key=self.config.api_key, - base_url=self.config.base_url, + # base_url=self.config.base_url, timeout=Timeout( max(self.config.connect_timeout, self.config.read_timeout), connect=self.config.connect_timeout, From d8d4b5bcecf8e43f1431e456401f42ac8899d17f Mon Sep 17 00:00:00 2001 From: LeonOstrez Date: Thu, 25 Jul 2024 11:45:49 +0200 Subject: [PATCH 11/16] fix crash --- core/agents/bug_hunter.py | 8 +++----- core/agents/error_handler.py | 1 + 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index ca81cf22b..5d60d79be 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -72,7 +72,7 @@ async def check_logs(self, logs_message: str = None): next_solution_to_try=None, ) - for hunting_cycle in self.current_state.current_iteration["bug_hunting_cycles"]: + for hunting_cycle in self.current_state.current_iteration.get("bug_hunting_cycles", []): convo = convo.assistant(hunting_cycle["human_readable_instructions"]).template( "log_data", backend_logs=hunting_cycle["backend_logs"], @@ -116,8 +116,6 @@ async def check_logs(self, logs_message: str = None): return AgentResponse.done(self) async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiting_user_test: bool = False): - reproduce_bug_and_get_logs = awaiting_bug_reproduction - await self.send_message( "You can reproduce the bug like this:\n\n" + self.current_state.current_iteration["bug_reproduction_description"] @@ -136,9 +134,9 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti if user_feedback.button == "yes": self.next_state.complete_iteration() else: - reproduce_bug_and_get_logs = True + awaiting_bug_reproduction = True - if reproduce_bug_and_get_logs: + if awaiting_bug_reproduction: # TODO how can we get FE and BE logs automatically? 
backend_logs = await self.ask_question( "Please do exactly what you did in the last iteration, paste **BACKEND** logs here and click CONTINUE.", diff --git a/core/agents/error_handler.py b/core/agents/error_handler.py index fd48b250d..66dbe05ce 100644 --- a/core/agents/error_handler.py +++ b/core/agents/error_handler.py @@ -112,6 +112,7 @@ async def handle_command_error(self, message: str, details: dict) -> AgentRespon "alternative_solutions": [], "attempts": 1, "status": IterationStatus.HUNTING_FOR_BUG, + "bug_hunting_cycles": [], } ] # TODO: maybe have ProjectState.finished_steps as well? would make the debug/ran_command prompts nicer too From a0feb850be5183e64a8add23b448c14cf203883d Mon Sep 17 00:00:00 2001 From: LeonOstrez Date: Thu, 25 Jul 2024 12:42:56 +0200 Subject: [PATCH 12/16] change default agent config --- core/agents/bug_hunter.py | 4 ++-- core/config/__init__.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 5d60d79be..e23a46ebb 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -5,7 +5,7 @@ from core.agents.base import BaseAgent from core.agents.convo import AgentConvo from core.agents.response import AgentResponse -from core.config import BUG_HUNT_AGENT_NAME, magic_words +from core.config import CHECK_LOGS_AGENT_NAME, magic_words from core.db.models.project_state import IterationStatus from core.llm.parser import JSONParser from core.log import get_logger @@ -61,7 +61,7 @@ async def get_bug_reproduction_instructions(self): self.next_state.current_iteration["bug_reproduction_description"] = bug_reproduction_instructions async def check_logs(self, logs_message: str = None): - llm = self.get_llm(BUG_HUNT_AGENT_NAME) + llm = self.get_llm(CHECK_LOGS_AGENT_NAME) convo = AgentConvo(self).template( "iteration", current_task=self.current_state.current_task, diff --git a/core/config/__init__.py b/core/config/__init__.py index 5fae2f21c..17f753ddd 100644 
--- a/core/config/__init__.py +++ b/core/config/__init__.py @@ -36,7 +36,7 @@ DEFAULT_AGENT_NAME = "default" DESCRIBE_FILES_AGENT_NAME = "CodeMonkey.describe_files" ROUTE_FILES_AGENT_NAME = "Troubleshooter.get_route_files" -BUG_HUNT_AGENT_NAME = "BugHunter.logs_or_fix" +CHECK_LOGS_AGENT_NAME = "BugHunter.check_logs" # Endpoint for the external documentation EXTERNAL_DOCUMENTATION_API = "http://docs-pythagora-io-439719575.us-east-1.elb.amazonaws.com" @@ -311,7 +311,7 @@ class Config(_StrictModel): default={ DEFAULT_AGENT_NAME: AgentLLMConfig(), DESCRIBE_FILES_AGENT_NAME: AgentLLMConfig(model="gpt-3.5-turbo", temperature=0.0), - BUG_HUNT_AGENT_NAME: AgentLLMConfig(model="claude-3-5-sonnet-20240620", temperature=0.0), + CHECK_LOGS_AGENT_NAME: AgentLLMConfig(model="claude-3-5-sonnet-20240620", temperature=0.0), ROUTE_FILES_AGENT_NAME: AgentLLMConfig(model="gpt-4o", temperature=0.0), } ) From f1dbd3481b8f6df6c7a97d3a3ab9fc513da12f46 Mon Sep 17 00:00:00 2001 From: LeonOstrez Date: Thu, 25 Jul 2024 14:33:09 +0200 Subject: [PATCH 13/16] update default config.json setup and example-config.json --- core/config/__init__.py | 7 ++++++- core/llm/anthropic_client.py | 2 +- example-config.json | 12 ++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/core/config/__init__.py b/core/config/__init__.py index 71ae6f35f..645670e30 100644 --- a/core/config/__init__.py +++ b/core/config/__init__.py @@ -306,7 +306,12 @@ class Config(_StrictModel): Pythagora Core configuration """ - llm: dict[LLMProvider, ProviderConfig] = Field(default={LLMProvider.OPENAI: ProviderConfig()}) + llm: dict[LLMProvider, ProviderConfig] = Field( + default={ + LLMProvider.OPENAI: ProviderConfig(), + LLMProvider.ANTHROPIC: ProviderConfig(), + } + ) agent: dict[str, AgentLLMConfig] = Field( default={ DEFAULT_AGENT_NAME: AgentLLMConfig(), diff --git a/core/llm/anthropic_client.py b/core/llm/anthropic_client.py index a834079a4..f458a7b2b 100644 --- a/core/llm/anthropic_client.py +++ 
b/core/llm/anthropic_client.py @@ -23,7 +23,7 @@ class AnthropicClient(BaseLLMClient): def _init_client(self): self.client = AsyncAnthropic( api_key=self.config.api_key, - # base_url=self.config.base_url, + base_url=self.config.base_url, timeout=Timeout( max(self.config.connect_timeout, self.config.read_timeout), connect=self.config.connect_timeout, diff --git a/example-config.json b/example-config.json index 5afefdcfd..57a1e96d3 100644 --- a/example-config.json +++ b/example-config.json @@ -10,6 +10,13 @@ "connect_timeout": 60.0, "read_timeout": 10.0 }, + // Example config for Anthropic (see https://docs.anthropic.com/docs/api-reference) + "anthropic": { + "base_url": "https://api.anthropic.com", + "api_key": "your-api-key", + "connect_timeout": 60.0, + "read_timeout": 10.0 + }, // Example config for Azure OpenAI (see https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions) "azure": { "base_url": "https://your-resource-name.openai.azure.com/", @@ -40,6 +47,11 @@ "provider": "openai", "model": "gpt-3.5-turbo", "temperature": 0.0 + }, + "BugHunter.check_logs": { + "provider": "anthropic", + "model": "claude-3-5-sonnet-20240620", + "temperature": 0.0 } }, // Logging configuration outputs debug log to "pythagora.log" by default. If you set this to null, From 26a22629e5c75871fdc8f0eb95aaf913af0abd05 Mon Sep 17 00:00:00 2001 From: Zvonimir Sabljic Date: Thu, 25 Jul 2024 20:14:32 -0700 Subject: [PATCH 14/16] Fixes --- core/agents/bug_hunter.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index e23a46ebb..af4c977ed 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -140,23 +140,29 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti # TODO how can we get FE and BE logs automatically? 
backend_logs = await self.ask_question( "Please do exactly what you did in the last iteration, paste **BACKEND** logs here and click CONTINUE.", - buttons={"continue": "Continue"}, + buttons={"continue": "Continue", "done": "Bug is fixed"}, default="continue", hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"], ) - frontend_logs = await self.ask_question( - "Please paste **frontend** logs here and click CONTINUE.", - buttons={"continue": "Continue"}, - default="continue", - hint="Instructions for testing:\n\n" - + self.current_state.current_iteration["bug_reproduction_description"], - ) - - # TODO select only the logs that are new (with PYTHAGORA_DEBUGGING_LOG) - self.next_state.current_iteration["bug_hunting_cycles"][-1]["backend_logs"] = backend_logs.text - self.next_state.current_iteration["bug_hunting_cycles"][-1]["frontend_logs"] = frontend_logs.text - self.next_state.current_iteration["status"] = IterationStatus.HUNTING_FOR_BUG + if backend_logs.button == "done": + self.next_state.complete_iteration() + else: + frontend_logs = await self.ask_question( + "Please paste **frontend** logs here and click CONTINUE.", + buttons={"continue": "Continue", "done": "Bug is fixed"}, + default="continue", + hint="Instructions for testing:\n\n" + + self.current_state.current_iteration["bug_reproduction_description"], + ) + + # TODO select only the logs that are new (with PYTHAGORA_DEBUGGING_LOG) + self.next_state.current_iteration["bug_hunting_cycles"][-1]["backend_logs"] = backend_logs.text + self.next_state.current_iteration["bug_hunting_cycles"][-1]["frontend_logs"] = frontend_logs.text + self.next_state.current_iteration["status"] = IterationStatus.HUNTING_FOR_BUG + + if frontend_logs.button == "done": + self.next_state.complete_iteration() return AgentResponse.done(self) From abac01c5ec0e1e51edf221cd8a48698022e2b284 Mon Sep 17 00:00:00 2001 From: LeonOstrez Date: Fri, 26 Jul 2024 11:55:44 +0200 Subject: [PATCH 
15/16] fix getting unfinished iterations --- core/agents/bug_hunter.py | 4 ++++ core/agents/task_reviewer.py | 2 +- core/db/models/project_state.py | 4 +++- tests/db/test_project_state.py | 3 ++- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index af4c977ed..30df597af 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -120,6 +120,10 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti "You can reproduce the bug like this:\n\n" + self.current_state.current_iteration["bug_reproduction_description"] ) + + if self.current_state.run_command: + await self.ui.send_run_command(self.current_state.run_command) + if awaiting_user_test: user_feedback = await self.ask_question( "Is the bug you reported fixed now?", diff --git a/core/agents/task_reviewer.py b/core/agents/task_reviewer.py index c71c33df5..7c0aa9830 100644 --- a/core/agents/task_reviewer.py +++ b/core/agents/task_reviewer.py @@ -47,7 +47,7 @@ async def review_code_changes(self) -> AgentResponse: ) llm_response: str = await llm(convo, temperature=0.7) - if "done" in llm_response.strip().lower()[-6:]: + if "done" in llm_response.strip().lower()[-7:]: return AgentResponse.done(self) else: return AgentResponse.task_review_feedback(self, llm_response) diff --git a/core/db/models/project_state.py b/core/db/models/project_state.py index cb3c86a76..5d4db95f6 100644 --- a/core/db/models/project_state.py +++ b/core/db/models/project_state.py @@ -119,7 +119,9 @@ def unfinished_iterations(self) -> list[dict]: :return: List of unfinished iterations. 
""" - return [iteration for iteration in self.iterations if iteration.get("status") != IterationStatus.DONE] + return [ + iteration for iteration in self.iterations if iteration.get("status") not in (None, IterationStatus.DONE) + ] @property def current_iteration(self) -> Optional[dict]: diff --git a/tests/db/test_project_state.py b/tests/db/test_project_state.py index 6c289914e..ef0d5b7b9 100644 --- a/tests/db/test_project_state.py +++ b/tests/db/test_project_state.py @@ -2,6 +2,7 @@ from sqlalchemy import select from core.db.models import Branch, File, FileContent, Project, ProjectState +from core.db.models.project_state import IterationStatus from .factories import create_project_state @@ -150,7 +151,7 @@ async def test_completing_unfinished_iterations(testdb): { "id": "abc", "description": "LLM breakdown of the iteration", - "completed": False, + "status": IterationStatus.HUNTING_FOR_BUG, } ] testdb.add(state) From 56624d70f3bf80bd1c9110f8c62e8042b470c215 Mon Sep 17 00:00:00 2001 From: LeonOstrez Date: Fri, 26 Jul 2024 12:00:46 +0200 Subject: [PATCH 16/16] formatting --- core/agents/developer.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/core/agents/developer.py b/core/agents/developer.py index 95e89fd33..5dff047c3 100644 --- a/core/agents/developer.py +++ b/core/agents/developer.py @@ -68,7 +68,10 @@ async def breakdown_current_iteration(self, task_review_feedback: Optional[str] n_tasks = 1 log.debug(f"Breaking down the task review feedback {task_review_feedback}") await self.send_message("Breaking down the task review feedback...") - elif self.current_state.current_iteration["status"] in (IterationStatus.AWAITING_BUG_FIX, IterationStatus.AWAITING_LOGGING): + elif self.current_state.current_iteration["status"] in ( + IterationStatus.AWAITING_BUG_FIX, + IterationStatus.AWAITING_LOGGING, + ): iteration = self.current_state.current_iteration current_task["task_review_feedback"] = None @@ -125,7 +128,9 @@ async def 
breakdown_current_iteration(self, task_review_feedback: Optional[str] self.set_next_steps(response, source) if iteration: - if "status" not in iteration or (iteration["status"] in (IterationStatus.AWAITING_USER_TEST, IterationStatus.AWAITING_BUG_REPRODUCTION)): + if "status" not in iteration or ( + iteration["status"] in (IterationStatus.AWAITING_USER_TEST, IterationStatus.AWAITING_BUG_REPRODUCTION) + ): # This is just a support for old iterations that don't have status self.next_state.complete_iteration() self.next_state.action = f"Troubleshooting #{len(self.current_state.iterations)}" @@ -246,9 +251,14 @@ def set_next_steps(self, response: TaskSteps, source: str): } for step in response.steps ] - if (len(self.next_state.unfinished_steps) > 0 and - source != "review" and (self.next_state.current_iteration is None or - self.next_state.current_iteration["status"] != IterationStatus.AWAITING_LOGGING)): + if ( + len(self.next_state.unfinished_steps) > 0 + and source != "review" + and ( + self.next_state.current_iteration is None + or self.next_state.current_iteration["status"] != IterationStatus.AWAITING_LOGGING + ) + ): self.next_state.steps += [ # TODO: add refactor step here once we have the refactor agent {