Enhance error handling in source extraction and improve query formatting in MasterAgent

8 months ago · ae87ea1c63
parent ba7eaaed2a
commit ae87ea1c63
1 changed files with 35 additions and 24 deletions
--- a/agent_research.py
+++ b/agent_research.py
@ -146,18 +146,23 @@ class ResearchReport:

        # Handle different result formats
        for item in information:
-            if "result" in item and "content" in item["result"]:
-                if isinstance(item["result"]["content"], dict):
-                    # Handle structured content like chunks
-                    for title, group in item["result"]["content"].items():
-                        if "chunks" in group:
-                            for chunk in group["chunks"]:
-                                metadata = chunk.get("metadata", {})
-                                source = f"{metadata.get('title', 'Unknown')}"
-                                if metadata.get("journal"):
-                                    source += f" ({metadata.get('journal')})"
-                                if source not in sources:
-                                    sources.append(source)
+            try:
+                if "result" in item and "content" in item["result"]:
+                    if isinstance(item["result"]["content"], dict):
+                        # Handle structured content like chunks
+                        for title, group in item["result"]["content"].items():
+                            if "chunks" in group:
+                                for chunk in group["chunks"]:
+                                    metadata = chunk.get("metadata", {})
+                                    source = f"{metadata.get('title', 'Unknown')}"
+                                    if metadata.get("journal"):
+                                        source += f" ({metadata.get('journal')})"
+                                    if source not in sources:
+                                        sources.append(source)
+            except Exception as e:
+                print_yellow(f"Error extracting sources: {e}")
+                sources.append('No source')
+

        return sources

@ -504,6 +509,8 @@ class MasterAgent(ResearchBase):
            current_step="Plan Creation", current_task="Splitting into questions."
        )

+
+        # Divide the question into multiple sub-questions
        query = f"""
        A journalist wants to get a report that answers this question: "{question}"
        THIS IS *NOT* A QUESTION YOU CAN ANSWER! Instead, you need to split it into multiple questions that can be answered through research.
@ -534,8 +541,9 @@ class MasterAgent(ResearchBase):
            current_step="Plan Creation", current_task="Creating initial research plan"
        )

+        # TODO: Update the list of available sources in the query below whenever new sources are added.
        query = f"""
-        Thanks! Now, create a research plan for answering the original question: "{question}".
+        Thanks! Now, create a research plan for answering the original question: "{question.replace('"', "'")}".
        Include the questions you just created and any additional steps needed to answer the original question.
        Include what type of information you need from what available sources.
        Available sources are:
@ -543,18 +551,21 @@ class MasterAgent(ResearchBase):
        - Other articles the journalists has gathered, such as blog posts, news articles, etc.
        - The journalists own notes.
        - Transcribed interviews (already done, you can't produce new ones).
-        All of the above sources are available in a database, but you need to specify what you need. Be as precise as possible.
+        - An analyzing tool that can analyze the information you gather.
+        All of the above sources are available in a database/LLM model, but you need to specify what you need. Be as precise as possible.
        As you don't have access to the sources, include steps to retrieve excerpts from articles and retrieve those that might be interesting.
        You are working in a limited context and can't access the internet or external databases, and some "best practices" might not apply, like cross-referencing sources. Therefore, make the plan basic, easy to follow and with the available sources in mind.
+        
+        *IMPORTANT! Each step should try to answer one or more of the questions you created, and result in a summary of the information you found.*
+        
        *Please structure the plan like:*
        ## Step 1:
-        - Task1: Description of task
-        - Task2: Description of task
+        - Task1: Description of task and outcome
+        - Task2: Description of task and outcome
        ## Step 2:
-        - Task1: Description of task
-        - Task2: Description of task
+        - Task1: Description of task and outcome
        Etc, with as many steps and tasks as needed.
-        Do NOT include two write the report as a step, ONLY the tasks needed to gather information. The report will be written in a later step.
+        Do NOT include the writing of the report as a step, ONLY the tasks needed to gather information. The report will be written in a later step.
        """

        # Generate the plan and handle potential formatting issues
@ -816,8 +827,8 @@ class MasterAgent(ResearchBase):
        print(gathered_info, "\n")

        query = f'''
-        Based on the following research information, write a comprehensive report that answers the question: 
-        "{self.research_state.get('original_question', 'No question provided')}"
+        Based on the following research information, write an extensive report that answers the question in detail: 
+        "{self.research_state.get('original_question', 'No question provided').replace('"', "'")}"
        
        Research Information:
        """
@ -826,7 +837,7 @@ class MasterAgent(ResearchBase):
        
        The report should be well-structured with appropriate headings, present the information 
        accurately, and highlight key insights. Cite sources using [number] notation when referencing specific information.
-        As the report is for journalistic reseach, please be generous with details and cases that can be used when reporting on the subject.
+        As the report is for journalistic research, please be generous with details and cases that can be used when reporting on the subject!
        '''

        response = self.llm.generate(query=query)
@ -842,7 +853,7 @@ class MasterAgent(ResearchBase):
 class StructureAgent(ResearchBase):
    """A small LLM for structuring text as JSON"""

-    def __init__(self, username, model: str = "small", **kwargs):
+    def __init__(self, username, model: str = "standard", **kwargs):

        super().__init__(username=username, **kwargs)
        self.model = model
@ -959,7 +970,7 @@ class ToolAgent(ResearchBase):
        You HAVE TO CHOOSE A TOOL, even if you think you can answer without it. Don't answer the question without choosing a tool.
        '''

-        response = self.llm.generate(query=query, tools=self.tools)
+        response = self.llm.generate(query=query, tools=self.tools, model="tools")

        # Extract tool calls from the response
        tool_calls = response.tool_calls if hasattr(response, "tool_calls") else []