# sci/prompts.py


import re
#from _classes import Project

def description_string(project: "Project") -> str:
    """Build the sentence that introduces the project's description.

    Args:
        project: Object exposing a ``description`` attribute.

    Returns:
        ``'The project is about "<description>".'`` when a description is
        set, otherwise an empty string.
    """
    # A truthiness test also covers ``None``, so a missing description never
    # ends up in a prompt as the literal text "None".
    if not project.description:
        return ''
    return f'The project is about "{project.description}".'

def use_tools(use: bool = False):
    """Return the optional sentence telling the model that it may use tools.

    Args:
        use: When truthy the hint sentence is returned, otherwise ''.

    Returns:
        str: The hint sentence or an empty string.
    """
    tool_hint = 'If you need to use a tool to fetch information, you can do that as well.'
    return tool_hint if use else ''

def get_assistant_prompt():
    """
    Build the system prompt for the research assistant role.

    The sentences are joined with a newline followed by four spaces, and the
    last sentence keeps its trailing space, so the returned text has the same
    layout as a single indented multi-line literal.

    Returns:
        str: The prompt for the research assistant AI.
    """
    sentences = (
        'You are a research assistant chatting with a researcher. Only use the information from scientific articles you are provided with to answer questions.',
        'The articles are ordered by publication date, so the first article is the oldest one. Sometimes the articles might contain conflicting information, in that case be clear about the conflict and provide both sides of the argument with publication dates taken into account.',
        'Be sure to reference the source of the information with the number of the article inside square brackets (e.g. "<answer based on an article> [article number]").',
        'If you have to reference the articles in running text, e.g. in a headline or the beginning of a bullet point, use the title of the article. Do NOT write something like "Article 1" but use the actual title of the article.',
        'You should not write a reference section as this will be added later.',
        'Format your answers in Markdown format. ',
    )
    return "\n    ".join(sentences)

def get_editor_prompt(project: "Project"):
    """Generate the system prompt for an editor who coaches a reporter.

    Args:
        project: The project the reporter is working on. It is accessed
            through attributes (not dictionary keys):
            - name (str): The name of the project.
            - description (str): The description of the project, rendered
              via ``description_string``.
            - notes_summary (str): A summary of the notes about the project
              and its topic. When it is empty the notes section is left out.

    Returns:
        str: A formatted string containing the coaching prompt for the editor.
    """
    # The summary is fenced in triple double quotes so the model can tell the
    # notes apart from the instructions around them.
    if project.notes_summary:
        notes_string = f'''Here are other important things you should know about the project and the topic:
    """
    {project.notes_summary}
    """
    '''
    else:
        notes_string = ''

    return f'''You are an editor coaching a journalist who is working on the project "{project.name}". {description_string(project)}
    {notes_string}
    When writing with the reporter you will _often_ get other information, like excerpts from articles and other documents. Use the notes to put the information in context and help the reporter to move forward.
    If no other information is provided, try to answer based on the conversation history. If there is no history, and you're requested to answer in a conversational way, don't pretend to know things you don't have information about.
    The project is a journalistic piece, so it is important that you help the reporter to be critical of the sources and to provide a balanced view of the topic.
    Be sure to understand what the reporter is asking and provide the information in a way that is helpful for the reporter to move forward. Try to understand if the reporter is asking for a specific piece of information or if they are looking for guidance on how to move forward, or just want to discuss the topic.
    If you need more information to answer the question, try to get it.
    '''

def get_chat_prompt(user_input, role, content_string=None, content_attachment=None, image_attachment=False):
    """Wrap the user's message in the instructions that fit the current turn.

    The branches are checked in this order: an attached image, an attached
    file, and finally the role of the speaker.

    Args:
        user_input: The message typed by the user (or the previous speaker).
        role: One of "Research Assistant", "Editor", "Guest" or "Host". Only
            consulted when there is no image and no file attachment.
        content_string: Retrieved context that is inserted into the
            role-specific prompts.
        content_attachment: Text of an attached file. When truthy the
            attachment prompt is returned regardless of ``role``.
        image_attachment: When truthy the image prompt is returned regardless
            of every other argument.

    Returns:
        str: The prompt to send to the model.

    Raises:
        ValueError: If no attachment is given and ``role`` is not one of the
            supported roles.
    """
    if image_attachment:
        return f'''{user_input}
        Use the attached image to write your response.
        '''

    if content_attachment:
        return f'''{user_input}
        Content of the attached file:
         """
         {content_attachment}
         """
         Respond to "{user_input}" based on the information in the attachment.
         Format your answer in a way that is easy to understand for a general audience, and in a basic Markdown format.
         '''

    if role == "Research Assistant":
        return f'''{user_input}
        Below are snippets from different articles, often with title and date of publication.
        ONLY use the information below to answer the question. Do not use any other information.

        """
        {content_string}
        """
        Remember:
        - Reference the source of the information with the number of the article inside square brackets (e.g. "<answer based on an article> [article number]").
        - If you have to reference the articles in running text, e.g. in a headline or the beginning of a bullet point, use the title of the article. Do NOT write something like "Article 1" but use the actual title of the article.
        - The articles are ordered by publication date, so the first article is the oldest one. Sometimes the articles might contain conflicting information, in that case be clear about the conflict and provide both sides of the argument with publication dates taken into account.

        {user_input}
        '''

    if role == "Editor":
        return f'''The reporter has asked: "{user_input}". Try to answer the question or provide guidance based on the information below, and your knowledge of the project.

        """
        {content_string}
        """
        Remember:
        - Sometimes the articles might contain conflicting information, in that case be clear about the conflict and provide both sides of the argument with publication dates taken into account.
        - If you think you need more information to answer the question, ask the reporter for more context.

        {user_input}
        '''

    if role == "Guest":
        return f'''The podcast host has asked: "{user_input}". Try to answer the question based on the information below.

        """
        {content_string}
        """
        Remember:
        - Answer in a way that is easy to understand for a general audience.
        - Only answer based on the information above.
        - Answer in a "spoken" way, formatting the answer as if you were speaking it out loud.

        {user_input}
        '''

    if role == "Host":
        # Unlike the other roles, the host prompt does not repeat the input at
        # the end: the model is asked for a new question, not an answer.
        return f'''The expert has stated: "{user_input}". Try to come up with a new question based on the information below.

        """
        {content_string}
        """
        Remember:
        - The information above is the context for the expert's statement, so the new question should be relevant to that context, as well as the conversation as a whole.
        - You are a critical journalist, so try to come up with a question that challenges the expert's statement or asks for more information.
        - Make sure not to repeat yourself! Check what questions you have already asked to avoid repetition.
        '''

    # Without this guard an unexpected role used to fall through to a return of
    # an unassigned local and fail with an opaque UnboundLocalError.
    raise ValueError(
        f'Unknown role {role!r}; expected "Research Assistant", "Editor", "Guest" or "Host".'
    )

def get_query_builder_system_message():
    """Return the system message for rewriting user input as a vector-database query.

    Each instruction sits on its own line without indentation, and the text
    starts and ends with a single newline.

    Returns:
        str: The system message.
    """
    instructions = (
        "Take the user input and write it as a sentence that could be used as a query for a vector database.",
        "The vector database will return text snippets that semantically match the query, so you CAN'T USE NEGATIONS or other complex language constructs. If there is a negation in the user input, exclude that part from the query.",
        "If the user input seems to be a follow-up question or comment, use the context from the chat history to make a relevant query.",
        "Remember that the query is meant to return information on a specific topic, so make sure the query is focused on that topic. Don't make a query to search for tools or methods (if it's not information about a specific method, e.g. a scientific method), only for actual information.",
        "Answer ONLY with the query, NO explanation or reasoning!",
    )
    # The empty first and last items produce the leading and trailing newline.
    return "\n".join(("", *instructions, ""))

def get_note_summary_prompt(project: "Project", notes_string: str):
    """Build the prompt that asks an LLM to condense the project's notes.

    Args:
        project: Object whose ``name`` is read here and which is handed to
            ``description_string`` for the optional description sentence.
        notes_string: The notes to summarise; they are placed between triple
            double quotes in the prompt.

    Returns:
        str: The prompt. Whitespace around every line break is collapsed to a
        single newline, which also applies to line breaks inside
        ``notes_string``.
    """
    # description_string() returns either a complete sentence ending in a
    # period or an empty string, so no extra period is appended after it
    # (that used to yield '..' or a dangling ' .').
    query = f'''
    Below are notes from a project called "{project.name}". {description_string(project)}
    """
    {notes_string}
    """
    I want you to summarize the notes in a concise manner. The summary will be used to create a system message for chatting with LLMs about the project.
    Make sure to include the most important points, and include any key terms or concepts.
    Try to gather at least something from each note, but don't reference the notes directly.
    Answer ONLY with the summary, nothing else.
    '''
    return re.sub(r"\s*\n\s*", "\n", query)


def get_image_system_prompt(project: "Project"):
    """Build the system prompt used when the model describes project images.

    Args:
        project: Object exposing a ``name`` attribute; the name is inserted
            into the first sentence.

    Returns:
        str: The prompt with whitespace around every line break collapsed to
        a single newline, so it starts and ends with one newline.
    """
    system_message = f"""
    You are an assistant to a journalist who is working on a project called {project.name}. Your task is to analyze and describe the images that are part of the project.
    The images you get might show a graph, chart or other visual representation of data. If so, answer in a way that describes the data and the trends or patterns that can be seen in the image.
    The images might also show a photo or illustration, in that case describe the content of the image in a way that could be used in a caption.
    - Don't include any information that is not visible in the image.
    - Don't focus too much on the different parts of the image/figure, but rather on the meaning of it.
    - Answer ONLY with the description of the image, nothing else argumenting or explaining.
    """
    return re.sub(r"\s*\n\s*", "\n", system_message)

def get_tools_prompt(user_input):
    """Build the prompt that makes the model pick one or more tools.

    Args:
        user_input: The user's message; it is quoted on the first line.

    Returns:
        str: The prompt with whitespace around every line break collapsed to
        a single newline (line breaks inside ``user_input`` included).
    """
    prompt = f'''User message: "{user_input}"
    You have to choose one or many tools in order to answer the message. It's important that you think of what information is needed to make a good answer.
    Make sure to read the description of the tools carefully before choosing! E.g. choose the conversational response tool ONLY if the user is small talking or asking, use other tools if the user is asking a question or wants information.
    You can ONLY choose a tool you are provided with, don't make up a tool!
    You HAVE TO CHOOSE A TOOL, even if you think you can answer without it. Don't answer the question without choosing a tool.
    '''
    return re.sub(r"\s*\n\s*", "\n", prompt)


def get_summary_prompt(text, is_sci):
    """Build the prompt that asks the model to summarise ``text``.

    Args:
        text: The text to summarise. Whitespace around its line breaks is
            collapsed before it is placed between triple double quotes.
        is_sci: Truthy for scientific articles (abstract-style guidance),
            falsy for the general guidance.

    Returns:
        str: The prompt, with whitespace around every line break collapsed to
        a single newline.
    """
    line_breaks = re.compile(r"\s*\n\s*")
    cleaned_text = line_breaks.sub("\n", text)

    guidance = (
        'The text will be used as an abstract for a scientific article. Make sure to include the most important results and findings, as well with relevant information about methods, data and conclusions. Keep the summary concise.'
        if is_sci
        else 'Make sure to include the most important points and facts, and to keep the summary concise.'
    )

    template = f'''Summarise the text below:
    """
    {cleaned_text}
    """
    {guidance}
    Use ONLY the information from the text to write the summary. Do not include any additional information or your own interpretation.
    Answer ONLY with the summary, nothing else like reasoning or explanation.
    '''
    return line_breaks.sub("\n", template)


def get_generate_vector_query_prompt(user_input: str, role: str):
    """Build the instruction that asks the model for a vector-database query.

    Args:
        user_input: The question or statement to base the query on.
        role: "Guest" and "Host" get podcast-specific wording; every other
            value gets the generic wording.

    Returns:
        str: The instruction followed by a reminder, on a second line, to
        follow the earlier instructions.
    """
    # NOTE: the former debug print of role.upper() was dropped; it wrote to
    # stdout on every call and raised AttributeError for a non-string role.
    if role == "Guest":
        query = f"""A podcast host has asked this question in an interview: "{user_input}". Generate a query for the vector database to answer the actual question."""
    elif role == "Host":
        query = f"""An expert has stated: "{user_input}". Generate a query for the vector database to get context for that answer in order to come up with a new question."""
    else:
        query = f"""A user asked this question: "{user_input}". Generate a query for the vector database"""
    return query + "\nMake sure to follow the instructions you got earlier!"