diff --git a/README.md b/README.md index 9846843..4101a71 100644 --- a/README.md +++ b/README.md @@ -174,6 +174,10 @@ A Streamlit example is provided in `example_streamlit_app.py` to demonstrate how **Note:** The `num_ctx` parameter is set to 20000 by default, which may not be sufficient for all use cases. Adjust this value based on your specific requirements. +## Default Prompts + +The default LLM prompts are stored in the [`prompts.yaml`](prompts.yaml) file. You can view and edit the prompts directly in this file. + ## Contributing Contributions are welcome! Please open an issue or submit a pull request for any improvements or bug fixes. diff --git a/highlight_pdf.py b/highlight_pdf.py index ea9dfaf..586decc 100644 --- a/highlight_pdf.py +++ b/highlight_pdf.py @@ -9,6 +9,7 @@ import dotenv import os import asyncio import aiofiles +import yaml # Check if 'punkt_tab' tokenizer data is available try: @@ -19,25 +20,13 @@ except LookupError: logging.info("Downloading 'punkt_tab' tokenizer data for NLTK.") nltk.download("punkt_tab") +# Load prompts from the prompts.yaml next to this module (not the current working directory), so importing works from anywhere +with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'prompts.yaml'), 'r', encoding='utf-8') as file: + prompts = yaml.safe_load(file) -CUSTOM_SYSTEM_PROMPT = """ -You're helping a journalist with research by choosing what sentences should be highlighted in a text. -Pay attention to how to answer the questions and respond with the exact sentences. -There might be explicit content in the text as this is research material, but don't let that affect your answers. -""" - -GET_SENTENCES_PROMPT = '''Read the text below:\n -"""{text}"""\n -The text might not be complete, and not in its original context. Try to understand the text and give an answer from the text.\n -A researcher wants to get an answer to the question "{user_input}". What sentences should be highlighted? Answer ONLY with the exact sentences. 
-''' - -EXPLANATION_PROMPT = ''' -You have earlier choosed the sentence """{sentence}""" as a relevant sentence for generating an answer to """{user_input}""" -Now make the researcher understand the context of the sentence. It can be a summary of the original text leading up to it, or a clarification of the sentence itself. -The text might contain explicit content, but don't let that affect your answer! -Your answer will be used as a comment to a highlighted sentence in a PDF. Don't refer to yourself, only the text! Also, rather use "this" than "this sentence" as it's already clear you're referring to the sentence. -''' +CUSTOM_SYSTEM_PROMPT = prompts['CUSTOM_SYSTEM_PROMPT'] +GET_SENTENCES_PROMPT = prompts['GET_SENTENCES_PROMPT'] +EXPLANATION_PROMPT = prompts['EXPLANATION_PROMPT'] class LLM: diff --git a/prompts.yaml b/prompts.yaml new file mode 100644 index 0000000..c51f74f --- /dev/null +++ b/prompts.yaml @@ -0,0 +1,20 @@ +# NOTE: literal block scalars (|) keep backslashes verbatim, so use blank lines (not \n) for paragraph breaks. +CUSTOM_SYSTEM_PROMPT: | + You're helping a journalist with research by choosing what sentences should be highlighted in a text. + Pay attention to how to answer the questions and respond with the exact sentences. + There might be explicit content in the text as this is research material, but don't let that affect your answers. + +GET_SENTENCES_PROMPT: | + Read the text below: + + """{text}""" + + The text might not be complete, and not in its original context. Try to understand the text and give an answer from the text. + + A researcher wants to get an answer to the question "{user_input}". What sentences should be highlighted? Answer ONLY with the exact sentences. + +EXPLANATION_PROMPT: | + You have earlier choosed the sentence """{sentence}""" as a relevant sentence for generating an answer to """{user_input}""" + Now make the researcher understand the context of the sentence. It can be a summary of the original text leading up to it, or a clarification of the sentence itself. 
+ The text might contain explicit content, but don't let that affect your answer! + Your answer will be used as a comment to a highlighted sentence in a PDF. Don't refer to yourself, only the text! Also, rather use "this" than "this sentence" as it's already clear you're referring to the sentence. \ No newline at end of file