"""Create LLM summaries and question prompts for documents in ``sci_articles``.

Both functions read the module-level name ``arango``. In this file that name
is bound only inside the ``__main__`` guard at the bottom, so they work when
the file is run as a script (or after a caller has set ``arango`` itself).
"""

import tiktoken

from _arango import ArangoDB
from _llm import LLM

# AQL text used by both functions to fetch every document in the collection.
_ALL_ARTICLES_QUERY = " for doc in sci_articles return doc "


def make_summaries():
    """Summarise every document in ``sci_articles`` and store the result.

    For each article an ``LLM`` is created with ``num_ctx`` derived from the
    article's token count, a summary is generated from ``article["text"]``,
    and the summary plus the generation settings are stored under
    ``article["summary"]`` before the document is updated in the collection.

    Any exception raised while handling one article (including a missing
    ``"text"`` field) is printed and stored as ``article["summary_error"]``;
    the loop then moves on to the next article.
    """
    # Initialize the tokenizer
    tokenizer = tiktoken.get_encoding("cl100k_base")

    def count_tokens(text):
        """Return how many tokens ``tokenizer`` produces for ``text``."""
        return len(tokenizer.encode(text))

    # Materialise the cursor up front so documents can be updated while looping.
    articles = list(arango.db.aql.execute(_ALL_ARTICLES_QUERY))

    for article in articles:
        try:
            # Token counting and model setup live inside the ``try`` so that a
            # single malformed article is recorded instead of aborting the run.
            num_tokens = count_tokens(article["text"])
            llm = LLM(
                system_message=(
                    "You are summarising scientific articles. It is very"
                    " important that you keep to what is written and do not"
                    " add any of your own opinions or interpretations."
                ),
                # Article length plus 3000 tokens of headroom, capped at 70000.
                num_ctx=min(num_tokens + 3000, 70000),
                temperature=0,
            )

            # Adjacent literals are concatenated; the resulting text (including
            # its single-space separators) is exactly the original prompt.
            prompt = (
                ' Make a summary of the following text: """ '
                f'{article["text"]}'
                ' """ Write a detailed summary. Make sure to include'
                " information from all sections: introduction, methods,"
                " results, and conclusion. Everything about electric vehicles,"
                " and things related to electric cars, is very important."
                " Write the summary as if you are writing for someone who is"
                " not familiar with the topic. Write it from the point of the"
                " view of the author of the text. "
            )

            # The settings are read before ``generate`` is called, matching the
            # original evaluation order.
            article["summary"] = {
                "meta": {
                    "model": llm.llm_model,
                    "system_message": llm.system_message,
                    "num_ctx": llm.options["num_ctx"],
                    "temperature": llm.options["temperature"],
                },
                "text_sum": llm.generate(prompt),
            }
            print(article["summary"])
            arango.db.collection("sci_articles").update(article)
        except Exception as e:
            # Best-effort batch job: keep the failure on the document itself so
            # it can be inspected later, then carry on with the next article.
            print(e)
            article["summary_error"] = str(e)
            arango.db.collection("sci_articles").update(article)


def make_chunk_qa():
    """Build a question-generation prompt for each chunk of each article.

    For every document in ``sci_articles`` two ``LLM`` instances are created
    (one configured for writing questions, one for answering them) and a
    prompt is formatted from ``chunk["text"]`` for each entry in
    ``article["chunks"]``.

    NOTE(review): as written, the prompt is never passed to either model and
    nothing is stored or returned -- this function looks unfinished.
    """
    articles = list(arango.db.aql.execute(_ALL_ARTICLES_QUERY))

    for article in articles:
        # New instances per article; presumably so that any conversation
        # history stays within one article -- TODO confirm against ``LLM``.
        question_machine = LLM(
            system_message=(
                "You are creating questions based on scientific articles."
                " The questions should be based on the text and should be"
                " answerable by the text, but you can check the conversation"
                " history to make them more relevant for the context."
            ),
            num_ctx=20000,
            temperature=0.2,
        )
        answer_machine = LLM(
            system_message=(
                "You are answering questions based on scientific articles."
                " The answers should be based on the text, but you can check"
                " the conversation history to make them more relevant for the"
                " context."
            ),
            num_ctx=20000,
            temperature=0.2,
        )

        for chunk in article["chunks"]:
            # TODO: send ``prompt`` to ``question_machine`` and the resulting
            # question to ``answer_machine``; neither model is used yet.
            prompt = (
                ' Create a question based on the following text: """ '
                f"{chunk['text']}"
                ' """ Write a question that can be answered by the text.'
                " Make sure to include information from all sections:"
                " introduction, methods, results, and conclusion. Everything"
                " about electric vehicles, and things related to electric"
                " cars, is very important. Write the question as if you are"
                " writing for someone who is not familiar with the topic."
                " Write it from the point of the view of the author of the"
                " text. "
            )


if __name__ == "__main__":
    arango = ArangoDB()