You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
75 lines
2.2 KiB
75 lines
2.2 KiB
import streamlit as st |
|
import crossref_commons.retrieval |
|
from _llm import LLM |
|
from _chromadb import ChromaDB |
|
from _arango import ArangoDB |
|
from pprint import pprint |
|
from colorprinter.print_color import * |
|
|
|
# Backing services, created once per script run: the Chroma wrapper, the
# ArangoDB wrapper, and the LLM client (temperature 0.1).
chromadb = ChromaDB()
arango = ArangoDB()
chatbot = LLM(temperature=0.1)

# Page header shown above the question box.
st.title("EV Cars Chatbot")
st.write("Ask a question about EV car battery production:")

# The user's question; the rest of the script only runs when this is truthy.
user_input = st.text_input("Ask something")
|
|
|
# Placeholder bibliographic fields used when ArangoDB has nothing (or only
# partial data) for a chunk's article.
_NO_CROSSREF_INFO = {
    "title": "No title",
    "published_date": "No published date",
    "journal": "No journal",
}


def _retrieve_chunks(question, n_results=7):
    """Query the "sci_articles" Chroma collection and pair documents with metadata.

    Args:
        question: Text passed to Chroma as ``query_texts``.
        n_results: Number of results requested from Chroma.

    Returns:
        A list of ``{"document": ..., "metadata": ...}`` dicts built from the
        first (and only) query's result lists.
    """
    hits = chromadb.db.get_collection("sci_articles").query(
        query_texts=question, n_results=n_results
    )
    return [
        {"document": document, "metadata": metadata}
        for document, metadata in zip(hits["documents"][0], hits["metadatas"][0])
    ]


def _attach_crossref_info(chunks):
    """Add a ``"crossref_info"`` dict to every chunk, in place.

    The info is read from the ``"metadata"`` field of the ArangoDB document
    whose key is the chunk's ``metadata["_key"]``.  Missing documents, missing
    ``"metadata"`` fields and missing/``None`` individual values all fall back
    to the ``_NO_CROSSREF_INFO`` placeholders, so later code can index
    ``title``, ``published_date`` and ``journal`` unconditionally.

    Args:
        chunks: List of chunk dicts as returned by ``_retrieve_chunks``.
    """
    collection = arango.db.collection("sci_articles")
    # Several chunks often come from the same article; look each key up once.
    info_by_key = {}
    for chunk in chunks:
        key = (chunk["metadata"] or {}).get("_key")
        if key not in info_by_key:
            # NOTE(review): assumes arango.db is a python-arango database,
            # whose collection.get() returns None for an unknown key - confirm.
            record = collection.get(key) if key is not None else None
            stored = record.get("metadata") if record else None
            info = dict(_NO_CROSSREF_INFO)
            if stored:
                info.update(
                    {name: value for name, value in stored.items() if value is not None}
                )
            info_by_key[key] = info
        chunk["crossref_info"] = info_by_key[key]


def _sort_chunks(chunks):
    """Return chunks ordered by published date, then by title.

    Values are compared as stored.  If they are not mutually comparable (for
    example a numeric date next to the "No published date" placeholder), the
    sort is retried on their string forms instead of raising ``TypeError``.
    """

    def raw_key(chunk):
        info = chunk["crossref_info"]
        return (info["published_date"], info["title"])

    def text_key(chunk):
        return tuple(str(part) for part in raw_key(chunk))

    try:
        return sorted(chunks, key=raw_key)
    except TypeError:
        return sorted(chunks, key=text_key)


def _group_by_title(chunks):
    """Group chunks by article title, keeping first-seen order of titles."""
    grouped = {}
    for chunk in chunks:
        grouped.setdefault(chunk["crossref_info"]["title"], []).append(chunk)
    return grouped


def _format_context(grouped):
    """Render grouped chunks as one text block: a heading per article, then its snippets.

    Snippets from the same article are separated by a "(...)" line; articles
    are separated by a "---" line.
    """
    sections = []
    for title, members in grouped.items():
        info = members[0]["crossref_info"]
        snippets = "\n(...)\n".join(member["document"] for member in members)
        sections.append(
            f"\n\n# {title}\n"
            f"## {info['published_date']} in {info['journal']}\n"
            f"{snippets}\n\n"
            "---\n"
            "\n\n"
        )
    return "".join(sections)


def _build_prompt(question, context):
    """Build the LLM prompt: the question, the instructions, the quoted context, the question again."""
    return (
        f"{question}\n"
        "Below are snippets from different articles with title and date of publication.\n"
        "ONLY use the information below to answer the question. Do not use any other information.\n"
        "\n"
        '"""\n'
        f"{context}\n"
        '"""\n'
        "\n"
        f"{question}\n"
    )


if user_input:
    combined_chunks = _retrieve_chunks(user_input)
    _attach_crossref_info(combined_chunks)

    # Sort by published date then title, and group snippets per article.
    sorted_chunks = _sort_chunks(combined_chunks)
    grouped_chunks = _group_by_title(sorted_chunks)

    chunks_string = _format_context(grouped_chunks)
    prompt = _build_prompt(user_input, chunks_string)

    response = chatbot.generate(prompt)
    st.write(response)