You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

75 lines
2.2 KiB

import streamlit as st
import crossref_commons.retrieval
from _llm import LLM
from _chromadb import ChromaDB
from _arango import ArangoDB
from pprint import pprint
from colorprinter.print_color import *
# Initialize databases and chatbot.
# `chromadb` is queried further down for the text chunks closest to the
# question, `arango` is used to look up per-article metadata by `_key`, and
# `chatbot` generates the final answer from the assembled prompt.
# NOTE(review): Streamlit normally re-executes the script on every widget
# interaction, so these three objects are presumably rebuilt on each rerun —
# confirm whether the constructors are cheap or should be cached.
chromadb = ChromaDB()
arango = ArangoDB()
chatbot = LLM(temperature=0.1)
# Streamlit app setup: page title followed by a short instruction line.
st.title("EV Cars Chatbot")
st.write("Ask a question about EV car battery production:")
# User input: the question typed by the user. The retrieval/answer block that
# follows only runs when this value is truthy.
user_input = st.text_input("Ask something")
# Placeholder bibliographic fields used when ArangoDB has no (or only partial)
# metadata for an article. The keys are exactly the ones read when sorting,
# grouping and formatting the snippets below.
_MISSING_CROSSREF_INFO = {
    "title": "No title",
    "published_date": "No published date",
    "journal": "No journal",
}


def _fetch_crossref_info(key):
    """Return the article metadata stored in ArangoDB under *key*.

    The document is read from the ``sci_articles`` collection and its
    ``"metadata"`` field is returned as a new dict. Any of ``title``,
    ``published_date`` or ``journal`` that is absent or ``None`` is replaced
    by its placeholder, and a full set of placeholders is returned when the
    key is ``None``, the document does not exist, or it carries no metadata.

    The lookup result is not subscripted directly because ``get`` is expected
    to return ``None`` for an unknown key (python-arango behaviour; confirm
    for the project's ``ArangoDB`` wrapper).
    """
    document = None
    if key is not None:
        document = arango.db.collection("sci_articles").get(key)

    metadata = document.get("metadata") if isinstance(document, dict) else None
    if not isinstance(metadata, dict) or not metadata:
        return dict(_MISSING_CROSSREF_INFO)

    info = dict(metadata)
    for field, placeholder in _MISSING_CROSSREF_INFO.items():
        if info.get(field) is None:
            info[field] = placeholder
    return info


def _retrieve_chunks(question):
    """Query ChromaDB for the 7 nearest chunks and attach article metadata.

    Returns a list of dicts with the keys ``"document"`` (chunk text),
    ``"metadata"`` (the ChromaDB metadata) and ``"crossref_info"`` (the
    ArangoDB metadata, see ``_fetch_crossref_info``).
    """
    results = chromadb.db.get_collection("sci_articles").query(
        query_texts=question, n_results=7
    )

    # Chunks with the same key share one ArangoDB lookup.
    info_by_key = {}
    retrieved = []
    # Index 0 selects the result lists belonging to the single query text.
    for document, metadata in zip(results["documents"][0], results["metadatas"][0]):
        key = (metadata or {}).get("_key")
        if key not in info_by_key:
            info_by_key[key] = _fetch_crossref_info(key)
        retrieved.append(
            {
                "document": document,
                "metadata": metadata,
                "crossref_info": info_by_key[key],
            }
        )
    return retrieved


def _format_context(retrieved):
    """Render the retrieved chunks as one text block, one section per title.

    Chunks are sorted by ``published_date`` and then ``title``, grouped by
    title (groups keep the order of first appearance after sorting), and the
    chunk texts of each group are joined with a ``(...)`` separator line under
    a heading that shows the title, date and journal of the group's first
    chunk.
    """

    def sort_key(chunk):
        info = chunk["crossref_info"]
        return (info["published_date"], info["title"])

    try:
        ordered = sorted(retrieved, key=sort_key)
    except TypeError:
        # Field types are not mutually comparable (e.g. a placeholder string
        # next to a non-string date); fall back to comparing their text form.
        ordered = sorted(
            retrieved, key=lambda chunk: tuple(str(v) for v in sort_key(chunk))
        )

    by_title = {}
    for chunk in ordered:
        by_title.setdefault(chunk["crossref_info"]["title"], []).append(chunk)

    sections = []
    for title, members in by_title.items():
        first_info = members[0]["crossref_info"]
        published = first_info["published_date"]
        journal = first_info["journal"]
        body = "\n(...)\n".join(member["document"] for member in members)
        sections.append(
            f"\n\n# {title}\n"
            f"## {published} in {journal}\n"
            f"{body}\n\n"
            "---\n"
            "\n\n"
        )
    return "".join(sections)


def _build_prompt(question, context):
    """Wrap *context* in the answering instructions.

    The question is stated both before and after the quoted snippets.
    """
    return (
        f"{question}\n"
        "Below are snippets from different articles with title and date of publication.\n"
        "ONLY use the information below to answer the question. Do not use any other information.\n"
        '"""\n'
        f"{context}\n"
        '"""\n'
        f"{question}\n"
    )


# Answer the question: retrieve supporting chunks, build the prompt from them,
# and display the generated response.
if user_input:
    retrieved_chunks = _retrieve_chunks(user_input)
    chunks_string = _format_context(retrieved_chunks)
    prompt = _build_prompt(user_input, chunks_string)
    response = chatbot.generate(prompt)
    st.write(response)