@@ -1,26 +1,24 @@
import streamlit as st
import streamlit as st
import crossref_commons . retrieval
from _llm import LLM
from _llm import LLM
from _chromadb import ChromaDB
from _chromadb import ChromaDB
from _arango import ArangoDB
from _arango import ArangoDB
from pprint import pprint
from colorprinter . print_color import *
from colorprinter . print_color import *
import re
# Initialize databases and chatbot
chromadb = ChromaDB ( )
arango = ArangoDB ( )
chatbot = LLM ( temperature = 0.1 )
# Streamlit app setup
st . title ( " EV Cars Chatbot " )
st . write ( " Ask a question about EV car battery production: " )
# User input
def get_stream ( response ) :
user_input = st . text_input ( " Ask something " )
for i in response :
yield str ( i )
def get_chunks ( user_input , n_results = 5 ) :
query = helperbot . generate ( f """ A user asked this question: " { user_input } " " .
Generate a query for the vector database . Make sure to follow the instructions you got earlier ! """
)
# Strip the query from anything that is not a word character, number, or space
query = re . sub ( r " [^ \ w \ d \ s] " , " " , query )
print_purple ( query )
if user_input :
chunks = chromadb . db . get_collection ( " sci_articles " ) . query (
chunks = chromadb . db . get_collection ( " sci_articles " ) . query (
query_texts = user_input , n_results = 7
query_texts = query , n_results = n_results
)
)
combined_chunks = [
combined_chunks = [
{ " document " : doc , " metadata " : meta }
{ " document " : doc , " metadata " : meta }
@@ -29,7 +27,15 @@ if user_input:
for i in combined_chunks :
for i in combined_chunks :
_key = i [ " metadata " ] [ " _key " ]
_key = i [ " metadata " ] [ " _key " ]
arango_metadata = arango . db . collection ( " sci_articles " ) . get ( _key ) [ " metadata " ]
arango_metadata = arango . db . collection ( " sci_articles " ) . get ( _key ) [ " metadata " ]
i [ " crossref_info " ] = arango_metadata if arango_metadata else { ' title ' : ' No title ' , ' published_date ' : ' No published date ' , ' journal ' : ' No journal ' }
i [ " crossref_info " ] = (
arango_metadata
if arango_metadata
else {
" title " : " No title " ,
" published_date " : " No published date " ,
" journal " : " No journal " ,
}
)
# Sort the combined_chunks list first by published_date, then by title
# Sort the combined_chunks list first by published_date, then by title
sorted_chunks = sorted (
sorted_chunks = sorted (
@@ -42,20 +48,77 @@ if user_input:
# Group the chunks by title
# Group the chunks by title
grouped_chunks = { }
grouped_chunks = { }
article_number = 1 # Initialize article counter
for chunk in sorted_chunks :
for chunk in sorted_chunks :
title = chunk [ " crossref_info " ] [ " title " ]
title = chunk [ " crossref_info " ] [ " title " ]
chunk [ " article_number " ] = article_number # Add article number to chunk
if title not in grouped_chunks :
if title not in grouped_chunks :
grouped_chunks [ title ] = [ ]
grouped_chunks [ title ] = { ' article_number ' : article_number , ' chunks ' : [ ] }
grouped_chunks [ title ] . append ( chunk )
article_number + = 1 # Increment article counter when a new title is encountered
grouped_chunks [ title ] [ ' chunks ' ] . append ( chunk )
return grouped_chunks
# Initialize session state for chat history
if " chat_history " not in st . session_state :
st . session_state . chat_history = [ ]
st . session_state . chatbot_memory = None
st . session_state . helperbot_memory = None
# Initialize databases and chatbot
chromadb = ChromaDB ( )
arango = ArangoDB ( )
chatbot = LLM (
temperature = 0.1 ,
system_message = """ You are chatting about electric cars. Only use the information from scientific articles you are provided with to answer questions.
Format your answers in Markdown format . Be sure to reference the source of the information with ONLY the number of the article in the running text ( e . g . " <answer based on an article> [<article number>] " ) . """ ,
)
if st . session_state . chat_history :
chatbot . messages = st . session_state . chatbot_memory
helperbot = LLM (
temperature = 0 ,
model = " small " ,
max_length_answer = 500 ,
system_message = """ Take the user input and write it as a sentence that could be used as a query for a vector database.
The vector database will return text snippets that semantically match the query , so you CAN ' T USE NEGATIONS or other complex language constructs. If there is a negation in the user input, exclude that part from the query.
If the user input seems to be a follow - up question or comment , use the context from the chat history to make a relevant query .
Answer ONLY with the query , no explanation or reasoning !
""" ,
)
if st . session_state . chat_history :
helperbot . messages = st . session_state . helperbot_memory
# Streamlit app setup
st . title ( " 🚗 Electric Cars Chatbot " )
# User input
user_input = st . chat_input ( " " )
if user_input :
st . session_state . chat_history . append ( { " role " : " user " , " content " : user_input } )
for message in st . session_state . chat_history :
with st . chat_message ( message [ " role " ] ) :
if message [ ' content ' ] :
st . markdown ( message [ " content " ] )
# Show a loading message
with st . spinner ( " Getting information from database... " ) :
relevant_chunks = get_chunks ( user_input , n_results = 5 ) #! Change n_results to 7
chunks_string = " "
chunks_string = " "
for title , chunks in grouped_chunks . items ( ) :
for title , chunks in relevant_chunks . items ( ) :
chunks_content_string = " \n (...) \n " . join (
chunks_content_string = " \n (...) \n " . join (
[ chunk [ " document " ] for chunk in chunks ]
[ chunk [ " document " ] for chunk in chunks [ ' chunks ' ] ]
)
)
chunks_string + = f """ \n
chunks_string + = f """ \n
# {title}
# {title}
## {chunks[0]['crossref_info']['published_date']} in {chunks[0]['crossref_info']['journal']}
## Article number: {chunks['article_number']}
## {chunks['chunks'][0]['crossref_info']['published_date']} in {chunks['chunks'][0]['crossref_info']['journal']}
{ chunks_content_string } \n
{ chunks_content_string } \n
- - -
- - -
\n
\n
@@ -71,5 +134,19 @@ ONLY use the information below to answer the question. Do not use any other info
{ user_input }
{ user_input }
'''
'''
response = chatbot . generate ( prompt )
st . write ( response )
response = chatbot . generate ( prompt , stream = True ) # Assuming chatbot.generate returns a generator
with st . chat_message ( " assistant " ) :
bot_response = st . write_stream ( get_stream ( response ) )
sources = ' ###### Sources: \n '
for title , chunks in relevant_chunks . items ( ) :
sources + = f ''' [ { chunks [ ' article_number ' ] } ] ** { title } ** :gray[ { chunks [ ' chunks ' ] [ 0 ] [ ' crossref_info ' ] [ ' journal ' ] } ( { chunks [ ' chunks ' ] [ 0 ] [ ' crossref_info ' ] [ ' published_date ' ] } )] \n '''
st . markdown ( sources )
bot_response = f ' { bot_response } \n \n { sources } '
# Append user input and response to chat history
st . session_state . chat_history . append (
{ " role " : " assistant " , " content " : bot_response }
)
st . session_state . chatbot_memory = chatbot . messages
st . session_state . helperbot_memory = helperbot . messages