"""Thin wrapper that builds a Chroma client and a Markdown text splitter."""

import os

import chromadb
import pymupdf4llm
from chromadb.config import Settings
from chromadb.utils import embedding_functions
from dotenv import load_dotenv
from semantic_text_splitter import MarkdownSplitter

from _arango import ArangoDB

# Load the Chroma connection settings before any client is constructed.
load_dotenv('.chroma_env')


class ChromaDB:
    """Hold a Chroma client (``self.db``) and a Markdown splitter (``self.ts``).

    Args:
        local_deployment: When true, use ``chromadb.PersistentClient`` with the
            path ``chroma_<db>``; otherwise use ``chromadb.HttpClient`` with
            the host and token-auth settings read from environment variables.
        db: Suffix used to build the persistent client's path. It is not used
            when connecting over HTTP.
        max_characters: Capacity passed to ``MarkdownSplitter``. Defaults to
            2200, the value that was previously hard-coded.

    Environment variables read in the HTTP case:
        ``CHROMA_HOST``, ``CHROMA_CLIENT_AUTH_CREDENTIALS``,
        ``CHROMA_AUTH_TOKEN_TRANSPORT_HEADER``.
    """

    def __init__(
        self,
        local_deployment: bool = False,
        db='sci_articles',
        *,
        max_characters: int = 2200,
    ):
        if local_deployment:
            self.db = chromadb.PersistentClient(f'chroma_{db}')
        else:
            self.db = chromadb.HttpClient(
                host=os.getenv('CHROMA_HOST'),
                settings=Settings(
                    chroma_client_auth_provider="chromadb.auth.token_authn.TokenAuthClientProvider",
                    chroma_client_auth_credentials=os.getenv("CHROMA_CLIENT_AUTH_CREDENTIALS"),
                    chroma_auth_token_transport_header=os.getenv("CHROMA_AUTH_TOKEN_TRANSPORT_HEADER"),
                ),
            )

        # NOTE(review): the original source was whitespace-collapsed, so the
        # nesting of this statement could not be recovered; the splitter is
        # assumed to be wanted for both deployment modes -- confirm.
        self.ts = MarkdownSplitter(max_characters)


if __name__ == "__main__":
    # Use a name other than ``chromadb`` so the imported module is not
    # shadowed; ChromaDB.__init__ looks the module up by that global name.
    chroma = ChromaDB()
    print(chroma.db.list_collections())