You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

32 lines
1.1 KiB

import chromadb
import os
import pymupdf4llm
from semantic_text_splitter import MarkdownSplitter
from _arango import ArangoDB
from chromadb.config import Settings
from dotenv import load_dotenv
from chromadb.utils import embedding_functions
# Load variables from the `.chroma_env` file at import time. ChromaDB.__init__
# below reads CHROMA_HOST, CHROMA_CLIENT_AUTH_CREDENTIALS and
# CHROMA_AUTH_TOKEN_TRANSPORT_HEADER through os.getenv.
load_dotenv('.chroma_env')
class ChromaDB:
    """Bundle a Chroma client (``self.db``) with a Markdown splitter (``self.ts``)."""

    def __init__(
        self,
        local_deployment: bool = False,
        db: str = 'sci_articles',
        *,
        max_characters: int = 2200,
    ):
        """Create the Chroma client and the Markdown text splitter.

        Args:
            local_deployment: When true, use a ``chromadb.PersistentClient``
                whose path is ``chroma_<db>``. Otherwise create a
                ``chromadb.HttpClient`` configured for token authentication
                from environment variables.
            db: Name used to build the ``chroma_<db>`` path of the local
                client. It is not used when connecting over HTTP.
            max_characters: Capacity handed to ``MarkdownSplitter``. Defaults
                to 2200, the value that was previously hard-coded, so
                existing callers are unaffected.
        """
        if local_deployment:
            self.db = chromadb.PersistentClient(f'chroma_{db}')
        else:
            # Host and token settings are read from the process environment;
            # os.getenv returns None for any variable that is not set.
            self.db = chromadb.HttpClient(
                host=os.getenv('CHROMA_HOST'),
                settings=Settings(
                    chroma_client_auth_provider="chromadb.auth.token_authn.TokenAuthClientProvider",
                    chroma_client_auth_credentials=os.getenv("CHROMA_CLIENT_AUTH_CREDENTIALS"),
                    chroma_auth_token_transport_header=os.getenv("CHROMA_AUTH_TOKEN_TRANSPORT_HEADER"),
                ),
            )

        # Created for both deployment modes.
        self.ts = MarkdownSplitter(max_characters)
if __name__ == "__main__":
    # Manual smoke check: build the default (HTTP) client and print the
    # collections it reports.
    # The instance is deliberately NOT named `chromadb`: that would rebind the
    # module-global name of the imported package, and ChromaDB.__init__ looks
    # up `chromadb.PersistentClient` / `chromadb.HttpClient` through that
    # global, so any later ChromaDB() in this run would fail.
    chroma = ChromaDB()
    print(chroma.db.list_collections())