You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
32 lines
1.1 KiB
32 lines
1.1 KiB
import chromadb |
|
import os |
|
import pymupdf4llm |
|
from semantic_text_splitter import MarkdownSplitter |
|
from _arango import ArangoDB |
|
from chromadb.config import Settings |
|
from dotenv import load_dotenv |
|
from chromadb.utils import embedding_functions |
|
|
|
# Populate the process environment from the `.chroma_env` file at import time,
# so the CHROMA_* variables are available to the os.getenv() calls in ChromaDB.
load_dotenv(dotenv_path='.chroma_env')
|
|
|
class ChromaDB:
    """Bundle a Chroma client (``self.db``) with a Markdown splitter (``self.ts``).

    Args:
        local_deployment: When True, open an on-disk ``PersistentClient``
            stored under ``chroma_<db>``. When False (the default), connect
            to the server named by the ``CHROMA_HOST`` environment variable,
            with token-auth settings taken from
            ``CHROMA_CLIENT_AUTH_CREDENTIALS`` and
            ``CHROMA_AUTH_TOKEN_TRANSPORT_HEADER``.
        db: Suffix of the local storage path. Only used when
            ``local_deployment`` is True; ignored for remote connections.
        max_characters: Size limit handed to ``MarkdownSplitter`` as its
            first argument. Defaults to 2200, the previously hard-coded value.

    Raises:
        ValueError: If a remote connection is requested but ``CHROMA_HOST``
            is unset or empty.
    """

    def __init__(
        self,
        local_deployment: bool = False,
        db: str = 'sci_articles',
        max_characters: int = 2200,
    ):
        """Create the Chroma client and the Markdown splitter (see class docs)."""
        if local_deployment:
            self.db = chromadb.PersistentClient(f'chroma_{db}')
        else:
            # Fail early with an explicit message instead of handing
            # host=None to HttpClient and getting an obscure connection error.
            host = os.getenv('CHROMA_HOST')
            if not host:
                raise ValueError(
                    "CHROMA_HOST is not set; define it in the environment "
                    "(e.g. in .chroma_env) or pass local_deployment=True."
                )
            self.db = chromadb.HttpClient(
                host=host,
                settings=Settings(
                    chroma_client_auth_provider="chromadb.auth.token_authn.TokenAuthClientProvider",
                    chroma_client_auth_credentials=os.getenv("CHROMA_CLIENT_AUTH_CREDENTIALS"),
                    chroma_auth_token_transport_header=os.getenv("CHROMA_AUTH_TOKEN_TRANSPORT_HEADER"),
                ),
            )

        # Splitter used to chunk Markdown text; size is configurable per instance.
        self.ts = MarkdownSplitter(max_characters)
|
|
|
if __name__ == "__main__":
    # Smoke check: connect with the default (remote) settings and print the
    # collections the server reports.
    # The instance is bound to `chroma_client`, not `chromadb`, so the
    # imported `chromadb` module is not shadowed at module level (which would
    # break any later ChromaDB() construction in this process).
    chroma_client = ChromaDB()
    print(chroma_client.db.list_collections())
|
|
|