You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
14 lines
409 B
14 lines
409 B
import chromadb |
|
import os |
|
import pymupdf4llm |
|
from semantic_text_splitter import MarkdownSplitter |
|
from _arango import ArangoDB |
|
from pprint import pprint |
|
|
|
class ChromaDB:
    """Bundle a persistent Chroma client, a Markdown splitter and one collection.

    Attributes set by ``__init__``:
        db: the ``chromadb.PersistentClient`` instance.
        ts: the ``MarkdownSplitter`` instance.
        sci_articles: the collection named ``"sci_articles"`` obtained from
            ``db`` via ``get_or_create_collection``.
    """

    def __init__(self, path: str = "chroma_db", max_characters: int = 2200) -> None:
        """Create the client, the splitter and the ``sci_articles`` collection.

        Args:
            path: Passed as the first positional argument to
                ``chromadb.PersistentClient``. Defaults to ``"chroma_db"``,
                the value that was previously hard-coded.
            max_characters: Passed as the first positional argument to
                ``MarkdownSplitter``. Defaults to ``2200``, the value that was
                previously hard-coded.
                NOTE(review): presumably the maximum chunk size in
                characters -- confirm against the installed
                semantic_text_splitter version.
        """
        self.db = chromadb.PersistentClient(path)
        self.ts = MarkdownSplitter(max_characters)
        # get_or_create_collection is used instead of a plain create call so
        # that constructing this class repeatedly against the same path does
        # not require the collection to be absent.
        self.sci_articles = self.db.get_or_create_collection("sci_articles")
|
|
|
|