You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
61 lines
1.5 KiB
61 lines
1.5 KiB
import os |
|
from typing import List |
|
|
|
from ollama import Client |
|
from arango.collection import Collection |
|
|
|
from _arango._arango import Arango |
|
|
|
|
|
class CustomArango(Arango):
    """``Arango`` subclass that defaults to the 'riksdagen' database and adds
    an Ollama-backed embedding helper."""

    def __init__(self, db_name='riksdagen', user=None, password=None):
        """
        Forward the connection settings to the ``Arango`` base class.

        Args:
            db_name: Name of the database; defaults to 'riksdagen'.
            user: User name, passed through unchanged to the base class.
            password: Password, passed through unchanged to the base class.
        """
        super().__init__(db_name, user, password)

    def make_embeddings(
        self,
        texts: List[str],
        *,
        host: str = '192.168.1.12:33405',
        model: str = "qwen3-embedding:latest",
        dimensions: int = 384,
    ) -> List[List[float]]:
        """
        Generate embeddings for a list of texts using Ollama.

        Args:
            texts (List[str]): List of text strings to embed.
            host (str): Address of the Ollama server to contact.
            model (str): Name of the Ollama embedding model to use.
            dimensions (int): Embedding size requested from the model.

        Returns:
            List[List[float]]: List of embedding vectors, taken from the
            ``embeddings`` attribute of the Ollama response. An empty list
            is returned when ``texts`` is empty.
        """
        # Nothing to embed: skip the network round trip entirely.
        if not texts:
            return []

        ollama_client = Client(host=host)
        response = ollama_client.embed(
            model=model,
            input=texts,
            dimensions=dimensions,
        )
        return response.embeddings
|
|
|
# Module-level client for the "riksdagen" database. The password is read from
# the ARANGO_PWD environment variable (None is passed when it is not set).
arango = CustomArango(
    db_name="riksdagen",
    user="riksdagen",
    password=os.environ.get("ARANGO_PWD"),
)
|
|
|
|
|
def main():
    """Embed a sample question and print the five best-scoring chunks.

    The question is embedded with ``arango.make_embeddings`` and the resulting
    vector is bound into an AQL query over the ``chunks`` collection. Each
    returned document is printed, followed by a '---' separator line.
    """
    question = "Vilka åtgärder bör vidtas för att hantera klimatförändringar?"
    # make_embeddings returns one entry per input text; only one was sent.
    query_embedding = arango.make_embeddings([question])[0]

    # Score every chunk's stored embedding against the query vector and keep
    # the five highest scores (SORT ... DESC + LIMIT 5).
    query = """LET query = @query_embedding

    FOR doc IN chunks
        LET score = APPROX_NEAR_COSINE(doc.embedding, query)
        SORT score DESC
        LIMIT 5
        RETURN {
            _key: doc._key,
            debate: doc.debate,
            text: doc.text,
            similarity: score
        }
    """
    result = arango.db.aql.execute(
        query=query,
        bind_vars={"query_embedding": query_embedding},
    )
    for doc in result:
        print(doc)
        print('---')


if __name__ == "__main__":
    main()
|
|
|
|