@@ -3,14 +3,12 @@ import base64
import re
import traceback
from typing import Literal, Optional
import requests
import tiktoken
from ollama import (
    Client,
    AsyncClient,
    ResponseError,
    ChatResponse,
    Tool,
    Options,
)
@@ -371,7 +369,36 @@ class LLM:
        force_local: bool = False,
    ):
        """
        Generate a response based on the provided query and context.

        Attempts to generate a response using the remote API first, then falls
        back to local Ollama if the remote call fails or if force_local is True.

        Args:
            query (str, optional): The main query or prompt for generation.
            user_input (str, optional): Alternative user input if query is not provided.
            context (str, optional): Additional context to include in the generation.
            stream (bool, optional): Whether to stream the response. Defaults to False.
            tools (list, optional): List of tools to make available to the model.
            images (list, optional): List of images to include in the request.
            model (Literal["small", "standard", "vision", "reasoning", "tools"], optional):
                The model type to use. Defaults to "standard".
            temperature (float, optional): Temperature parameter controlling generation
                randomness. Uses the instance default if not provided.
            messages (list[dict], optional): Pre-formatted message history.
            format (optional): Response format specification.
            think (bool, optional): Whether to enable thinking mode. Defaults to False.
            force_local (bool, optional): Force use of local Ollama instead of the
                remote API. Defaults to False.

        Returns:
            The generated response. The type varies with the stream parameter and success:
            - For streaming: a stream reader object
            - For non-streaming remote success: the response message object
            - For local fallback: the local Ollama response
            - For complete failure: an error message string

        Raises:
            Nothing: exceptions are caught and their stack traces printed; the
            method falls back to local processing or returns an error message
            instead of propagating.
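        Example:
            A minimal usage sketch; the bare LLM() construction and all argument
            values are illustrative assumptions, not taken from this diff:

            >>> llm = LLM()  # assumed default construction
            >>> reply = llm.generate(query="Summarize the report.", model="small")
            >>> local = llm.generate(query="What is in this image?",
            ...                      images=[image_bytes], model="vision",
            ...                      force_local=True)  # skip the remote API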
""" |
|
|
|
|
model = self._prepare_messages_and_model( |
|
|
|
|
query, user_input, context, messages, images, model |
|
|
|
|
|