diff --git a/_llm/llm.py b/_llm/llm.py
index b128fb4..a33a220 100644
--- a/_llm/llm.py
+++ b/_llm/llm.py
@@ -59,6 +59,8 @@ class LLM:
         chosen_backend: str = None,
         tools: list = None,
         think: bool = False,
+        timeout: int = 240,
+        local_available: bool = False,
     ) -> None:
         """
         Initialize the assistant with the given parameters.
@@ -86,6 +88,9 @@ class LLM:
         self.messages = messages or [{"role": "system", "content": self.system_message}]
         self.max_length_answer = max_length_answer
         self.chat = chat
+        self.think = think
+        self.tools = tools or []
+        self.local_available = local_available

         self.chosen_backend = chosen_backend

@@ -97,7 +102,7 @@ class LLM:
             headers["X-Chosen-Backend"] = self.chosen_backend

         self.host_url = os.getenv("LLM_API_URL").rstrip("/api/chat/")
-        self.client: Client = Client(host=self.host_url, headers=headers, timeout=120)
+        self.client: Client = Client(host=self.host_url, headers=headers, timeout=timeout)
         self.async_client: AsyncClient = AsyncClient()

     def get_credentials(self):
@@ -150,12 +155,21 @@ class LLM:
             message = self.prepare_images(images, message)
             model = self.get_model("vision")
         else:
-            model = self.get_model(model)
+            if model in [
+                "small",
+                "standard",
+                "standard_64k",
+                "reasoning",
+                "tools",
+            ]:
+                model = self.get_model(model)
+
         self.messages.append(message)
+
         return model

-    def _build_headers(self, model, tools, think):
+    def _build_headers(self, model):
         """Build HTTP headers for API requests, including auth and backend/model info."""
         headers = {"Authorization": f"Basic {self.get_credentials()}"}
         if self.chosen_backend and model not in [
@@ -179,16 +193,16 @@ class LLM:
         )
         return options

-    @backoff.on_exception(
-        backoff.expo,
-        (ResponseError, TimeoutError),
-        max_tries=3,
-        factor=2,
-        base=10,
-        on_backoff=lambda details: print_yellow(
-            f"Retrying due to error: {details['exception']}"
-        )
-    )
+    # @backoff.on_exception(
+    #     backoff.expo,
+    #     (ResponseError, TimeoutError),
+    #     max_tries=3,
+    #     factor=2,
+    #     base=10,
+    #     on_backoff=lambda details: print_yellow(
+    #         f"Retrying due to error: {details['exception']}"
+    #     )
+    # )
     def _call_remote_api(
         self, model, tools, stream, options, format, headers, think=False
     ):
@@ -308,16 +322,16 @@ class LLM:
             self.messages = [self.messages[0]]
         return response_obj.message

-    @backoff.on_exception(
-        backoff.expo,
-        (ResponseError, TimeoutError),
-        max_tries=3,
-        factor=2,
-        base=10,
-        on_backoff=lambda details: print_yellow(
-            f"Retrying due to error: {details['exception']}"
-        )
-    )
+    # @backoff.on_exception(
+    #     backoff.expo,
+    #     (ResponseError, TimeoutError),
+    #     max_tries=3,
+    #     factor=2,
+    #     base=10,
+    #     on_backoff=lambda details: print_yellow(
+    #         f"Retrying due to error: {details['exception']}"
+    #     )
+    # )
     async def _call_local_ollama_async(self, model, stream, temperature, think=False):
         """Call the local Ollama instance asynchronously (using a thread pool)."""
         import ollama
@@ -411,11 +425,11 @@ class LLM:
         images: list = None,
         model: Optional[
             Literal["small", "standard", "vision", "reasoning", "tools"]
-        ] = "standard",
+        ] = None,
         temperature: float = None,
         messages: list[dict] = None,
         format=None,
-        think=False,
+        think=None,
         force_local: bool = False,
     ):
         """
@@ -435,9 +449,10 @@ class LLM:
                 Uses instance default if not provided.
             messages (list[dict], optional): Pre-formatted message history.
             format (optional): Response format specification.
-            think (bool, optional): Whether to enable thinking mode. Defaults to False.
+            think (bool, optional): Whether to enable thinking mode. Defaults to None.
             force_local (bool, optional): Force use of local Ollama instead of remote API.
                 Defaults to False.
+            local_available (bool, optional): Whether local Ollama is available.

         Returns:
             The generated response. Type varies based on stream parameter and success:
@@ -450,13 +465,19 @@
            Prints stack trace for exceptions but doesn't propagate them,
            instead returning error messages or attempting fallback to local processing.
        """
+        if model is None and self.model:
+            model = self.model
+        elif model is None:
+            model = "standard"
         model = self._prepare_messages_and_model(
             query, user_input, context, messages, images, model
         )
         temperature = temperature if temperature else self.options["temperature"]
+        if think is None:
+            think = self.think
         if not force_local:
             try:
-                headers = self._build_headers(model, tools, think)
+                headers = self._build_headers(model)
                 options = self._get_options(temperature)
                 response = self._call_remote_api(
                     model, tools, stream, options, format, headers, think=think
                 )
@@ -480,11 +501,13 @@
                 return "An error occurred."
             except Exception as e:
                 traceback.print_exc()
-                try:
-                    return self._call_local_ollama(model, stream, temperature, think=think)
-                except Exception as e:
-                    traceback.print_exc()
-                    return "Both remote API and local Ollama failed. An error occurred."
+
+                if self.local_available:
+                    try:
+                        return self._call_local_ollama(model, stream, temperature, think=think)
+                    except Exception as e:
+                        traceback.print_exc()
+                        return "Both remote API and local Ollama failed. An error occurred."

     async def async_generate(
         self,
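
For context when reviewing, here is a minimal usage sketch of the parameters this patch introduces. It assumes the class is importable as `_llm.llm.LLM`, that `LLM_API_URL` and the credentials read by `get_credentials()` are set in the environment; the prompt text and the `query` keyword are illustrative, not taken from the diff:

```python
from _llm.llm import LLM

# `timeout` now replaces the hard-coded 120 s when constructing the remote Client,
# and `local_available` gates the fallback to a local Ollama instance when the
# remote API raises. `think` set here is the instance default that
# generate(think=None) inherits.
llm = LLM(
    timeout=240,
    local_available=False,
    think=False,
)

# With model left as None, generate() now prefers self.model if it is set and
# otherwise falls back to "standard" before resolving the concrete model name.
reply = llm.generate(query="Summarize the latest release notes.")
print(reply)
```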