diff --git a/_llm/llm.py b/_llm/llm.py
index 2de1144..b128fb4 100644
--- a/_llm/llm.py
+++ b/_llm/llm.py
@@ -11,7 +11,7 @@ from ollama import (
     ChatResponse,
     Options,
 )
-
+import backoff
 import env_manager
 from colorprinter.print_color import *
 
@@ -179,6 +179,16 @@ class LLM:
         )
         return options
 
+    @backoff.on_exception(
+        backoff.expo,
+        (ResponseError, TimeoutError),
+        max_tries=3,
+        factor=2,
+        base=10,
+        on_backoff=lambda details: print_yellow(
+            f"Retrying due to error: {details['exception']}"
+        )
+    )
     def _call_remote_api(
         self, model, tools, stream, options, format, headers, think=False
     ):
@@ -198,6 +208,16 @@ class LLM:
         )
         return response
 
+    @backoff.on_exception(
+        backoff.expo,
+        (ResponseError, TimeoutError),
+        max_tries=3,
+        factor=2,
+        base=10,
+        on_backoff=lambda details: print_yellow(
+            f"Retrying due to error: {details['exception']}"
+        )
+    )
     async def _call_remote_api_async(
         self, model, tools, stream, options, format, headers, think=False
     ):
@@ -287,7 +307,17 @@ class LLM:
         if not self.chat:
             self.messages = [self.messages[0]]
         return response_obj.message
-
+
+    @backoff.on_exception(
+        backoff.expo,
+        (ResponseError, TimeoutError),
+        max_tries=3,
+        factor=2,
+        base=10,
+        on_backoff=lambda details: print_yellow(
+            f"Retrying due to error: {details['exception']}"
+        )
+    )
     async def _call_local_ollama_async(self, model, stream, temperature, think=False):
         """Call the local Ollama instance asynchronously (using a thread pool)."""
         import ollama