diff --git a/_llm/llm.py b/_llm/llm.py
index 2de1144..b128fb4 100644
--- a/_llm/llm.py
+++ b/_llm/llm.py
@@ -11,7 +11,7 @@ from ollama import (
     ChatResponse,
     Options,
 )
-
+import backoff
 import env_manager
 from colorprinter.print_color import *
 
@@ -179,6 +179,16 @@ class LLM:
         )
         return options
 
+    @backoff.on_exception(
+        backoff.expo,
+        (ResponseError, TimeoutError),
+        max_tries=3,
+        factor=2,
+        base=10,
+        on_backoff=lambda details: print_yellow(
+            f"Retrying due to error: {details['exception']}"
+        )
+    )
     def _call_remote_api(
         self, model, tools, stream, options, format, headers, think=False
     ):
@@ -198,6 +208,16 @@ class LLM:
         )
         return response
 
+    @backoff.on_exception(
+        backoff.expo,
+        (ResponseError, TimeoutError),
+        max_tries=3,
+        factor=2,
+        base=10,
+        on_backoff=lambda details: print_yellow(
+            f"Retrying due to error: {details['exception']}"
+        )
+    )
     async def _call_remote_api_async(
         self, model, tools, stream, options, format, headers, think=False
     ):
@@ -287,7 +307,17 @@ class LLM:
         if not self.chat:
             self.messages = [self.messages[0]]
         return response_obj.message
-
+
+    @backoff.on_exception(
+        backoff.expo,
+        (ResponseError, TimeoutError),
+        max_tries=3,
+        factor=2,
+        base=10,
+        on_backoff=lambda details: print_yellow(
+            f"Retrying due to error: {details['exception']}"
+        )
+    )
     async def _call_local_ollama_async(self, model, stream, temperature, think=False):
         """Call the local Ollama instance asynchronously (using a thread pool)."""
         import ollama