diff --git a/_llm/llm.py b/_llm/llm.py
index b128fb4..a33a220 100644
--- a/_llm/llm.py
+++ b/_llm/llm.py
@@ -59,6 +59,8 @@ class LLM:
         chosen_backend: str = None,
         tools: list = None,
         think: bool = False,
+        timeout: int = 240,
+        local_available: bool = False,
     ) -> None:
         """
         Initialize the assistant with the given parameters.
@@ -86,6 +88,9 @@ class LLM:
         self.messages = messages or [{"role": "system", "content": self.system_message}]
         self.max_length_answer = max_length_answer
         self.chat = chat
+        self.think = think
+        self.tools = tools or []
+        self.local_available = local_available

         self.chosen_backend = chosen_backend

@@ -97,7 +102,7 @@ class LLM:
             headers["X-Chosen-Backend"] = self.chosen_backend

         self.host_url = os.getenv("LLM_API_URL").rstrip("/api/chat/")
-        self.client: Client = Client(host=self.host_url, headers=headers, timeout=120)
+        self.client: Client = Client(host=self.host_url, headers=headers, timeout=timeout)
         self.async_client: AsyncClient = AsyncClient()

     def get_credentials(self):
@@ -150,12 +155,21 @@ class LLM:
             message = self.prepare_images(images, message)
             model = self.get_model("vision")
         else:
-            model = self.get_model(model)
+            if model in [
+                "small",
+                "standard",
+                "standard_64k",
+                "reasoning",
+                "tools",
+            ]:
+                model = self.get_model(model)
+
         self.messages.append(message)
+
         return model

-    def _build_headers(self, model, tools, think):
+    def _build_headers(self, model):
         """Build HTTP headers for API requests, including auth and backend/model info."""
         headers = {"Authorization": f"Basic {self.get_credentials()}"}
         if self.chosen_backend and model not in [
@@ -179,16 +193,16 @@ class LLM:
         )
         return options

-    @backoff.on_exception(
-        backoff.expo,
-        (ResponseError, TimeoutError),
-        max_tries=3,
-        factor=2,
-        base=10,
-        on_backoff=lambda details: print_yellow(
-            f"Retrying due to error: {details['exception']}"
-        )
-    )
+    # @backoff.on_exception(
+    #     backoff.expo,
+    #     (ResponseError, TimeoutError),
+    #     max_tries=3,
+    #     factor=2,
+    #     base=10,
+    #     on_backoff=lambda details: print_yellow(
+    #         f"Retrying due to error: {details['exception']}"
+    #     )
+    # )
     def _call_remote_api(
         self, model, tools, stream, options, format, headers, think=False
     ):
@@ -308,16 +322,16 @@ class LLM:
             self.messages = [self.messages[0]]
         return response_obj.message

-    @backoff.on_exception(
-        backoff.expo,
-        (ResponseError, TimeoutError),
-        max_tries=3,
-        factor=2,
-        base=10,
-        on_backoff=lambda details: print_yellow(
-            f"Retrying due to error: {details['exception']}"
-        )
-    )
+    # @backoff.on_exception(
+    #     backoff.expo,
+    #     (ResponseError, TimeoutError),
+    #     max_tries=3,
+    #     factor=2,
+    #     base=10,
+    #     on_backoff=lambda details: print_yellow(
+    #         f"Retrying due to error: {details['exception']}"
+    #     )
+    # )
     async def _call_local_ollama_async(self, model, stream, temperature, think=False):
         """Call the local Ollama instance asynchronously (using a thread pool)."""
         import ollama
@@ -411,11 +425,11 @@ class LLM:
         images: list = None,
         model: Optional[
             Literal["small", "standard", "vision", "reasoning", "tools"]
-        ] = "standard",
+        ] = None,
         temperature: float = None,
         messages: list[dict] = None,
         format=None,
-        think=False,
+        think=None,
         force_local: bool = False,
     ):
         """
@@ -435,9 +449,10 @@ class LLM:
                 Uses instance default if not provided.
             messages (list[dict], optional): Pre-formatted message history.
             format (optional): Response format specification.
-            think (bool, optional): Whether to enable thinking mode. Defaults to False.
+            think (bool, optional): Whether to enable thinking mode. Defaults to None.
             force_local (bool, optional): Force use of local Ollama instead of remote API.
                 Defaults to False.
+            local_available (bool, optional): Whether local Ollama is available.

         Returns:
             The generated response. Type varies based on stream parameter and success:
@@ -450,13 +465,19 @@
            Prints stack trace for exceptions but doesn't propagate them,
            instead returning error messages or attempting fallback to local processing.
        """
+        if model is None and self.model:
+            model = self.model
+        elif model is None:
+            model = "standard"
         model = self._prepare_messages_and_model(
             query, user_input, context, messages, images, model
         )
         temperature = temperature if temperature else self.options["temperature"]
+        if think is None:
+            think = self.think
         if not force_local:
             try:
-                headers = self._build_headers(model, tools, think)
+                headers = self._build_headers(model)
                 options = self._get_options(temperature)
                 response = self._call_remote_api(
                     model, tools, stream, options, format, headers, think=think
                 )
@@ -480,11 +501,13 @@
                 return "An error occurred."
             except Exception as e:
                 traceback.print_exc()
-                try:
-                    return self._call_local_ollama(model, stream, temperature, think=think)
-                except Exception as e:
-                    traceback.print_exc()
-                    return "Both remote API and local Ollama failed. An error occurred."
+
+                if self.local_available:
+                    try:
+                        return self._call_local_ollama(model, stream, temperature, think=think)
+                    except Exception as e:
+                        traceback.print_exc()
+                        return "Both remote API and local Ollama failed. An error occurred."

     async def async_generate(
         self,
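
For context when reviewing, here is a minimal usage sketch of the parameters this patch introduces. It assumes the class is importable as `_llm.llm.LLM`, that `LLM_API_URL` and the credentials read by `get_credentials()` are set in the environment; the prompt text and the `query` keyword are illustrative, not taken from the diff:

```python
from _llm.llm import LLM

# `timeout` now replaces the hard-coded 120 s when constructing the remote Client,
# and `local_available` gates the fallback to a local Ollama instance when the
# remote API raises. `think` set here is the instance default that
# generate(think=None) inherits.
llm = LLM(
    timeout=240,
    local_available=False,
    think=False,
)

# With model left as None, generate() now prefers self.model if it is set and
# otherwise falls back to "standard" before resolving the concrete model name.
reply = llm.generate(query="Summarize the latest release notes.")
print(reply)
```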