diff --git a/Dockerfile b/Dockerfile
index 7481515..71ef91b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,13 +23,11 @@ RUN apt-get install -y python3.11-venv
RUN apt-get install -y python3.11-dev
RUN apt-get install -y python3-pip
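+# CMAKE_ARGS is read by llama-cpp-python's source build, so the requirements
+# install below compiles it with the CUDA backend enabled.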
+ENV CMAKE_ARGS="-DLLAMA_CUDA=on"
+
RUN \
    python3 -m pip install -r requirements.txt && rm -rf ~/.cache && rm requirements.txt
-RUN python3 -m pip uninstall -y llama-cpp-python \
- && python3 -m pip install llama-cpp-python \
- --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu122
-
WORKDIR /app/lib
ENTRYPOINT ["python3", "main.py"]
diff --git a/appinfo/info.xml b/appinfo/info.xml
index fe1f5db..84aa310 100644
--- a/appinfo/info.xml
+++ b/appinfo/info.xml
@@ -9,7 +9,7 @@
See [the nextcloud admin docs](https://docs.nextcloud.com/server/latest/admin_manual/ai/index.html) for more information.
]]>
    </description>
-    <version>1.2.2</version>
+    <version>1.2.3</version>
    <licence>MIT</licence>
    <author>Marcel Klehr</author>
    <author>Alexander Piskun</author>
@@ -25,7 +25,7 @@ See [the nextcloud admin docs](https://docs.nextcloud.com/server/latest/admin_ma
            <registry>ghcr.io</registry>
            <image>nextcloud/llm2</image>
-            <image-tag>1.2.2</image-tag>
+            <image-tag>1.2.3</image-tag>
        </docker-install>
        <scopes>
            <value>AI_PROVIDERS</value>
        </scopes>
diff --git a/lib/chains.py b/lib/chains.py
index 7f1e706..423e6af 100644
--- a/lib/chains.py
+++ b/lib/chains.py
@@ -66,11 +66,16 @@ def generate_llm_chain(file_name):
            n_ctx=model_config['gpt4all_config']["n_predict"],
            max_tokens=model_config["gpt4all_config"]["max_tokens"],
            stop=model_config["gpt4all_config"]["stop"],
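+            # echo=True makes llama-cpp-python return the prompt together with the completion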
+            echo=True
        )
        print(f'Using: {config["llama"]["model_kwargs"]["device"]}', flush=True)
    except Exception as gpu_error:
        try:
-            llm = LlamaCpp(model_path=path, device="cpu", max_tokens=4096)
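+            # CPU fallback now reuses the same context size, token limit, and stop
+            # words as the GPU attempt instead of a hard-coded max_tokens=4096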
+            llm = LlamaCpp(model_path=path, device="cpu",
+                           n_ctx=model_config['gpt4all_config']["n_predict"],
+                           max_tokens=model_config["gpt4all_config"]["max_tokens"],
+                           stop=model_config["gpt4all_config"]["stop"],
+                           echo=True)
            print("Using: CPU", flush=True)
        except Exception as cpu_error:
            raise RuntimeError(f"Error: Failed to initialize the LLM model on both GPU and CPU.", f"{cpu_error}") from cpu_error