diff --git a/Dockerfile b/Dockerfile
index 7481515..71ef91b 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -23,13 +23,11 @@ RUN apt-get install -y python3.11-venv
RUN apt-get install -y python3.11-dev
RUN apt-get install -y python3-pip
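+# CMAKE_ARGS is read by llama-cpp-python's source build, so the requirements
+# install below compiles it with the CUDA backend enabled.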
+ENV CMAKE_ARGS="-DLLAMA_CUDA=on"
+
RUN \
    python3 -m pip install -r requirements.txt && rm -rf ~/.cache && rm requirements.txt
-RUN python3 -m pip uninstall -y llama-cpp-python \
- && python3 -m pip install llama-cpp-python \
- --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu122
-
WORKDIR /app/lib
ENTRYPOINT ["python3", "main.py"]
diff --git a/appinfo/info.xml b/appinfo/info.xml
index fe1f5db..84aa310 100644
--- a/appinfo/info.xml
+++ b/appinfo/info.xml
@@ -9,7 +9,7 @@
See [the nextcloud admin docs](https://docs.nextcloud.com/server/latest/admin_manual/ai/index.html) for more information.
]]>
    </description>
-    <version>1.2.2</version>
+    <version>1.2.3</version>
    <licence>MIT</licence>
    <author>Marcel Klehr</author>
    <author>Alexander Piskun</author>
@@ -25,7 +25,7 @@ See [the nextcloud admin docs](https://docs.nextcloud.com/server/latest/admin_ma
            <registry>ghcr.io</registry>
            <image>nextcloud/llm2</image>
-            <image-tag>1.2.2</image-tag>
+            <image-tag>1.2.3</image-tag>
        </docker-install>
        <scopes>
            <value>AI_PROVIDERS</value>
        </scopes>
diff --git a/lib/chains.py b/lib/chains.py
index 7f1e706..423e6af 100644
--- a/lib/chains.py
+++ b/lib/chains.py
@@ -66,11 +66,16 @@ def generate_llm_chain(file_name):
            n_ctx=model_config['gpt4all_config']["n_predict"],
            max_tokens=model_config["gpt4all_config"]["max_tokens"],
            stop=model_config["gpt4all_config"]["stop"],
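+            # echo=True makes llama-cpp-python return the prompt together with the completion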
+            echo=True
        )
        print(f'Using: {config["llama"]["model_kwargs"]["device"]}', flush=True)
    except Exception as gpu_error:
        try:
-            llm = LlamaCpp(model_path=path, device="cpu", max_tokens=4096)
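+            # CPU fallback now reuses the same context size, token limit, and stop
+            # words as the GPU attempt instead of a hard-coded max_tokens=4096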
+            llm = LlamaCpp(model_path=path, device="cpu",
+                           n_ctx=model_config['gpt4all_config']["n_predict"],
+                           max_tokens=model_config["gpt4all_config"]["max_tokens"],
+                           stop=model_config["gpt4all_config"]["stop"],
+                           echo=True)
            print("Using: CPU", flush=True)
        except Exception as cpu_error:
            raise RuntimeError(f"Error: Failed to initialize the LLM model on both GPU and CPU.", f"{cpu_error}") from cpu_error