Skip to content

Commit

Permalink
fix: Allow running on GPU
Browse files Browse the repository at this point in the history
Signed-off-by: Marcel Klehr <mklehr@gmx.net>
  • Loading branch information
marcelklehr committed Jul 3, 2024
1 parent 562ecbc commit 3ff9c4b
Show file tree
Hide file tree
Showing 6 changed files with 36 additions and 8 deletions.
19 changes: 18 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
FROM python:3.11-slim-bookworm
FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04

# Build-time only: keeps apt/debconf from prompting during the image build
# without leaving the variable set in the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# software-properties-common is installed here; a later step calls
# add-apt-repository. The apt package lists are removed in the same layer
# that downloaded them so they do not persist in the image.
RUN apt-get update && \
    apt-get install -y \
        software-properties-common && \
    rm -rf /var/lib/apt/lists/*

COPY requirements.txt /

Expand All @@ -10,9 +16,20 @@ ADD li[b] /app/lib
# Copy the model files and the default configuration into /app.
# NOTE(review): the bracket glob (e.g. `model[s]`) presumably exists so the
# build does not fail when the directory is absent from the build context —
# confirm against the builder in use.
ADD model[s] /app/models
ADD default_confi[g] /app/default_config

# Add the deadsnakes PPA and install Python 3.11 plus pip in a single layer:
# `apt-get update` and the installs that depend on it can then never be split
# across a stale cached layer, and the package lists are removed in the layer
# that created them.
# NOTE(review): recommended packages are intentionally still installed. The
# later `pip install -r requirements.txt` may need a compiler toolchain that
# python3-pip presumably pulls in through its recommends — confirm before
# adding --no-install-recommends.
RUN add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get update && \
    apt-get install -y \
        python3-pip \
        python3.11 \
        python3.11-dev \
        python3.11-venv && \
    rm -rf /var/lib/apt/lists/*

# Install the Python dependencies, then remove ~/.cache and the requirements
# file within the same layer.
RUN python3 -m pip install -r requirements.txt \
    && rm -rf ~/.cache \
    && rm requirements.txt

# Replace the llama-cpp-python package installed from requirements.txt with
# one resolved with the cu122 wheel index available.
# --no-cache-dir: this layer performs no cache cleanup of its own, so pip's
# download cache would otherwise remain in the image.
RUN python3 -m pip uninstall -y llama-cpp-python \
    && python3 -m pip install --no-cache-dir llama-cpp-python \
       --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu122

# Run from /app/lib so that the relative path main.py in the entrypoint
# resolves inside that directory.
WORKDIR /app/lib
# Exec (JSON-array) form: python3 is started directly, without a wrapping shell.
ENTRYPOINT ["python3", "main.py"]

Expand Down
8 changes: 8 additions & 0 deletions default_config/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,14 @@
"stop": ["<|im_end|>"]
}
},
"meta-Llama-3-8B-instruct.Q4_K_M": {
"prompt": "<|im_start|> system\n{system_prompt}\n<|im_end|>\n<|im_start|> user\n{user_prompt}\n<|im_end|>\n<|im_start|> assistant\n",
"gpt4all_config": {
"max_tokens": 8000,
"n_predict": 4000,
"stop": ["<|im_end|>"]
}
},
"default": {
"prompt": "<|im_start|> system\n{system_prompt}\n<|im_end|>\n<|im_start|> user\n{user_prompt}\n<|im_end|>\n<|im_start|> assistant\n",
"gpt4all_config": {
Expand Down
8 changes: 5 additions & 3 deletions lib/chains.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,11 @@ def generate_llm_chain(file_name):
try:
llm = LlamaCpp(
model_path=path,
device=config["llama"]["model_kwargs"]["device"],
model_kwargs={'device': config["llama"]["model_kwargs"]["device"]},
n_gpu_layers=config["llama"]["n_gpu_layers"],
n_ctx=model_config['gpt4all_config']["n_predict"],
max_tokens=model_config["gpt4all_config"]["max_tokens"]
max_tokens=model_config["gpt4all_config"]["max_tokens"],
stop=model_config["gpt4all_config"]["stop"],
)
print(f'Using: {config["llama"]["model_kwargs"]["device"]}', flush=True)
except Exception as gpu_error:
Expand All @@ -87,7 +88,8 @@ def generate_chains():
if file.name.endswith(".gguf"):
model_name = file.name.split('.gguf')[0]

llm_chain = lambda: generate_llm_chain(file.name)
chain = [None]
llm_chain = lambda: chain[-1] if chain[-1] is not None else chain.append(generate_llm_chain(file.name)) or chain[-1]

chains[model_name + ":summary"] = lambda: SummarizeChain(llm_chain=llm_chain())
chains[model_name + ":headline"] = lambda: HeadlineChain(llm_chain=llm_chain())
Expand Down
2 changes: 1 addition & 1 deletion lib/free_prompt.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,4 @@ def _call(

@property
def _chain_type(self) -> str:
return "summarize_chain"
return "free_prompt"
3 changes: 2 additions & 1 deletion lib/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,14 @@ def run(self, *args, **kwargs): # pylint: disable=unused-argument
chain = chain_load()
print("Generating reply", flush=True)
time_start = perf_counter()
print(task.get("prompt"))
result = chain.invoke(task.get("prompt")).get("text")
del chain
print(f"reply generated: {round(float(perf_counter() - time_start), 2)}s", flush=True)
print(result, flush=True)
NextcloudApp().providers.text_processing.report_result(
task["id"],
str(result).split(sep="<|assistant|>", maxsplit=1)[-1].strip(),
str(result),
)
except Exception as e: # noqa
print(str(e), flush=True)
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@ transformers>=4.36.1
accelerate
huggingface_hub
pydantic
langchain==0.1.0
langchain==0.1.12
langchain-community
llama-cpp-python
llama-cpp-python

0 comments on commit 3ff9c4b

Please sign in to comment.