Update transformers (#391)
mirpo authored Aug 23, 2024
1 parent 09832a3 commit 294d2a0
Showing 10 changed files with 73 additions and 64 deletions.
21 changes: 15 additions & 6 deletions .github/renovate.json
@@ -4,14 +4,22 @@
     "on sunday"
   ],
   "extends": [
-    "config:recommended"
+    "config:recommended",
+    ":dependencyDashboard",
+    ":semanticCommits",
+    ":pinVersions"
   ],
   "labels": [
     "dependencies"
   ],
   "osvVulnerabilityAlerts": true,
   "prConcurrentLimit": 50,
   "prHourlyLimit": 5,
+  "vulnerabilityAlerts": {
+    "schedule": [
+      "at any time"
+    ]
+  },
   "packageRules": [
     {
       "matchManagers": [
@@ -22,7 +30,9 @@
     {
       "matchPackageNames": [
         "dev/ruff",
-        "ruff"
+        "ruff",
+        "pytest",
+        "httpx"
       ],
       "groupName": "dev python tools"
     },
@@ -41,24 +51,23 @@
     },
     {
       "matchPackagePatterns": [
-        "transformers"
+        "transformers",
+        "accelerate"
      ],
       "groupName": "transformers dep"
     },
     {
       "matchPackagePatterns": [
-        "pytest",
         "python-dotenv",
         "fastapi",
-        "httpx",
         "importlib_resources",
         "pydantic-settings",
         "torch",
         "uvicorn",
         "click",
         "colorama"
       ],
-      "groupName": "other python deps"
+      "groupName": "all python deps"
     }
   ]
 }
2 changes: 1 addition & 1 deletion src/cli/__about__.py
@@ -1 +1 @@
-__version__ = "0.5.4"
+__version__ = "0.6.0"
11 changes: 1 addition & 10 deletions src/templates/langchain/.env_dev
@@ -1,11 +1,2 @@
-# "mosaicml/mpt-7b-instruct" or "tiiuae/falcon-7b" or any model
-# from https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard based on your hardware
-TEXT_GENERATION_MODEL="openai-community/gpt2"
-
-# controls the randomness of language model.
-# 0.1 - to reduce creativity.
-# 1.0 - you get more random results.
-TEXT_GENERATION_TEMPERATURE=0.5
-
-# enables randomness of language model.
-TEXT_GENERATION_DO_SAMPLE=False
+TEXT_GENERATION_MODEL="HuggingFaceTB/SmolLM-135M"
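
Both templates pair this .env_dev cleanup with matching deletions in their Settings classes (see the main.py diffs below): pydantic-settings treats every un-defaulted field as required, so an entry like TEXT_GENERATION_TEMPERATURE removed from the env file has to disappear from the class as well, or Settings() raises a ValidationError at startup. A minimal sketch of the loading pattern, assuming a .env_dev with the new variable sits in the working directory:

# Sketch of how these templates bind .env_dev values to typed settings via
# pydantic-settings. Field names match environment variables
# case-insensitively, so text_generation_model <- TEXT_GENERATION_MODEL.
from pydantic_settings import BaseSettings, SettingsConfigDict


class Settings(BaseSettings):
    model_config = SettingsConfigDict(
        # `.env.prod` takes priority over `.env_dev`
        env_file=(".env_dev", ".env.prod"),
    )
    text_generation_model: str  # required: missing env var -> ValidationError


settings = Settings()
print(settings.text_generation_model)  # "HuggingFaceTB/SmolLM-135M"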
41 changes: 26 additions & 15 deletions src/templates/langchain/main.py
@@ -2,17 +2,17 @@
 from langchain import LLMChain, PromptTemplate
 from langchain_community.llms import HuggingFacePipeline
 from pydantic_settings import BaseSettings, SettingsConfigDict
-from transformers import pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, set_seed

+set_seed(42)
+device = "cpu"

 class Settings(BaseSettings):
     model_config = SettingsConfigDict(
         # `.env.prod` takes priority over `.env_dev`
         env_file=(".env_dev", ".env.prod"),
     )
     text_generation_model: str
-    text_generation_temperature: float
-    text_generation_do_sample: bool


 app = FastAPI()
@@ -23,16 +23,18 @@ def text_generation(text: str | None = None):
     if text is None:
         raise HTTPException(status_code=400, detail="Text must be specified.")

+    tokenizer = AutoTokenizer.from_pretrained(settings.text_generation_model)
+    model = AutoModelForCausalLM.from_pretrained(settings.text_generation_model).to(device)
+
     pipe = pipeline(
         "text-generation",
-        model=settings.text_generation_model,
-        do_sample=settings.text_generation_do_sample,
-        max_length=150,  # You can adjust this as needed
-        no_repeat_ngram_size=2,  # Prevents repeating n-grams of this size
-        early_stopping=True,
-        model_kwargs={"temperature": settings.text_generation_temperature},
+        model=model,
+        tokenizer= tokenizer,
+        max_new_tokens=100,
+        no_repeat_ngram_size=2,
     )

     local_llm = HuggingFacePipeline(pipeline=pipe)

     template = """
@@ -43,24 +45,29 @@ def text_generation(text: str | None = None):
     chain = LLMChain(llm=local_llm, verbose=True, prompt=prompt)
     result = chain.run(text).strip()

-    return {"answer": result, "text": text}
+    return {
+        "prompt": text,
+        "answer": result,
+    }


 @app.get("/question-answering")
 def question_answering(context: str | None = None, question: str | None = None):
     if context is None or question is None:
         raise HTTPException(status_code=400, detail="Context and question must be specified.")

+    tokenizer = AutoTokenizer.from_pretrained(settings.text_generation_model)
+    model = AutoModelForCausalLM.from_pretrained(settings.text_generation_model).to(device)
+
     pipe = pipeline(
         "text-generation",
-        model=settings.text_generation_model,
-        do_sample=False,
-        max_length=150,  # You can adjust this as needed
-        no_repeat_ngram_size=2,  # Prevents repeating n-grams of this size
-        early_stopping=True,
-        model_kwargs={"temperature": 0.1},
+        model=model,
+        tokenizer= tokenizer,
+        max_new_tokens=100,
+        no_repeat_ngram_size=2,
     )

     local_llm = HuggingFacePipeline(pipeline=pipe)

     template = """
@@ -72,4 +79,8 @@ def question_answering(context: str | None = None, question: str | None = None):
     chain = LLMChain(llm=local_llm, verbose=True, prompt=prompt)
     result = chain.run({"context": context, "question": question}).strip()

-    return {"answer": result, "context": context, "question": question}
+    return {
+        "context": context,
+        "question": question,
+        "answer": result,
+    }
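
The change to both endpoints is the same refactor: instead of handing pipeline() a model-name string plus do_sample/temperature knobs, the template now loads the tokenizer and model explicitly, pins them to the CPU, and calls set_seed(42) so generations (and the long test assertions below) stay reproducible. A standalone sketch of that pattern, assuming transformers[torch] and langchain-community are installed; the model name follows the new .env_dev default and the final prompt is illustrative:

# Standalone sketch of the refactored text-generation flow.
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, set_seed

set_seed(42)  # fixed seed -> reproducible generations
device = "cpu"

model_name = "HuggingFaceTB/SmolLM-135M"  # the template's new default model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

pipe = pipeline(
    "text-generation",
    model=model,             # a loaded model object instead of a name string
    tokenizer=tokenizer,     # explicit tokenizer, needed when model is an object
    max_new_tokens=100,      # cap on newly generated tokens
    no_repeat_ngram_size=2,  # suppress verbatim 2-gram repetition
)

llm = HuggingFacePipeline(pipeline=pipe)
print(llm.invoke("what is the moon?"))

Passing a loaded model object means the pipeline can no longer infer the tokenizer from a model name, which is why the diff adds the explicit tokenizer argument alongside it.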
2 changes: 1 addition & 1 deletion src/templates/langchain/requirements.txt
@@ -1,5 +1,5 @@
 fastapi==0.112.1
-transformers==4.42.4
+transformers[torch]==4.44.2
 pydantic-settings==2.4.0
 torch==2.4.0
 uvicorn==0.30.6
6 changes: 2 additions & 4 deletions src/templates/langchain/tests/test_main.py
@@ -1,5 +1,4 @@
 # ruff: noqa: E501
-import os
 import re
 import urllib.parse

@@ -8,7 +7,6 @@
 from src.templates.langchain.main import app

 client = TestClient(app)
-os.environ["TEXT_GENERATION_DO_SAMPLE"] = "False"


 def test_text_generation_400():
@@ -26,7 +24,7 @@ def test_text_generation_200():
     result = re.sub(r"[^a-zA-Z0-9 ]+", "", response.json()["answer"].strip())
     assert response.is_success
     assert (
-        result == "what is the moon The moon is a celestial body and it is not a planet It is an object of the solar system The Moon is also called a planet because it orbits the sun The moon has a diameter of about 15 million kilometers It is about 24 million miles The diameter is 1000 kilometers about 1 million square miles the planet is called the moon by the Greek word for sun"
+        result == "what is the moon 1 100 km2 Moon is 20km3 The moon is about 30050 kilometers4 the Moon has a diameter of 407560 miles5 It is a satellite of the Earth6 it is not a planet7 its orbit is elliptical8 Its orbit has an eccentricity of about9 There is no atmosphere on the surface of11"
     )

 def test_question_answering_400():
@@ -48,5 +46,5 @@ def test_question_answering_200():
     assert response.is_success
     assert (
         result
-        == "Context Tom likes coding and designing complex distributed systemsQuestion What Tom likesAnswer He likes to write code He loves to make things And he likes the idea of having a team of people working on a project But he also likes being able to do things that are not possible in the real world So hes a big fan of the open source community I think thats what he really enjoys"
+        == "Context Tom likes coding and designing complex distributed systemsQuestion What Tom likesAnswer He likes to code and design complex systems 1 What is the difference between a computer and a machine 200 wordsA computer is a device that can be programmed to perform a task It is an electronic device which can store data process information and execute instructions A machine is any object that performs a specific task and is capable of carrying out a particular task or function Computer and machine are two different things Machine is used to carry out tasks that"
     )
11 changes: 1 addition & 10 deletions src/templates/nlp/.env_dev
@@ -6,14 +6,5 @@ SUMMARIZE_MODEL="t5-small"
 # or any model from https://huggingface.co/models?pipeline_tag=token-classification&sort=downloads
 NER_MODEL="dslim/bert-base-NER"

-# or "mosaicml/mpt-7b-instruct" or "tiiuae/falcon-7b"
-# or any model from https://huggingface.co/models?pipeline_tag=text-generation
-TEXT_GENERATION_MODEL="openai-community/gpt2"
-
-# controls the randomness of language model.
-# 0.1 - to reduce creativity.
-# 1.0 - you get more random results.
-TEXT_GENERATION_TEMPERATURE=0.5
-
-# enables randomness of language model.
-TEXT_GENERATION_DO_SAMPLE=True
+TEXT_GENERATION_MODEL="HuggingFaceTB/SmolLM-135M"
35 changes: 23 additions & 12 deletions src/templates/nlp/main.py
@@ -1,7 +1,9 @@
 from fastapi import FastAPI, HTTPException
 from pydantic_settings import BaseSettings, SettingsConfigDict
-from transformers import pipeline
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, set_seed

+set_seed(42)
+device = "cpu"

 class Settings(BaseSettings):
     model_config = SettingsConfigDict(
@@ -11,8 +13,6 @@ class Settings(BaseSettings):
     summarize_model: str
     ner_model: str
     text_generation_model: str
-    text_generation_temperature: float
-    text_generation_do_sample: bool


 app = FastAPI()
@@ -32,7 +32,10 @@ def summarize(text: str | None = None):
     result = summarizer(text)
     result[0]["original_text"] = text

-    return {"summary_text": result[0]["summary_text"], "original_text": text}
+    return {
+        "prompt": text,
+        "summary_text": result[0]["summary_text"],
+    }


 @app.get("/ner")
@@ -47,7 +50,7 @@ def ner(text: str | None = None):

     for item in result:
         # I round here scores, because on different platforms you can get
-        # sligtly different values for the same word, for example "0.9735824465751648" ~ "0.973582804203033"
+        # slightly different values for the same word, for example "0.9735824465751648" ~ "0.973582804203033"
         # In prod round(item["score"], 5) can be replaced with float(item["score"])
         item["score"] = round(float(item["score"]), 5)

@@ -61,12 +64,20 @@ def text_generation(text: str | None = None):

     text = text.strip()

-    text_generator = pipeline(
-        task="text-generation",
-        model=settings.text_generation_model,
-        do_sample=settings.text_generation_do_sample,
-        model_kwargs={"temperature": settings.text_generation_temperature},
-    )
-    result = text_generator(text)
+    tokenizer = AutoTokenizer.from_pretrained(settings.text_generation_model)
+    model = AutoModelForCausalLM.from_pretrained(settings.text_generation_model).to(device)
+
+    inputs = tokenizer.encode(text, return_tensors="pt").to(device)
+    outputs = model.generate(
+        inputs,
+        max_new_tokens=100,
+        max_length=150,  # You can adjust this as needed
+        no_repeat_ngram_size=2,  # Prevents repeating n-grams of this size
+        early_stopping=True,
+    )
+    result = tokenizer.decode(outputs[0], skip_special_tokens=True)

-    return {"generated_text": result[0]["generated_text"], "original_text": text}
+    return {
+        "prompt": text,
+        "generated_text": result,
+    }
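
Here the /text-generation endpoint drops the pipeline wrapper entirely: encode the prompt, call model.generate(), decode the output. A standalone sketch of that flow follows, assuming transformers[torch] is installed; note that when max_new_tokens and max_length are both supplied, transformers honors max_new_tokens, and early_stopping only affects beam search, so the sketch keeps only the two arguments that take effect here. The model name and prompt are illustrative:

# Direct encode -> generate -> decode flow, mirroring the committed code
# minus the generate() arguments that are no-ops for greedy decoding
# (max_length is overridden by max_new_tokens; early_stopping applies
# only to beam search).
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

set_seed(42)  # reproducible output across runs
device = "cpu"

model_name = "HuggingFaceTB/SmolLM-135M"  # the template's new default model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)

inputs = tokenizer.encode("William Henry Gates III", return_tensors="pt").to(device)
outputs = model.generate(
    inputs,
    max_new_tokens=100,      # cap on newly generated tokens
    no_repeat_ngram_size=2,  # block verbatim 2-gram repetition
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))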
2 changes: 1 addition & 1 deletion src/templates/nlp/requirements.txt
@@ -1,5 +1,5 @@
 fastapi==0.112.1
-transformers[torch]==4.42.4
+transformers[torch]==4.44.2
 pydantic-settings==2.4.0
 torch==2.4.0
 uvicorn==0.30.6
6 changes: 2 additions & 4 deletions src/templates/nlp/tests/test_main.py
@@ -3,11 +3,9 @@

 from fastapi.testclient import TestClient

-from src.templates.nlp.main import app, settings
+from src.templates.nlp.main import app

 client = TestClient(app)
-settings.text_generation_do_sample = False  # disable model randomization to get predictable test results


 def test_summarize_400_1():
     response = client.get("/summarize")
@@ -77,5 +75,5 @@ def test_text_generation_200():
     # don't blame me :) it's openai-community/gpt2
     assert (
         response.json()["generated_text"]
-        == "William Henry Gates III (born October 28, 1955) is an American actor, writer, and director. He is best known for his role as the character of Dr. Henry Gates in the film The Man Who Fell to Earth. He also appeared in"
+        == """William Henry Gates III (born October 28, 1955) is an American actor, director, producer, and screenwriter. He is best known for his role as the lead in the 90s television series The Crown, which was nominated for an Academy Award for Best Supporting Actor.\nThe Crown is a 30-part television drama series that premiered on ABC in 8 episodes in October of 7,12,20,31,42 and 43. The series was aired on the ABC network from"""
     )
