Skip to content

Commit

Permalink
Fix RAG Assistant
Browse files Browse the repository at this point in the history
  • Loading branch information
artitw committed Oct 8, 2024
1 parent c58ff8f commit 3eb727f
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 6 deletions.
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="text2text",
version="1.7.2",
version="1.7.3",
author="artitw",
author_email="artitw@gmail.com",
description="Text2Text: Crosslingual NLP/G toolkit",
Expand All @@ -22,6 +22,7 @@
install_requires=[
'faiss-cpu',
'flask',
'beautifulsoup4',
'googledrivedownloader',
'llama-index-llms-ollama',
'ollama',
Expand Down
24 changes: 19 additions & 5 deletions text2text/rag_assistant.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
import text2text as t2t

import requests
import warnings
import urllib.parse
import urllib.request

import warnings
from bs4 import BeautifulSoup

def get_cleaned_html(url, timeout=10):
    """Fetch *url* and return its visible text with markup removed.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        The page text with ``<script>`` and ``<style>`` contents dropped,
        fragments joined by single spaces and surrounding whitespace
        stripped.

    Raises:
        requests.exceptions.RequestException: On connection failure,
            timeout, or a 4xx/5xx HTTP status.
    """
    r = requests.get(url, timeout=timeout)
    # Surface HTTP errors instead of returning the text of an error page
    # (e.g. a 404 body) as though it were the requested content.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    # Remove unwanted tags: their contents are code/styling, not prose.
    for tag in soup(['script', 'style']):
        tag.decompose()

    return soup.get_text(separator=' ', strip=True)

def is_valid_url(url):
try:
Expand All @@ -22,16 +35,17 @@ def __init__(self, **kwargs):
for u in urls:
if is_valid_url(u):
try:
with urllib.request.urlopen(u) as f:
texts.append(f.read())
texts.append(get_cleaned_html(u))
except Exception as e:
warnings.warn(f"Skipping URL with errors: {u}")
else:
warnings.warn(f"Skipping invalid URL: {u}")

if schema:
for t in texts:
res = t2t.Assistant.chat_completion(self, [{"role": "user", "content": t}], schema=schema)
fields = ", ".join(schema.model_fields.keys())
prompt = f'Extract {fields} from the following text:\n\n{t}'
res = t2t.Assistant.chat_completion(self, [{"role": "user", "content": prompt}], schema=schema)
res = "\n".join(f'{k}: {v}' for k,v in vars(res).items())
input_lines.append(res)
else:
Expand Down

0 comments on commit 3eb727f

Please sign in to comment.