feat: add ragas_cli pip pkg
Lanture1064 committed Jan 12, 2024
1 parent bc904b9 commit 34de7d8
Showing 5 changed files with 225 additions and 88 deletions.
54 changes: 54 additions & 0 deletions evaluation/README.md
@@ -0,0 +1,54 @@
# Ragas CLI

A one-step Ragas CLI tool to evaluate QCAG test sets generated by RAG apps. (Q = Question, C = Contexts, A = Answer, G = Ground_truth)

## Install with pip

```bash
pip install ragacli
```

## Arguments

- `--model`: Specifies the model to use for evaluation.
  - Defaults to `"gpt-3.5-turbo"`. Any LangChain-compatible model name works.
- `--api_base`: Specifies the base URL for the API.
  - Defaults to `"https://api.openai.com/v1"`.
- `--api_key`: Specifies the API key used to authenticate requests.
  - Not required when using a pseudo-OpenAI API server, e.g. vLLM, FastChat, etc.
- `--embeddings`: Specifies the Hugging Face embeddings model to use for evaluation.
  - Embeddings run **locally**.
  - Falls back to OpenAI embeddings if not set.
  - Recommended when using a pseudo-OpenAI API server.
- `--metrics`: Specifies the metrics to use for evaluation (see the example below).
  - Falls back to the Ragas default metrics if not set.
  - Default metrics: `["answer_relevancy", "context_precision", "faithfulness", "context_recall", "context_relevancy"]`
  - Other available metrics: `"answer_similarity"`, `"answer_correctness"`
- `--dataset`: Specifies the path to the dataset for evaluation.
  - The dataset format must meet Ragas requirements.
  - Falls back to the fiqa demo dataset if not set.
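
For example, to score only answer similarity and correctness against your own dataset (a hypothetical invocation; metric names are passed space-separated, matching the CLI sketch in `run.py` below):

```bash
python3 -m ragacli --api_key "YOUR_OPENAI_API_KEY" \
  --metrics answer_similarity answer_correctness \
  --dataset "path/to/dataset.csv"
```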

## Usage

### Fiqa dataset demo

```bash
python3 -m ragacli --api_key "YOUR_OPENAI_API_KEY"
```

### Evaluate with GPT-4 and `BAAI/bge-small-en` embeddings

The Hugging Face embeddings model runs locally, so **make sure your machine can handle it and has [sentence-transformers](https://pypi.org/project/sentence-transformers/) installed:**

```bash
pip install sentence-transformers
```
Then run:

```bash
python3 -m ragacli --model "gpt-4" --api_key "YOUR_OPENAI_API_KEY" --embeddings "BAAI/bge-small-en" --dataset "path/to/dataset.csv"
```

### Prepare Dataset

See the [**Ragas documentation**](https://docs.ragas.io/en/stable/howtos/applications/data_preparation.html).
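
As a rough sketch, a QCAG dataset CSV carries one row per question with the four columns named above; the exact column names and the serialization of the list-valued `contexts` field should be checked against the linked Ragas docs:

```csv
question,contexts,answer,ground_truth
"What does RAG stand for?","['Retrieval-augmented generation (RAG) combines a retriever with a generator...']","RAG stands for retrieval-augmented generation.","Retrieval-Augmented Generation"
```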
88 changes: 0 additions & 88 deletions evaluation/ragas-sample.py

This file was deleted.

48 changes: 48 additions & 0 deletions evaluation/run/run.py
@@ -0,0 +1,48 @@
import argparse

import src.pkg as pkg
from datasets import load_dataset
from ragas import evaluate


def run_evaluation():
    parser = argparse.ArgumentParser(description='RAGAS CLI')
    parser.add_argument("--model", type=str, default="gpt-3.5-turbo",
                        help="Specifies the model to use for evaluation. Defaults to gpt-3.5-turbo.")
    parser.add_argument("--api_base", type=str, default="https://api.openai.com/v1",
                        help="Specifies the base URL for the API. Defaults to OpenAI.")
    parser.add_argument("--api_key", type=str,
                        help="Specifies the API key to authenticate requests.")
    parser.add_argument("--embeddings", type=str,
                        help="Specifies the Huggingface embeddings model (or its path) to use for evaluation. Will use OpenAI embeddings if not set.")
    # nargs="*" accepts zero or more space-separated metric names.
    parser.add_argument("--metrics", type=str, nargs="*", default=[],
                        help="Specifies the metrics to use for evaluation.")
    parser.add_argument("--dataset", type=str,
                        help="Specifies the path to the dataset for evaluation. Will use fiqa dataset if not set.")

    args = parser.parse_args()

    model = args.model
    api_base = args.api_base
    api_key = args.api_key
    metrics = args.metrics
    dataset = args.dataset

    judge_model = pkg.wrap_langchain_llm(model, api_base, api_key)

    embeddings_model_name = args.embeddings

    if embeddings_model_name:
        embeddings = pkg.wrap_embeddings('huggingface', embeddings_model_name, None)
    else:
        embeddings = pkg.wrap_embeddings('openai', None, api_key)

    if dataset:
        # The csv loader returns a DatasetDict with a single "train" split.
        test_set = load_dataset('csv', data_files=dataset)["train"]
    else:
        print('test_set not provided, using fiqa dataset')
        fiqa = load_dataset('explodinggradients/fiqa', 'ragas_eval')
        test_set = fiqa["baseline"].select(range(5))

    # Resolve metric names into configured Ragas Metric objects.
    ms = pkg.set_metrics(metrics, judge_model, embeddings)

    return evaluate(test_set, ms)
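
Note that `run_evaluation()` is only defined here, while the README invokes the tool as `python3 -m ragacli`; a minimal package entry point along these lines would be needed (the module path `run.run` is an assumption based on this commit's file layout):

```python
# __main__.py -- hypothetical entry point so `python3 -m ragacli` works.
from run.run import run_evaluation

if __name__ == "__main__":
    # evaluate() returns a Result mapping each metric name to its score.
    print(run_evaluation())
```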

27 changes: 27 additions & 0 deletions evaluation/setup.py
@@ -0,0 +1,27 @@
# setup.py for the evaluation package

from setuptools import setup, find_packages

with open("README.md", "r", encoding="utf-8") as f:
    long_description = f.read()

setup(
    name="ragacli",
    version="0.0.1",
    author="Kielo",
    author_email="lanture1064@gmail.com",
    description="A one-step CLI tool for RAGAS",
    long_description=long_description,
    long_description_content_type="text/markdown",
    packages=find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.8",
    install_requires=[
        'ragas',
        'langchain==0.0.354'
    ]
)
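
For local development, the usual setuptools workflow applies (run from the directory holding `setup.py`, `evaluation/` in this commit):

```bash
pip install -e .    # editable install for development
python3 -m pip install build && python3 -m build    # build sdist + wheel
```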
96 changes: 96 additions & 0 deletions evaluation/src/pkg.py
@@ -0,0 +1,96 @@
# Allow `str | None` annotations on Python < 3.10, matching python_requires>=3.8.
from __future__ import annotations

import os

from langchain.chat_models import ChatOpenAI
from ragas.llms import RagasLLM, LangchainLLM
from ragas.embeddings import RagasEmbeddings, OpenAIEmbeddings, HuggingfaceEmbeddings
from ragas.metrics.base import Metric
from ragas.metrics import (
    context_precision,
    context_recall,
    context_relevancy,
    answer_relevancy,
    answer_correctness,
    answer_similarity,
    faithfulness
)

DEFAULT_METRICS = [
    "answer_relevancy",
    "context_precision",
    "faithfulness",
    "context_recall",
    "context_relevancy"
]


def wrap_langchain_llm(
    model: str,
    api_base: str | None,
    api_key: str | None
) -> LangchainLLM:
    if api_key is None:
        raise ValueError("api_key must be provided")
    if api_base is None:
        print('api_base not provided, assuming OpenAI default')
        api_base = 'https://api.openai.com/v1'
        os.environ["OPENAI_API_KEY"] = api_key
        base = ChatOpenAI(model_name=model)
    else:
        os.environ["OPENAI_API_KEY"] = api_key
        os.environ["OPENAI_API_BASE"] = api_base
        base = ChatOpenAI(
            model_name=model,
            openai_api_key=api_key,
            openai_api_base=api_base
        )
    return LangchainLLM(llm=base)


def set_metrics(
    metrics: list[str],
    llm: RagasLLM | None,
    embeddings: RagasEmbeddings | None
) -> list[Metric]:
    # Attach the judge LLM and embeddings to the metric singletons,
    # then resolve the requested metric names into Metric objects.
    ms = []
    if llm:
        context_precision.llm = llm
        context_recall.llm = llm
        context_relevancy.llm = llm
        answer_correctness.llm = llm
        answer_similarity.llm = llm
        faithfulness.llm = llm
    if embeddings:
        answer_relevancy.embeddings = embeddings
        answer_correctness.embeddings = embeddings
    if not metrics:
        metrics = DEFAULT_METRICS
    for m in metrics:
        if m == 'context_precision':
            ms.append(context_precision)
        elif m == 'context_recall':
            ms.append(context_recall)
        elif m == 'context_relevancy':
            ms.append(context_relevancy)
        elif m == 'answer_relevancy':
            ms.append(answer_relevancy)
        elif m == 'answer_correctness':
            ms.append(answer_correctness)
        elif m == 'answer_similarity':
            ms.append(answer_similarity)
        elif m == 'faithfulness':
            ms.append(faithfulness)
    return ms


def wrap_embeddings(
    model_type: str,
    model_name: str | None,
    api_key: str | None
) -> RagasEmbeddings:
    if model_type == 'openai':
        return OpenAIEmbeddings(api_key=api_key)
    elif model_type == 'huggingface':
        return HuggingfaceEmbeddings(model_name=model_name)
    else:
        raise ValueError(f"Invalid model type: {model_type}")
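
Taken together, the helpers compose as follows; a minimal sketch assuming an OpenAI key and a local Hugging Face embeddings model:

```python
import src.pkg as pkg
from datasets import load_dataset
from ragas import evaluate

# Wrap the judge LLM and the locally-run embeddings model.
judge = pkg.wrap_langchain_llm("gpt-4", None, "YOUR_OPENAI_API_KEY")
embeddings = pkg.wrap_embeddings("huggingface", "BAAI/bge-small-en", None)

# Resolve metric names into configured Ragas metrics.
metrics = pkg.set_metrics(["faithfulness", "answer_relevancy"], judge, embeddings)

# Score five rows of the fiqa demo set.
fiqa = load_dataset("explodinggradients/fiqa", "ragas_eval")
print(evaluate(fiqa["baseline"].select(range(5)), metrics))
```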
