prompt engineering

lamalab-org · Sep 26, 2023 · f73a76c · f73a76c
1 parent 1635f89
commit f73a76c
Show file tree

Hide file tree

Showing 7 changed files with 148 additions and 431 deletions.
diff --git a/docs/source/usage.rst b/docs/source/usage.rst
@@ -60,4 +60,16 @@ from langchain.llms import OpenAI
 classifier = FewShotClassifier(LangChainChatModelWrapper(ChatAnthropic()))
 # or classifier = FewShotClassifier(OpenAI())
 classifier.fit(X, y)
-```
+classifier.predict(X)
+```
+
+Note that the logic is built such that if the number of extracted outputs is not equal to the number of query points, we return :code:`None`
+as the prediction for all query points. This is because, with the current fixed prompt setup, we cannot unambiguously assign the outputs to the query points.
+
+Classification 
+----------------
+
+
+
+Regression
+--------------
diff --git a/experiments/run_experiments_anthropic.py b/experiments/run_experiments_anthropic.py
@@ -0,0 +1,98 @@
+from chemlift.icl.utils import LangChainChatModelWrapper
+from langchain.chat_models import ChatAnthropic
+from chemlift.icl.fewshotclassifier import FewShotClassifier
+from chemlift.icl.fewshotpredictor import Strategy
+from gptchem.data import get_photoswitch_data
+from sklearn.model_selection import train_test_split
+from gptchem.evaluator import evaluate_classification
+import time
+from fastcore.xtras import save_pickle, load_pickle
+import os
+import dotenv
+
+dotenv.load_dotenv("../.env", override=True)
+
+number_support_samples = [5, 10, 20, 50, 100]
+strategies = [Strategy.RANDOM, Strategy.DIVERSE]
+anthropic_modes = ["claude-instant-1", "claude-2"]
+
+
+def get_timestr():
+    """Return the current local time as a ``YYYY-MM-DD_HH-MM-SS`` string."""
+    timestamp_format = "%Y-%m-%d_%H-%M-%S"
+    return time.strftime(timestamp_format)
+
+
+def train_test(
+    num_support_samples,
+    strategy,
+    model,
+    num_test_points,
+    random_state=42,
+    temperature=0.8,
+    max_test=5,
+):
+    """Run one few-shot classification experiment and pickle its report.
+
+    Fits a ``FewShotClassifier`` backed by an Anthropic chat model on the
+    photoswitch data, predicts the held-out points, and writes the evaluation
+    report (together with the run settings) to the ``results`` directory.
+
+    Args:
+        num_support_samples: Passed to the classifier as ``n_support``.
+        strategy: ``Strategy`` member passed to the classifier; its ``.value``
+            is stored in the report.
+        model: Model name passed to ``ChatAnthropic``.
+        num_test_points: ``test_size`` handed to ``train_test_split``.
+        random_state: Used both as the classifier seed and as the split seed.
+        temperature: Sampling temperature passed to ``ChatAnthropic``.
+        max_test: Passed to the classifier as ``max_test``.
+    """
+    wavelength_column = "E isomer pi-pi* wavelength in nm"
+
+    llm = LangChainChatModelWrapper(ChatAnthropic(model=model, temperature=temperature))
+
+    classifier = FewShotClassifier(
+        llm,
+        property_name="class of the transition wavelength",
+        n_support=num_support_samples,
+        strategy=strategy,
+        seed=random_state,
+        prefix="You are an expert chemist. ",
+        max_test=max_test,
+    )
+
+    data = get_photoswitch_data()
+    data = data.dropna(subset=["SMILES", wavelength_column])
+
+    # Binarize at the median of all retained rows (taken before the split).
+    # The median is computed once instead of once per row.
+    median_wavelength = data[wavelength_column].median()
+    data["label"] = (data[wavelength_column] > median_wavelength).astype(int)
+
+    data_train, data_test = train_test_split(
+        data, test_size=num_test_points, stratify=data["label"], random_state=random_state
+    )
+
+    classifier.fit(data_train["SMILES"].values, data_train["label"].values)
+    predictions = classifier.predict(data_test["SMILES"].values)
+
+    targets = data_test["label"].values
+    report = evaluate_classification(targets, predictions)
+
+    # Store the run settings next to the metrics so each pickle is self-describing.
+    report["num_support_samples"] = num_support_samples
+    report["strategy"] = strategy.value
+    report["model"] = model
+    report["num_test_points"] = num_test_points
+    report["random_state"] = random_state
+
+    report["predictions"] = predictions
+    report["targets"] = targets
+    report["max_test"] = max_test
+    report["temperature"] = temperature
+
+    # exist_ok avoids the separate existence check (and the race it implies).
+    os.makedirs("results", exist_ok=True)
+
+    save_pickle(f"results/{get_timestr()}_anthropic_report.pkl", report)
+    print(report)
+
+
+if __name__ == "__main__":
+    import itertools
+
+    # Full grid of experiment settings. ``itertools.product`` varies the
+    # rightmost iterable fastest, so the seed acts as the outermost loop and
+    # max_test as the innermost one.
+    settings_grid = itertools.product(
+        range(5),  # seed
+        number_support_samples,
+        strategies,
+        anthropic_modes,
+        [50],  # num_test_points
+        [0.2, 0.8],  # temperature
+        [1, 5, 10],  # max_test
+    )
+
+    for (
+        seed,
+        num_support_samples,
+        strategy,
+        anthropic_mode,
+        num_test_points,
+        temperature,
+        max_test,
+    ) in settings_grid:
+        try:
+            train_test(
+                num_support_samples,
+                strategy,
+                anthropic_mode,
+                num_test_points,
+                random_state=seed,
+                temperature=temperature,
+                max_test=max_test,
+            )
+        except Exception as e:
+            # Best-effort sweep: report the failure and move on to the next setting.
+            print(e)
diff --git a/experiments/run_experiments_hugginface.py b/experiments/run_experiments_hugginface.py
@@ -0,0 +1,17 @@
+# from langchain import HuggingFaceHub
+# llm = HuggingFaceHub(repo_id = )
+
+
+# Candidate model identifiers (presumably Hugging Face Hub repo ids, cf. the
+# commented-out ``HuggingFaceHub(repo_id=...)`` call above -- confirm before use).
+models = [
+    "google/flan-t5-xl",
+    "bigscience/bloom",
+    "EleutherAI/pythia-70m-deduped",
+    "EleutherAI/pythia-160m-deduped",
+    "EleutherAI/pythia-410m-deduped",
+    "EleutherAI/pythia-1b-deduped",
+    "EleutherAI/pythia-2.8b-deduped",
+    "EleutherAI/pythia-6.9b-deduped",
+    "EleutherAI/pythia-12b-deduped",
+]
+
+
diff --git a/experiments/run_experiments_openai.py b/experiments/run_experiments_openai.py
@@ -0,0 +1,6 @@
+from gptchem.data import get_photoswitch_data
+from gptchem.evaluator import evaluate_classification
+
+from sklearn.model_selection import train_test_split
+
+# OpenAI model names to run the experiments with.
+openai_models = ["text-ada-001", "text-davinci-003", "gpt-4", "gpt-3.5-turbo"]
diff --git a/src/chemlift/icl/fewshotclassifier.py b/src/chemlift/icl/fewshotclassifier.py
@@ -9,9 +9,14 @@ class FewShotClassifier(FewShotPredictor):
 
     def _extract(self, generations, expected_len):
         generations = sum(
-            [g[0].text.replace("Answer: ", "").strip().split(",") for g in generations.generations],
+            [
+                g[0].text.split(":")[-1].replace("Answer: ", "").strip().split(",")
+                for generation in generations
+                for g in generation.generations
+            ],
             [],
         )
+        print(generations, len(generations))
         if len(generations) != expected_len:
             logger.warning(f"Expected {expected_len} generations, got {len(generations)}")
             return [None] * expected_len
@@ -29,4 +34,4 @@ def _extract(self, generations, expected_len):
 
     def predict(self, X: ArrayLike, generation_kwargs: dict = {}):
         generations = self._predict(X, generation_kwargs)
-        return self._extract(generations[0], expected_len=len(X))
+        return self._extract(generations, expected_len=len(X))
diff --git a/src/chemlift/icl/fewshotpredictor.py b/src/chemlift/icl/fewshotpredictor.py
@@ -20,11 +20,13 @@ class FewShotPredictor:
 Examples:
 {examples}
 
+Constraint: Make sure to return exactly {number} comma separated predictions. The predictions should be one of {allowed_values}. Return only the predictions.
+
 Answer:
 """
 
     template_single = """{prefix}What is {property_name} of {query} given the examples below?
-Answer concise by only the prediction on a new line, which is one of {allowed_values}.
+Answer concise by only returing the prediction, which should be one of {allowed_values}.
 
 Examples:
 {examples}
@@ -43,6 +45,7 @@ def __init__(
         strategy: Strategy = Strategy.RANDOM,
         seed: int = 42,
         prefix: str = "You are an expert chemist. ",
+        max_test: int = 5,
     ):
         self._support_set = None
         self._llm = llm
@@ -52,7 +55,7 @@ def __init__(
         self._property_name = property_name
         self._allowed_values = None
         self._materialclass = "molecules"
-        self._max_test = 10
+        self._max_test = max_test
         self._prefix = prefix
 
     def _format_examples(self, examples, targets):
@@ -126,6 +129,7 @@ def _predict(self, X: ArrayLike, generation_kwargs: dict = {}):
                     number=len(chunk),
                     materialclass=self._materialclass,
                     prefix=self._prefix,
+                    allowed_values=", ".join(map(str, list(self._allowed_values))),
                 )
             else:
                 examples = self._format_examples(support_examples, support_targets)
@@ -135,7 +139,7 @@ def _predict(self, X: ArrayLike, generation_kwargs: dict = {}):
                     property_name=self._property_name,
                     query=queries,
                     examples=examples,
-                    allowed_values=allowed_values,
+                    allowed_values=", ".join(map(str, list(self._allowed_values))),
                     prefix=self._prefix,
                 )