VikhrModels · RefalMachine · Aug 29, 2024
diff --git a/config/api_config.yaml b/config/api_config.yaml
@@ -336,6 +336,14 @@ meta-llama-3-8b-instruct-3beams:
     api_type: openai
     parallel: 6
 
+saiga_llama3_8b_v7_no_system:
+    model_name: IlyaGusev/saiga_llama3_8b
+    endpoints:
+        - api_base: http://localhost:8000/v1
+          api_key: token-abc123
+    api_type: openai
+    parallel: 6
+
 saiga_llama3_8b_v6:
     model_name: IlyaGusev/saiga_llama3_8b
     endpoints:

diff --git a/config/gen_answer_config.yaml b/config/gen_answer_config.yaml
@@ -5,12 +5,8 @@ bench_name: arena-hard-v0.1
 temperature: 0.0
 max_tokens: 2048
 num_choices: 1
+repetition_penalty: 1.1
 
 # a list of model to generate answers
 model_list:
-  - gpt-3.5-turbo-0125
-  - gpt-4-1106-preview
-  - gigachat_lite
-  - gigachat_pro
-  - gpt-4o-mini
-  - gpt-3.5-turbo-1106
+  - saiga_llama3_8b_v7_no_system
diff --git a/gen_answer.py b/gen_answer.py
@@ -33,7 +33,7 @@
 
 
 def get_answer(
-    question: dict, model: str, endpoint_info: dict, num_choices: int, max_tokens: int, temperature: float, answer_file: str, api_dict: dict
+    question: dict, model: str, endpoint_info: dict, num_choices: int, max_tokens: int, temperature: float, repetition_penalty: float, answer_file: str, api_dict: dict
 ):
     if question["category"] in temperature_config:
         temperature = temperature_config[question["category"]]
@@ -95,6 +95,7 @@ def get_answer(
                 output = chat_completion_openai(model=endpoint_info["model_name"], 
                                                 messages=conv, 
                                                 temperature=temperature, 
+                                                repetition_penalty=repetition_penalty,
                                                 max_tokens=max_tokens, 
                                                 api_dict=api_dict)
             conv.append({"role": "assistant", "content": output})
@@ -181,6 +182,7 @@ def get_answer(
                     settings["num_choices"],
                     max_tokens[index],
                     settings["temperature"],
+                    settings.get("repetition_penalty", 1.1),
                     answer_file,
                     get_endpoint(endpoint_info["endpoints"]),
                 )

diff --git a/utils.py b/utils.py
@@ -145,7 +145,7 @@ def chat_completion_yandex(model, messages, temperature, max_tokens, api_dict=No
     return output
 
 
-def chat_completion_openai(model, messages, temperature, max_tokens, num_beams=1, api_dict=None):
+def chat_completion_openai(model, messages, temperature, max_tokens, num_beams=1, repetition_penalty=1.1, api_dict=None):
     import openai
     if api_dict:
         client = openai.OpenAI(
@@ -158,14 +158,13 @@ def chat_completion_openai(model, messages, temperature, max_tokens, num_beams=1
     output = API_ERROR_OUTPUT
     for _ in range(API_MAX_RETRY):
         try:
+            extra_body={
+                "repetition_penalty": repetition_penalty,
+            }
             if num_beams > 1: # for vllm
-                extra_body={
-                    'best_of': num_beams,
-                    'use_beam_search': num_beams > 1,
-                }
-            else:
-                extra_body = None
-            # print(messages)
+                extra_body["best_of"] = num_beams
+                extra_body["use_beam_search"] = num_beams > 1
+
             completion = client.chat.completions.create(
                 model=model,
                 messages=messages,