-
Notifications
You must be signed in to change notification settings - Fork 0
/
openai_embedings.js
133 lines (112 loc) · 4.95 KB
/
openai_embedings.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
// Import document loaders for different file formats
import { DirectoryLoader } from "langchain/document_loaders/fs/directory";
import { JSONLoader } from "langchain/document_loaders/fs/json";
import { TextLoader } from "langchain/document_loaders/fs/text";
import { CSVLoader } from "langchain/document_loaders/fs/csv";
import { PDFLoader } from "langchain/document_loaders/fs/pdf";
import { DocxLoader } from "langchain/document_loaders/fs/docx";
import { ChatPromptTemplate } from "@langchain/core/prompts";
import { pull } from "langchain/hub";
import { StringOutputParser } from "@langchain/core/output_parsers";
import { PromptTemplate } from "@langchain/core/prompts";
import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
// Import OpenAI language model and other related modules
import { OpenAI } from "@langchain/openai";
import { RetrievalQAChain } from "langchain/chains";
import { HNSWLib } from "@langchain/community/vectorstores/hnswlib";
import { OpenAIEmbeddings, ChatOpenAI } from "@langchain/openai";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
// Import dotenv for loading environment variables and fs for file system operations
import dotenv from "dotenv";
import fs from "fs";
dotenv.config();
// Path handed to HNSWLib when loading or saving the vector store
const VECTOR_STORE_PATH = "Data.index";

// One factory per supported file extension; DirectoryLoader selects the
// factory that matches each file it finds under ./data
const loaderFactories = {
  ".json": (filePath) => new JSONLoader(filePath),
  ".txt": (filePath) => new TextLoader(filePath),
  ".csv": (filePath) => new CSVLoader(filePath),
  ".pdf": (filePath) => new PDFLoader(filePath),
  ".docx": (filePath) => new DocxLoader(filePath),
};
const loader = new DirectoryLoader("./data", loaderFactories);

// Read every supported document once, while the module is being evaluated
console.log("Loading docs...");
const docs = await loader.load();
console.log("Docs loaded.");
/**
 * Flatten loaded documents into an array of plain text strings.
 *
 * A string `pageContent` is passed through unchanged; an array `pageContent`
 * is joined with newlines. Entries whose `pageContent` is any other type
 * (or that are null/undefined themselves) are skipped with a warning, so the
 * result never contains `undefined` values.
 *
 * @param {Array<{pageContent?: unknown}>} docs - Documents to normalize.
 * @returns {string[]} One text string per document with usable content.
 */
function normalizeDocuments(docs) {
  return docs.flatMap((doc, index) => {
    const content = doc?.pageContent;
    if (typeof content === "string") {
      return [content];
    }
    if (Array.isArray(content)) {
      return [content.join("\n")];
    }
    // Returning an empty array makes flatMap drop this entry entirely.
    console.warn(
      `Skipping document at index ${index}: unsupported pageContent type.`
    );
    return [];
  });
}
// Prompt text for the "Nexa" career-counseling assistant. The {context} and
// {question} placeholders are filled from the values passed to the chain.
const NEXA_PROMPT_TEMPLATE = `
You are a career counseling assistant named Nexa, specializing in personalized advice for students in Pakistan. Your goal is to help users make informed decisions about their future career paths based on their age, gender, educational background, interests, goals, strengths, weaknesses, and financial situation. Use the information provided by the user and the context to categorize them into one of the target audiences and provide tailored advice.
Response Format:
- Start your response with Dear, considering your current situation, I suggest you these [field name], [field name], [field name] career paths. You have the option to do [degree/program name] in these fields from [Uni Name/Institute Name] or [Uni Name/Institute Name].
Use the following context to answer the question and provide helpful advice to the user.
{context}
Question: {question}
Helpful Answer:`;

/**
 * Load the vector store saved at VECTOR_STORE_PATH, or, when that path does
 * not exist, build one from the module-level `docs` and save it there.
 *
 * @returns {Promise<HNSWLib>} The loaded or newly created vector store.
 */
async function loadOrCreateVectorStore() {
  console.log("Checking for existing vector store...");
  if (fs.existsSync(VECTOR_STORE_PATH)) {
    console.log("Loading existing vector store...");
    const existingStore = await HNSWLib.load(
      VECTOR_STORE_PATH,
      new OpenAIEmbeddings()
    );
    console.log("Vector store loaded.");
    return existingStore;
  }

  console.log("Creating new vector store...");
  const textSplitter = new RecursiveCharacterTextSplitter({
    chunkSize: 1500,
  });
  const normalizedDocs = normalizeDocuments(docs);
  const splitDocs = await textSplitter.createDocuments(normalizedDocs);
  const newStore = await HNSWLib.fromDocuments(
    splitDocs,
    new OpenAIEmbeddings()
  );
  // Save the store so later calls can take the load branch above.
  await newStore.save(VECTOR_STORE_PATH);
  console.log("Vector store created.");
  return newStore;
}

/**
 * Answer a user's question with retrieval-augmented generation: fetch the
 * most similar chunks from the vector store and pass them, together with
 * the question, to the chat model through the Nexa prompt.
 *
 * Errors are logged with console.error and not rethrown; in that case the
 * returned promise resolves to `undefined`, so callers must handle that.
 *
 * @param {string} userData - The user's question / profile description.
 * @returns {Promise<string|undefined>} The model's answer, or `undefined`
 *   when any step failed.
 */
const runEmbeddings = async (userData) => {
  try {
    const vectorStore = await loadOrCreateVectorStore();
    const retriever = vectorStore.asRetriever({
      k: 6,
      searchType: "similarity",
    });

    const customRagPrompt = PromptTemplate.fromTemplate(NEXA_PROMPT_TEMPLATE);
    const llm = new ChatOpenAI({
      model: "gpt-3.5-turbo-16k",
      temperature: 0,
    });
    const ragChain = await createStuffDocumentsChain({
      llm,
      prompt: customRagPrompt,
      outputParser: new StringOutputParser(),
    });

    // Retrievers are Runnables: invoke() replaces the deprecated
    // getRelevantDocuments() and returns the same list of documents.
    const context = await retriever.invoke(userData);
    return await ragChain.invoke({
      question: userData,
      context,
    });
  } catch (error) {
    // Kept as log-and-continue so existing callers never see a rejection.
    console.error(error);
    return undefined;
  }
};
// const userData =
// "Suppose I have 85% in my matriculation in science subjects, which colleges I can opt in Karachi?";
//runEmbeddings(userData);
export default runEmbeddings;