
Commit 843380a

committed
LLM initialized, and packages configured
1 parent b589596

File tree

9 files changed: +322 -0 lines changed


package.json

Lines changed: 7 additions & 0 deletions
@@ -17,7 +17,14 @@
     "src/**/*.{js,jsx,ts,tsx}": "npm run lint-prettier"
   },
   "dependencies": {
+    "@langchain/pinecone": "^0.0.7",
+    "@pinecone-database/pinecone": "2.2.2",
+    "@langchain/community": "^0.2.15",
     "@radix-ui/react-slot": "^1.1.0",
+    "ai": "^2.1.34",
+    "ai-stream-experimental": "^2.2.2",
+    "langchain": "^0.2.8",
+    "git-repo-parser": "^2.0.6",
     "class-variance-authority": "^0.7.0",
     "clsx": "^2.1.1",
     "framer-motion": "^11.2.12",

src/lib/config.ts

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
import z from 'zod';

const envSchema = z.object({
  OPENAI_API_KEY: z.string().trim().min(1),
  PINECONE_API_KEY: z.string().trim().min(1),
  PINECONE_ENVIRONMENT: z.string().trim().min(1),
  PINECONE_INDEX_NAME: z.string().trim().min(1),
  PDF_PATH: z.string().trim().min(1),
  INDEX_INIT_TIMEOUT: z.coerce.number().min(1),
});

export const env = envSchema.parse(process.env);
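
Importing env anywhere triggers this validation once at module load, so a misconfigured deployment fails at startup rather than mid-request. A minimal usage sketch (the log line is illustrative, not part of the commit):

import { env } from "./config";

// Throws a ZodError at import time if any required variable is missing or empty.
console.log(`Pinecone index in use: ${env.PINECONE_INDEX_NAME}`);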

src/lib/langchain.ts

Lines changed: 75 additions & 0 deletions
@@ -0,0 +1,75 @@
import { ConversationalRetrievalQAChain } from "langchain/chains";
import { getVectorStore } from "./vector-store";
import { getPineconeClient } from "./pinecone-client";
import {
  StreamingTextResponse,
  experimental_StreamData,
  LangChainStream,
} from "ai-stream-experimental";
import { streamingModel, nonStreamingModel } from "./llm";
import { STANDALONE_QUESTION_TEMPLATE, QA_TEMPLATE } from "./prompt-templates";

type CallChainArgs = {
  question: string;
  chatHistory: string;
};

export async function callChain({ question, chatHistory }: CallChainArgs) {
  try {
    // OpenAI recommendation: strip newlines before embedding the query.
    const sanitizedQuestion = question.trim().replaceAll("\n", " ");
    const pineconeClient = await getPineconeClient();
    const vectorStore = await getVectorStore(pineconeClient);
    const retriever = vectorStore.asRetriever({
      k: 5, // Increase from the default 4 to 5
      searchType: "mmr", // Maximal Marginal Relevance for more diverse results
      filter: { type: "code" }, // Only applies if embeddings were stored with this metadata
    });
    const { stream, handlers } = LangChainStream({
      experimental_streamData: true,
    });
    const data = new experimental_StreamData();

    const chain = ConversationalRetrievalQAChain.fromLLM(
      streamingModel,
      retriever, // Use the configured retriever rather than a second default one
      {
        qaTemplate: QA_TEMPLATE,
        questionGeneratorTemplate: STANDALONE_QUESTION_TEMPLATE,
        returnSourceDocuments: true,
        questionGeneratorChainOptions: {
          llm: nonStreamingModel, // Rephrasing the question does not need streaming
        },
      }
    );

    // Ask the question using externally managed chat history.
    // Reference: https://js.langchain.com/docs/modules/chains/popular/chat_vector_db#externally-managed-memory
    chain
      .call(
        {
          question: sanitizedQuestion,
          chat_history: chatHistory,
        },
        [handlers]
      )
      .then(async (res) => {
        const sourceDocuments = res?.sourceDocuments ?? [];
        const firstTwoDocuments = sourceDocuments.slice(0, 2);
        const pageContents = firstTwoDocuments.map(
          ({ pageContent }: { pageContent: string }) => pageContent
        );
        // Attach the top two source chunks to the stream's side-channel data.
        data.append({
          sources: pageContents,
        });
        data.close();
      });

    // Return the readable stream
    return new StreamingTextResponse(stream, {}, data);
  } catch (e) {
    console.error(e);
    throw new Error("Call chain method failed to execute successfully!");
  }
}
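
A sketch of how callChain might be wired into a streaming route handler (the route path, request shape, and empty history are assumptions, not part of this commit):

import { callChain } from "@/lib/langchain";
import { formatChatHistory } from "@/lib/utils";

// Hypothetical app/api/chat/route.ts
export async function POST(req: Request) {
  const { messages } = await req.json();
  const question: string = messages[messages.length - 1].content;
  // Earlier [human, assistant] turns would be collected here; left empty for brevity.
  const history: [string, string][] = [];
  return callChain({ question, chatHistory: formatChatHistory(history) });
}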

src/lib/llm.ts

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
import { ChatOpenAI } from "@langchain/openai";

export const streamingModel = new ChatOpenAI({
  modelName: "gpt-3.5-turbo",
  streaming: true,
  verbose: true,
  temperature: 0,
});

export const nonStreamingModel = new ChatOpenAI({
  modelName: "gpt-3.5-turbo",
  verbose: true,
  temperature: 0,
});

// Alternative Gemini setup, kept for reference:
// const model = new ChatGoogleGenerativeAI({
//   model: "gemini-pro",
//   maxOutputTokens: 2048,
//   apiKey,
// });
// export const streamingModel = new ChatGoogleGenerativeAI({
//   model: "gemini-pro", // Replace with your desired Gemini model name
//   streaming: true,
//   verbose: true,
//   temperature: 0.5,
//   apiKey,
// });
// export const nonStreamingModel = new ChatGoogleGenerativeAI({
//   model: "gemini-pro", // Replace with your desired Gemini model name
//   verbose: true,
//   temperature: 0.5,
//   apiKey,
// });
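
If the Gemini variant were enabled, ChatGoogleGenerativeAI would be imported from @langchain/google-genai (a package this commit does not add), and the apiKey variable referenced in the comments would need to be defined, e.g. from another validated env variable.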

src/lib/pdf-loader.ts

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { env } from "./config";

export async function getChunkedDocsFromPDF() {
  try {
    const loader = new PDFLoader(env.PDF_PATH);
    const docs = await loader.load();

    // Chunking strategy from https://www.pinecone.io/learn/chunking-strategies/
    const textSplitter = new RecursiveCharacterTextSplitter({
      chunkSize: 1000,
      chunkOverlap: 200,
    });

    const chunkedDocs = await textSplitter.splitDocuments(docs);

    return chunkedDocs;
  } catch (e) {
    console.error(e);
    throw new Error("PDF docs chunking failed!");
  }
}
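
A sketch of a one-off ingestion script that exercises the loader (the script location and log line are assumptions):

import { getChunkedDocsFromPDF } from "@/lib/pdf-loader";

(async () => {
  const chunks = await getChunkedDocsFromPDF();
  // Each chunk is a LangChain Document of roughly 1000 characters with a 200-character overlap.
  console.log(`Prepared ${chunks.length} chunks from ${process.env.PDF_PATH}`);
})();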

src/lib/pinecone-client.ts

Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
import { Pinecone } from '@pinecone-database/pinecone';
import { env } from "./config";

let pineconeClientInstance: Pinecone | null = null;

// Initialize the Pinecone client (the index is connected to later, in vector-store.ts).
async function initPineconeClient() {
  try {
    const pineconeClient = new Pinecone({
      apiKey: env.PINECONE_API_KEY,
    });

    return pineconeClient;
  } catch (error) {
    console.error("Error initializing Pinecone client:", error);
    throw new Error("Failed to initialize Pinecone Client");
  }
}

// Lazily create and reuse a single client instance.
export async function getPineconeClient() {
  if (!pineconeClientInstance) {
    pineconeClientInstance = await initPineconeClient();
  }

  return pineconeClientInstance;
}
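
A hypothetical smoke test for the singleton behavior (not part of the commit):

import { getPineconeClient } from "@/lib/pinecone-client";

const first = await getPineconeClient();
const second = await getPineconeClient();
console.log(first === second); // true: initPineconeClient runs once, later calls reuse the instance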

src/lib/prompt-templates.ts

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
// Builds a standalone question from the chat history and the current question.
export const STANDALONE_QUESTION_TEMPLATE = `Given the following conversation history and a follow-up question, rephrase the follow-up question to be a standalone question that can be understood without the conversation context.

Chat History:
{chat_history}
Follow-Up Input: {question}
Standalone Question:`;

// The prompt used to answer the question; the model's response is streamed to the client.
export const QA_TEMPLATE = `You are an AI assistant specializing in software development and project management. You have been provided with the codebase of a project. Use the following context to answer the question at the end.

If the question is about code:
1. Provide a brief explanation of the relevant code snippet.
2. Suggest improvements or best practices if applicable.
3. If asked about a specific function or feature, explain its purpose and how it fits into the larger project.

If the question is about project management:
1. Offer insights based on the code structure and organization.
2. Suggest potential improvements in project architecture or workflow.

If you don't know the answer, just say you don't know. DO NOT try to make up an answer.
Always base your answers on the provided context, but you can briefly supplement with your general knowledge if relevant.

{context}

Question: {question}
Helpful answer in markdown:`;
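
The chain fills {chat_history} and {question} itself, and {context} receives the pageContent of the retrieved chunks. A minimal sketch of the interpolation using PromptTemplate from @langchain/core (values illustrative, path alias assumed):

import { PromptTemplate } from "@langchain/core/prompts";
import { STANDALONE_QUESTION_TEMPLATE } from "@/lib/prompt-templates";

const prompt = PromptTemplate.fromTemplate(STANDALONE_QUESTION_TEMPLATE);
// Produces the filled-in standalone-question prompt as a plain string.
const rendered = await prompt.format({
  chat_history: "Human: What does callChain do?\nAssistant: It runs the retrieval QA chain.",
  question: "Where is it defined?",
});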

src/lib/utils.ts

Lines changed: 61 additions & 0 deletions
@@ -1,6 +1,67 @@
 import { clsx, type ClassValue } from 'clsx';
 import { twMerge } from 'tailwind-merge';
+import { Message } from "ai";

 export function cn(...inputs: ClassValue[]) {
   return twMerge(clsx(inputs));
 }
+
+export function delay(ms: number) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
+export function scrollToBottom(containerRef: React.RefObject<HTMLElement>) {
+  if (containerRef.current) {
+    const lastMessage = containerRef.current.lastElementChild;
+    if (lastMessage) {
+      const scrollOptions: ScrollIntoViewOptions = {
+        behavior: "smooth",
+        block: "end",
+      };
+      lastMessage.scrollIntoView(scrollOptions);
+    }
+  }
+}
+
+// Reference:
+// github.com/hwchase17/langchainjs/blob/357d6fccfc78f1332b54d2302d92e12f0861c12c/examples/src/guides/expression_language/cookbook_conversational_retrieval.ts#L61
+export const formatChatHistory = (chatHistory: [string, string][]) => {
+  const formattedDialogueTurns = chatHistory.map(
+    (dialogueTurn) => `Human: ${dialogueTurn[0]}\nAssistant: ${dialogueTurn[1]}`
+  );
+
+  return formattedDialogueTurns.join("\n");
+};
+
+export function formattedText(inputText: string) {
+  return inputText
+    .replace(/\n+/g, " ") // Replace consecutive newlines with a single space
+    .replace(/(\w) - (\w)/g, "$1$2") // Join words split around a spaced hyphen
+    .replace(/\s+/g, " "); // Collapse repeated whitespace into a single space
+}
+
+// Default UI message
+export const initialMessages: Message[] = [
+  {
+    role: "assistant",
+    id: "0",
+    content:
+      "Hi! I am your Project Assistant. I am happy to help with your questions about your project's codebase.",
+  },
+];
+
+interface Data {
+  sources: string[];
+}
+
+// Maps each sources payload to the matching assistant message.
+export const getSources = (data: Data[], role: string, index: number) => {
+  if (role === "assistant" && index >= 2 && (index - 2) % 2 === 0) {
+    const sourcesIndex = (index - 2) / 2;
+    if (data[sourcesIndex] && data[sourcesIndex].sources) {
+      return data[sourcesIndex].sources;
+    }
+  }
+  return [];
+};
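
The index arithmetic in getSources assumes the message list starts with the initial assistant greeting at index 0 and then alternates user/assistant, so real answers sit at indices 2, 4, 6, ... and map to data[0], data[1], data[2], ... A small worked example (values illustrative):

import { getSources } from "@/lib/utils";

const data = [{ sources: ["chunk A", "chunk B"] }];
getSources(data, "assistant", 2); // -> ["chunk A", "chunk B"] (first real answer)
getSources(data, "user", 3); // -> [] (only assistant messages carry sources)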

src/lib/vector-store.ts

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
import { env } from './config';
import { OpenAIEmbeddings } from '@langchain/openai';
import { PineconeStore } from "@langchain/pinecone";
import { Pinecone } from '@pinecone-database/pinecone';

export async function embedAndStoreDocs(
  client: Pinecone,
  docs: { id: string, text: string }[] // Plain { id, text } records, mapped to Documents below
) {
  try {
    console.log("Initializing embeddings...");
    const embeddings = new OpenAIEmbeddings();
    console.log("Embeddings initialized.");

    console.log("Connecting to Pinecone index...");
    const pineconeIndex = client.Index(env.PINECONE_INDEX_NAME);
    console.log("Connected to Pinecone index.");

    console.log("Embedding and storing documents...");
    const documents = docs.map((doc) => ({
      pageContent: doc.text, // PineconeStore embeds pageContent
      metadata: { id: doc.id }, // Keep the id in metadata so it survives the upsert
    }));
    await PineconeStore.fromDocuments(documents, embeddings, {
      pineconeIndex,
      textKey: 'text',
    });
    console.log("Documents embedded and stored.");
  } catch (error) {
    console.error("Error while embedding and storing documents: ", error);
    throw new Error('Failed to load your docs!');
  }
}

// Returns a vector-store handle to be used as a retriever in LangChain chains.
export async function getVectorStore(client: Pinecone) {
  try {
    const embeddings = new OpenAIEmbeddings();
    const pineconeIndex = client.Index(env.PINECONE_INDEX_NAME);

    const vectorStore = await PineconeStore.fromExistingIndex(embeddings, {
      pineconeIndex,
      textKey: 'text',
    });

    return vectorStore;
  } catch (error) {
    console.error('error ', error);
    throw new Error('Something went wrong while getting vector store!');
  }
}
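
A sketch of a one-time ingest that ties the client and store together (the { id, text } records are illustrative; their shape matches embedAndStoreDocs above):

import { getPineconeClient } from "@/lib/pinecone-client";
import { embedAndStoreDocs } from "@/lib/vector-store";

const client = await getPineconeClient();
await embedAndStoreDocs(client, [
  { id: "src/lib/llm.ts", text: "export const streamingModel = new ChatOpenAI({ ... });" },
]);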
