Project: Full-Stack RAG App
In this hands-on project, you'll build a complete RAG (Retrieval-Augmented Generation) application that can answer questions about your own documents. We'll combine everything you've learned: document loading, text splitting, embeddings, vector storage, chains, and streaming.
🏗️ What We're Building
- 📥 Document Ingestion: Upload PDFs and web pages into a vector database
- 🔍 Semantic Search: Find relevant chunks based on user questions
- 💬 Conversational AI: Chat with your documents with memory
- 📡 Streaming: Real-time token streaming for great UX
- 📚 Source Citations: Show which documents the answer came from
Project Structure
rag-app/
├── src/
│   ├── lib/
│   │   ├── embeddings.ts       # Embedding configuration
│   │   ├── vectorStore.ts      # Vector store setup
│   │   ├── ingest.ts           # Document ingestion pipeline
│   │   └── ragChain.ts         # RAG chain with memory
│   └── app/
│       ├── api/
│       │   ├── ingest/route.ts # Upload endpoint
│       │   └── chat/route.ts   # Chat endpoint
│       └── page.tsx            # Chat UI
├── .env.local
└── package.json
Step 1: Setup & Configuration
# Install dependencies
npm install langchain @langchain/openai @langchain/community
npm install @langchain/core pdf-parse cheerio
// src/lib/embeddings.ts
import { OpenAIEmbeddings } from "@langchain/openai";
// Shared embedding model instance, reused by the vector store and the
// ingestion pipeline so every chunk and every query is embedded consistently.
export const embeddings = new OpenAIEmbeddings({
  modelName: "text-embedding-3-small",
  // Dimensions: 1536 (default) — good balance of quality and speed
});
Step 2: Vector Store Setup
// src/lib/vectorStore.ts
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { embeddings } from "./embeddings";
import { Document } from "@langchain/core/documents";
// In-memory store (replace with Pinecone/Supabase for production)
// Module-level singleton so every caller shares one index.
let vectorStore: MemoryVectorStore | null = null;

/**
 * Lazily create (on first call) and return the shared in-memory vector store.
 */
export async function getVectorStore() {
  vectorStore ??= new MemoryVectorStore(embeddings);
  return vectorStore;
}
/**
 * Embed and index a batch of documents into the shared vector store.
 */
export async function addDocuments(docs: Document[]) {
  await (await getVectorStore()).addDocuments(docs);
  console.log(`Added ${docs.length} documents to vector store`);
}
/**
 * Return the `k` chunks most similar to `query`, each paired with its
 * similarity score.
 */
export async function searchDocuments(query: string, k = 4) {
  return (await getVectorStore()).similaritySearchWithScore(query, k);
}
Step 3: Document Ingestion Pipeline
// src/lib/ingest.ts
import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf";
import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/cheerio";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { addDocuments } from "./vectorStore";
// Splitter shared by every ingestion path: ~1000-character chunks with a
// 200-character overlap so content that straddles a boundary stays retrievable.
const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 1000,
  chunkOverlap: 200,
});
// Ingest a PDF file
export async function ingestPDF(filePath: string) {
console.log(`Ingesting PDF: ${filePath}`);
const loader = new PDFLoader(filePath, { splitPages: true });
const rawDocs = await loader.load();
const splitDocs = await splitter.splitDocuments(rawDocs);
// Add source metadata
const docsWithMeta = splitDocs.map((doc) => ({
...doc,
metadata: {
...doc.metadata,
source: filePath,
type: "pdf",
ingestedAt: new Date().toISOString(),
},
}));
await addDocuments(docsWithMeta);
return { chunks: docsWithMeta.length, pages: rawDocs.length };
}
/**
 * Fetch a web page, split its text into overlapping chunks, stamp each chunk
 * with provenance metadata, and index everything in the vector store.
 * Returns the number of chunks indexed.
 */
export async function ingestURL(url: string) {
  console.log(`Ingesting URL: ${url}`);
  const pageDocs = await new CheerioWebBaseLoader(url).load();
  const chunks = await splitter.splitDocuments(pageDocs);

  // Stamp every chunk so answers can cite where it came from.
  const stamped = chunks.map((chunk) => ({
    ...chunk,
    metadata: {
      ...chunk.metadata,
      source: url,
      type: "web",
      ingestedAt: new Date().toISOString(),
    },
  }));

  await addDocuments(stamped);
  return { chunks: stamped.length };
}
Step 4: RAG Chain with Conversation Memory
// src/lib/ragChain.ts
import { ChatOpenAI } from "@langchain/openai";
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
import { createRetrievalChain } from "langchain/chains/retrieval";
import { createHistoryAwareRetriever } from "langchain/chains/history_aware_retriever";
import { getVectorStore } from "./vectorStore";
import { BaseMessage } from "@langchain/core/messages";
// Chat model shared by the question-rephrasing step and answer generation.
// Low temperature keeps answers grounded in the retrieved context.
const llm = new ChatOpenAI({
  modelName: "gpt-4",
  streaming: true,
  temperature: 0.3,
});
/**
 * Assemble the conversational RAG pipeline:
 *   1. A history-aware retriever first rewrites follow-up questions into
 *      standalone queries, then searches the vector store.
 *   2. A "stuff documents" chain feeds the retrieved chunks plus the chat
 *      history to the LLM to produce the final answer.
 * The returned chain's stream()/invoke() accepts { input, chat_history }.
 */
export async function createRAGChain() {
  const vectorStore = await getVectorStore();
  // Retrieve the 4 most similar chunks per (rephrased) query.
  const retriever = vectorStore.asRetriever({ k: 4 });

  // Contextualize question based on chat history
  const contextualizePrompt = ChatPromptTemplate.fromMessages([
    ["system", `Given the chat history and latest question, reformulate
the question to be standalone (understandable without chat history).
Do NOT answer the question, just reformulate it if needed.`],
    new MessagesPlaceholder("chat_history"),
    ["human", "{input}"],
  ]);

  // Wraps the retriever so it searches with the standalone question rather
  // than the raw (possibly context-dependent) user input.
  const historyAwareRetriever = await createHistoryAwareRetriever({
    llm,
    retriever,
    rephrasePrompt: contextualizePrompt,
  });

  // Answer question using retrieved context; {context} is filled with the
  // retrieved documents by createStuffDocumentsChain.
  const answerPrompt = ChatPromptTemplate.fromMessages([
    ["system", `You are a helpful assistant that answers questions based on
the provided context. If the context doesn't contain relevant information,
say so honestly. Always cite which source document your answer comes from.
Context: {context}`],
    new MessagesPlaceholder("chat_history"),
    ["human", "{input}"],
  ]);

  const documentChain = await createStuffDocumentsChain({
    llm,
    prompt: answerPrompt,
  });

  return await createRetrievalChain({
    retriever: historyAwareRetriever,
    combineDocsChain: documentChain,
  });
}
Step 5: Streaming API Route
// app/api/chat/route.ts
import { createRAGChain } from "@/lib/ragChain";
import { HumanMessage, AIMessage } from "@langchain/core/messages";
// Shape of one history entry as sent by the client.
interface ChatTurn {
  role: "user" | "assistant";
  content: string;
}

// Structural type for documents returned by the retrieval step.
type RetrievedDoc = {
  pageContent: string;
  metadata: { source?: string; type?: string };
};

/**
 * POST /api/chat — answer a question over the ingested documents, streaming
 * the response as Server-Sent Events.
 *
 * Event payloads (JSON after "data: "):
 *   { type: "token",   content: string }  — incremental answer text
 *   { type: "sources", sources: [...] }   — citations, sent once at the end
 * The stream terminates with "data: [DONE]".
 */
export async function POST(req: Request) {
  const { message, chatHistory = [] } = await req.json();

  // Reject empty/missing input before doing any model work.
  if (typeof message !== "string" || !message.trim()) {
    return new Response(JSON.stringify({ error: "message is required" }), {
      status: 400,
      headers: { "Content-Type": "application/json" },
    });
  }

  const chain = await createRAGChain();

  // Convert chat history to LangChain messages
  const history = (chatHistory as ChatTurn[]).map((msg) =>
    msg.role === "user"
      ? new HumanMessage(msg.content)
      : new AIMessage(msg.content)
  );

  // Stream the response
  const stream = await chain.stream({
    input: message,
    chat_history: history,
  });

  const encoder = new TextEncoder();
  // Encode one SSE event ("data: <json>\n\n").
  const sse = (payload: unknown) =>
    encoder.encode(`data: ${JSON.stringify(payload)}\n\n`);

  const readable = new ReadableStream({
    async start(controller) {
      let sources: { content: string; source?: string; type?: string }[] = [];
      try {
        for await (const chunk of stream) {
          // Answer chunks carry incremental token text.
          if (chunk.answer) {
            controller.enqueue(sse({ type: "token", content: chunk.answer }));
          }
          // The retrieval step emits the matched documents; keep the latest.
          if (chunk.context) {
            sources = chunk.context.map((doc: RetrievedDoc) => ({
              content: doc.pageContent.slice(0, 200),
              source: doc.metadata.source,
              type: doc.metadata.type,
            }));
          }
        }
        // Send sources at the end, after the full answer has streamed.
        controller.enqueue(sse({ type: "sources", sources }));
        controller.enqueue(encoder.encode("data: [DONE]\n\n"));
        controller.close();
      } catch (err) {
        // Surface mid-stream failures instead of leaving the client hanging.
        controller.error(err);
      }
    },
  });

  return new Response(readable, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      Connection: "keep-alive",
    },
  });
}
Step 6: Chat UI Component
"use client";
import { useState, useRef } from "react";
// One chat bubble in the UI. Assistant messages may additionally carry the
// source citations that the server streams at the end of its response.
interface Message {
  role: "user" | "assistant";
  content: string;
  sources?: { content: string; source: string }[];
}
/**
 * Chat UI for the RAG app: sends the user's question plus prior conversation
 * to /api/chat and renders the server's SSE token stream incrementally,
 * attaching source citations when they arrive at the end of the stream.
 */
export default function RAGChat() {
  const [messages, setMessages] = useState<Message[]>([]);
  const [input, setInput] = useState("");
  const [isLoading, setIsLoading] = useState(false);

  // Immutably replace the last (streaming assistant) message. Never mutates
  // the previous state object, so it is safe under React StrictMode's
  // double-invoked updaters.
  function patchLastMessage(patch: (last: Message) => Message) {
    setMessages((prev) => {
      if (prev.length === 0) return prev;
      const updated = [...prev];
      updated[updated.length - 1] = patch(updated[updated.length - 1]);
      return updated;
    });
  }

  async function handleSubmit(e: React.FormEvent) {
    e.preventDefault();
    if (!input.trim()) return;

    const userMessage = input;
    setInput("");
    // History sent to the server is the conversation *before* this turn.
    const history = messages;
    setMessages((prev) => [
      ...prev,
      { role: "user", content: userMessage },
      // Empty assistant message the stream fills in token by token.
      { role: "assistant", content: "" },
    ]);
    setIsLoading(true);

    try {
      const response = await fetch("/api/chat", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          message: userMessage,
          chatHistory: history,
        }),
      });
      if (!response.ok || !response.body) {
        throw new Error(`chat request failed: ${response.status}`);
      }

      const reader = response.body.getReader();
      const decoder = new TextDecoder();
      // SSE events can be split across network chunks; buffer partial lines
      // so JSON.parse never sees half an event.
      let buffer = "";

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;
        // stream:true keeps multi-byte characters intact across chunk
        // boundaries.
        buffer += decoder.decode(value, { stream: true });
        const lines = buffer.split("\n");
        buffer = lines.pop() ?? ""; // keep the trailing partial line

        for (const line of lines) {
          if (!line.startsWith("data: ") || line === "data: [DONE]") continue;
          const data = JSON.parse(line.slice(6));
          if (data.type === "token") {
            patchLastMessage((last) => ({
              ...last,
              content: last.content + data.content,
            }));
          }
          if (data.type === "sources") {
            patchLastMessage((last) => ({ ...last, sources: data.sources }));
          }
        }
      }
    } catch (err) {
      // Show something rather than a silently empty bubble.
      patchLastMessage((last) => ({
        ...last,
        content: last.content || "Sorry, something went wrong.",
      }));
      console.error(err);
    } finally {
      setIsLoading(false);
    }
  }

  return (
    <div className="max-w-2xl mx-auto p-4">
      <div className="space-y-4 mb-4">
        {messages.map((msg, i) => (
          <div key={i} className={msg.role === "user" ? "text-right" : ""}>
            <div className={
              msg.role === "user"
                ? "bg-blue-500 text-white rounded-lg p-3 inline-block"
                : "bg-gray-100 rounded-lg p-3"
            }>
              {/* Blinking-cursor glyph while the last message streams in. */}
              {msg.content}{isLoading && i === messages.length - 1 && "▌"}
            </div>
            {msg.sources && (
              <div className="mt-2 text-sm text-gray-500">
                Sources: {msg.sources.map((s) => s.source).join(", ")}
              </div>
            )}
          </div>
        ))}
      </div>
      <form onSubmit={handleSubmit} className="flex gap-2">
        <input
          value={input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Ask about your documents..."
          className="flex-1 border rounded-lg px-4 py-2"
        />
        <button type="submit" disabled={isLoading}
          className="bg-blue-500 text-white px-6 py-2 rounded-lg">
          Send
        </button>
      </form>
    </div>
  );
}
🚀 Extending This Project
- • Switch to Pinecone or Supabase pgvector for persistent storage
- • Add file upload UI with drag-and-drop for PDFs
- • Implement authentication so each user has their own documents
- • Add metadata filtering (search only certain document types)
- • Deploy to Vercel with Edge Functions for low latency
💡 Key Takeaways
- • A full RAG app combines: Document Loading → Splitting → Embedding → Vector Storage → Retrieval → Generation
- • History-aware retrievers make conversations natural by contextualizing follow-up questions
- • Streaming with SSE provides real-time token delivery for the best UX
- • Always provide source citations so users can verify the AI's answers
- • Start with MemoryVectorStore, then upgrade to a persistent vector DB for production