TechLead
Lesson 18 of 18
7 min read
LangChain

Building a Full RAG Application

Build a complete Retrieval-Augmented Generation application from scratch with document ingestion, vector search, and conversational AI

Project: Full-Stack RAG App

In this hands-on project, you'll build a complete RAG (Retrieval-Augmented Generation) application that can answer questions about your own documents. We'll combine everything you've learned: document loading, text splitting, embeddings, vector storage, chains, and streaming.

πŸ—οΈ What We're Building

  • 📥 Document Ingestion: Upload PDFs and web pages into a vector database
  • 🔍 Semantic Search: Find relevant chunks based on user questions
  • 💬 Conversational AI: Chat with your documents with memory
  • 📡 Streaming: Real-time token streaming for great UX
  • 📚 Source Citations: Show which documents the answer came from

Project Structure

rag-app/
├── src/
│   ├── lib/
│   │   ├── embeddings.ts      # Embedding configuration
│   │   ├── vectorStore.ts     # Vector store setup
│   │   ├── ingest.ts          # Document ingestion pipeline
│   │   └── ragChain.ts        # RAG chain with memory
│   └── app/
│       ├── api/
│       │   ├── ingest/route.ts   # Upload endpoint
│       │   └── chat/route.ts     # Chat endpoint
│       └── page.tsx              # Chat UI
├── .env.local
└── package.json

Step 1: Setup & Configuration

# Install dependencies
npm install langchain @langchain/openai @langchain/community
npm install @langchain/core pdf-parse cheerio
// src/lib/embeddings.ts
import { OpenAIEmbeddings } from "@langchain/openai";

// Shared embedding model used by both the ingestion pipeline and retrieval.
// NOTE(review): OpenAIEmbeddings reads OPENAI_API_KEY from the environment —
// presumably set in .env.local; confirm deployment config provides it.
export const embeddings = new OpenAIEmbeddings({
  modelName: "text-embedding-3-small",
  // Dimensions: 1536 (default) — good balance of quality and speed
});

Step 2: Vector Store Setup

// src/lib/vectorStore.ts
import { MemoryVectorStore } from "langchain/vectorstores/memory";
import { embeddings } from "./embeddings";
import { Document } from "@langchain/core/documents";

// Process-wide singleton. An in-memory store only lives as long as the
// server process — swap in Pinecone/Supabase pgvector for production.
let vectorStore: MemoryVectorStore | null = null;

/** Lazily create (on first call) and return the shared vector store. */
export async function getVectorStore() {
  if (vectorStore === null) {
    vectorStore = new MemoryVectorStore(embeddings);
  }
  return vectorStore;
}

/** Embed and persist a batch of documents, then log how many were added. */
export async function addDocuments(docs: Document[]) {
  const count = docs.length;
  const vs = await getVectorStore();
  await vs.addDocuments(docs);
  console.log(`Added ${count} documents to vector store`);
}

/** Return the top-k chunks most similar to `query`, paired with scores. */
export async function searchDocuments(query: string, k = 4) {
  const vs = await getVectorStore();
  return await vs.similaritySearchWithScore(query, k);
}

Step 3: Document Ingestion Pipeline

// src/lib/ingest.ts
import { PDFLoader } from "@langchain/community/document_loaders/fs/pdf";
import { CheerioWebBaseLoader } from "@langchain/community/document_loaders/web/cheerio";
import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
import { addDocuments } from "./vectorStore";

// Shared splitter: ~1000-char chunks with 200-char overlap so content
// spanning a chunk boundary appears in both neighbouring chunks.
const splitter = new RecursiveCharacterTextSplitter({
  chunkSize: 1000,
  chunkOverlap: 200,
});

// Ingest a PDF file
export async function ingestPDF(filePath: string) {
  console.log(`Ingesting PDF: ${filePath}`);
  
  const loader = new PDFLoader(filePath, { splitPages: true });
  const rawDocs = await loader.load();
  
  const splitDocs = await splitter.splitDocuments(rawDocs);
  
  // Add source metadata
  const docsWithMeta = splitDocs.map((doc) => ({
    ...doc,
    metadata: {
      ...doc.metadata,
      source: filePath,
      type: "pdf",
      ingestedAt: new Date().toISOString(),
    },
  }));
  
  await addDocuments(docsWithMeta);
  return { chunks: docsWithMeta.length, pages: rawDocs.length };
}

/** Fetch a web page, split its text into chunks, tag provenance metadata,
 *  and add everything to the vector store. Returns the chunk count. */
export async function ingestURL(url: string) {
  console.log(`Ingesting URL: ${url}`);

  const webLoader = new CheerioWebBaseLoader(url);
  const pageDocs = await webLoader.load();

  const pieces = await splitter.splitDocuments(pageDocs);

  // Stamp every chunk with its origin so answers can cite this URL.
  const stamped = pieces.map((piece) => ({
    ...piece,
    metadata: {
      ...piece.metadata,
      source: url,
      type: "web",
      ingestedAt: new Date().toISOString(),
    },
  }));

  await addDocuments(stamped);
  return { chunks: stamped.length };
}

Step 4: RAG Chain with Conversation Memory

// src/lib/ragChain.ts
import { ChatOpenAI } from "@langchain/openai";
import { ChatPromptTemplate, MessagesPlaceholder } from "@langchain/core/prompts";
import { createStuffDocumentsChain } from "langchain/chains/combine_documents";
import { createRetrievalChain } from "langchain/chains/retrieval";
import { createHistoryAwareRetriever } from "langchain/chains/history_aware_retriever";
import { getVectorStore } from "./vectorStore";
import { BaseMessage } from "@langchain/core/messages";

// Chat model shared by the question-rephrasing step and the answering step.
// streaming: true enables token-by-token output for the SSE chat route;
// low temperature keeps answers close to the retrieved context.
const llm = new ChatOpenAI({
  modelName: "gpt-4",
  streaming: true,
  temperature: 0.3,
});

export async function createRAGChain() {
  const vectorStore = await getVectorStore();
  const retriever = vectorStore.asRetriever({ k: 4 });

  // Contextualize question based on chat history
  const contextualizePrompt = ChatPromptTemplate.fromMessages([
    ["system", `Given the chat history and latest question, reformulate 
    the question to be standalone (understandable without chat history). 
    Do NOT answer the question, just reformulate it if needed.`],
    new MessagesPlaceholder("chat_history"),
    ["human", "{input}"],
  ]);

  const historyAwareRetriever = await createHistoryAwareRetriever({
    llm,
    retriever,
    rephrasePrompt: contextualizePrompt,
  });

  // Answer question using retrieved context
  const answerPrompt = ChatPromptTemplate.fromMessages([
    ["system", `You are a helpful assistant that answers questions based on 
    the provided context. If the context doesn't contain relevant information, 
    say so honestly. Always cite which source document your answer comes from.
    
    Context: {context}`],
    new MessagesPlaceholder("chat_history"),
    ["human", "{input}"],
  ]);

  const documentChain = await createStuffDocumentsChain({
    llm,
    prompt: answerPrompt,
  });

  return await createRetrievalChain({
    retriever: historyAwareRetriever,
    combineDocsChain: documentChain,
  });
}

Step 5: Streaming API Route

// app/api/chat/route.ts
import { createRAGChain } from "@/lib/ragChain";
import { HumanMessage, AIMessage } from "@langchain/core/messages";

/**
 * POST /api/chat — answer a question over the ingested documents, streaming
 * the result back as Server-Sent Events.
 *
 * Events emitted: {type:"token"} per answer fragment, one {type:"sources"}
 * with citations, {type:"error"} on failure, then the "[DONE]" sentinel.
 */
export async function POST(req: Request) {
  const { message, chatHistory = [] } = await req.json();

  const chain = await createRAGChain();

  // Convert the client's plain {role, content} history into LangChain messages.
  const history = (chatHistory as { role: string; content: string }[]).map(
    (msg) =>
      msg.role === "user"
        ? new HumanMessage(msg.content)
        : new AIMessage(msg.content)
  );

  const stream = await chain.stream({
    input: message,
    chat_history: history,
  });

  const encoder = new TextEncoder();
  // Small helper so every event is encoded as "data: <json>\n\n" in one place.
  const send = (controller: ReadableStreamDefaultController, payload: unknown) =>
    controller.enqueue(encoder.encode(`data: ${JSON.stringify(payload)}\n\n`));

  const readable = new ReadableStream({
    async start(controller) {
      let sources: { content: string; source: string; type: string }[] = [];
      try {
        for await (const chunk of stream) {
          // `answer` chunks carry generated tokens — forward immediately.
          if (chunk.answer) {
            send(controller, { type: "token", content: chunk.answer });
          }
          // `context` chunks carry the retrieved documents; keep the latest.
          if (chunk.context) {
            sources = chunk.context.map(
              (doc: { pageContent: string; metadata: Record<string, string> }) => ({
                content: doc.pageContent.slice(0, 200),
                source: doc.metadata.source,
                type: doc.metadata.type,
              })
            );
          }
        }
        // Citations go last so the client can attach them to the full answer.
        send(controller, { type: "sources", sources });
      } catch (err: unknown) {
        // Surface mid-stream failures instead of leaving the client hanging
        // on a connection that never terminates.
        send(controller, {
          type: "error",
          message: err instanceof Error ? err.message : "stream failed",
        });
      } finally {
        // Always terminate the SSE stream, even after an error.
        controller.enqueue(encoder.encode("data: [DONE]\n\n"));
        controller.close();
      }
    },
  });

  return new Response(readable, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache",
      // SSE needs the connection held open; also discourages proxy buffering.
      Connection: "keep-alive",
    },
  });
}

Step 6: Chat UI Component

"use client";
import { useState, useRef } from "react";

// Shape of one chat bubble rendered by the UI.
interface Message {
  role: "user" | "assistant";
  // Accumulates token-by-token while an assistant reply streams in.
  content: string;
  // Citations attached to an assistant reply once streaming completes.
  sources?: { content: string; source: string }[];
}

/**
 * Chat UI: posts the question to /api/chat and renders the SSE token stream
 * into the last (assistant) message, followed by its source citations.
 */
export default function RAGChat() {
  const [messages, setMessages] = useState<Message[]>([]);
  const [input, setInput] = useState("");
  const [isLoading, setIsLoading] = useState(false);

  // Immutably replace the last message (the streaming assistant bubble).
  // The previous code mutated objects held in React state, which breaks
  // with StrictMode double-rendering and memoized children.
  function patchLastMessage(patch: (last: Message) => Message) {
    setMessages((prev) =>
      prev.length === 0
        ? prev
        : [...prev.slice(0, -1), patch(prev[prev.length - 1])]
    );
  }

  async function handleSubmit(e: React.FormEvent) {
    e.preventDefault();
    if (!input.trim()) return;

    const userMessage = input;
    setInput("");
    setMessages((prev) => [...prev, { role: "user", content: userMessage }]);
    setIsLoading(true);

    // Placeholder assistant message that streamed tokens append to.
    setMessages((prev) => [...prev, { role: "assistant", content: "" }]);

    try {
      const response = await fetch("/api/chat", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify({
          message: userMessage,
          // `messages` here predates this turn, so it is exactly the history.
          chatHistory: messages,
        }),
      });
      if (!response.ok || !response.body) {
        throw new Error(`chat request failed: ${response.status}`);
      }

      const reader = response.body.getReader();
      // {stream: true} keeps multi-byte UTF-8 sequences split across network
      // chunks from being corrupted.
      const decoder = new TextDecoder();
      // SSE events may also be split across chunks; buffer until the "\n\n"
      // event terminator arrives so JSON.parse never sees a partial payload.
      let buffer = "";

      while (true) {
        const { done, value } = await reader.read();
        if (done) break;

        buffer += decoder.decode(value, { stream: true });
        const events = buffer.split("\n\n");
        buffer = events.pop() ?? ""; // keep the trailing partial event

        for (const event of events) {
          const line = event.trim();
          if (!line.startsWith("data: ") || line === "data: [DONE]") continue;

          const data = JSON.parse(line.slice(6));

          if (data.type === "token") {
            patchLastMessage((last) => ({
              ...last,
              content: last.content + data.content,
            }));
          }
          if (data.type === "sources") {
            patchLastMessage((last) => ({ ...last, sources: data.sources }));
          }
        }
      }
    } catch (err) {
      // Show something in the bubble rather than leaving it blank forever.
      console.error(err);
      patchLastMessage((last) => ({
        ...last,
        content: last.content || "Sorry, something went wrong.",
      }));
    } finally {
      // Previously skipped on error, leaving the UI stuck in loading state.
      setIsLoading(false);
    }
  }

  return (
    <div className="max-w-2xl mx-auto p-4">
      <div className="space-y-4 mb-4">
        {messages.map((msg, i) => (
          <div key={i} className={msg.role === "user" ? "text-right" : ""}>
            <div className={
              msg.role === "user" 
                ? "bg-blue-500 text-white rounded-lg p-3 inline-block" 
                : "bg-gray-100 rounded-lg p-3"
            }>
              {msg.content}{isLoading && i === messages.length - 1 && "β–Š"}
            </div>
            {msg.sources && (
              <div className="mt-2 text-sm text-gray-500">
                Sources: {msg.sources.map((s) => s.source).join(", ")}
              </div>
            )}
          </div>
        ))}
      </div>
      <form onSubmit={handleSubmit} className="flex gap-2">
        <input
          value={input}
          onChange={(e) => setInput(e.target.value)}
          placeholder="Ask about your documents..."
          className="flex-1 border rounded-lg px-4 py-2"
        />
        <button type="submit" disabled={isLoading} 
          className="bg-blue-500 text-white px-6 py-2 rounded-lg">
          Send
        </button>
      </form>
    </div>
  );
}

🚀 Extending This Project

  • Switch to Pinecone or Supabase pgvector for persistent storage
  • Add a file upload UI with drag-and-drop for PDFs
  • Implement authentication so each user has their own documents
  • Add metadata filtering (search only certain document types)
  • Deploy to Vercel with Edge Functions for low latency

💡 Key Takeaways

  • A full RAG app combines: Document Loading → Splitting → Embedding → Vector Storage → Retrieval → Generation
  • History-aware retrievers make conversations natural by contextualizing follow-up questions
  • Streaming with SSE provides real-time token delivery for the best UX
  • Always provide source citations so users can verify the AI's answers
  • Start with MemoryVectorStore, then upgrade to a persistent vector DB for production

Continue Learning