LangChain 실전 가이드 | LLM 애플리케이션 개발 프레임워크
이 글의 핵심
LangChain으로 LLM 애플리케이션을 만드는 완벽 가이드. Chains, Agents, Memory, RAG 구현, 벡터 스토어, 프롬프트 템플릿까지 실전 예제로 완벽 이해.
들어가며
LangChain은 LLM 기반 애플리케이션을 쉽게 만들 수 있는 프레임워크입니다. 복잡한 워크플로우, 메모리 관리, 외부 도구 연동을 간단하게 구현할 수 있습니다.
실무 경험: VOD 콘텐츠 자동 태깅 시스템을 LangChain으로 구축하면서, 수천 개의 영상 메타데이터를 자동으로 분류하고 검색 정확도를 40% 향상시킨 경험을 공유합니다.
이 글에서 다룰 내용:
- LangChain 기본 개념과 설치
- Chains: 여러 단계를 연결
- Agents: 자율적으로 도구 사용
- Memory: 대화 이력 관리
- RAG: 문서 검색 기반 답변
- Vector Stores: 임베딩과 유사도 검색
- 실전 예제
목차
- LangChain 시작하기
- Chains: 작업 연결
- Prompt Templates
- Memory: 대화 이력
- Agents: 자율 도구 사용
- RAG 구현
- Vector Stores
- 실전 예제
1. LangChain 시작하기
설치
# Python
pip install langchain langchain-openai langchain-community
# 추가 패키지
pip install chromadb # 벡터 스토어
pip install tiktoken # 토큰 계산
pip install faiss-cpu # 벡터 검색
# Node.js
npm install langchain @langchain/openai
기본 설정
import os
from langchain_openai import ChatOpenAI

# Never hardcode API keys in source; export OPENAI_API_KEY in your shell or
# load it from a .env file. setdefault keeps an already-exported key intact
# and only falls back to the placeholder for local experimentation.
os.environ.setdefault("OPENAI_API_KEY", "sk-...")

# Initialize the LLM client
llm = ChatOpenAI(
    model="gpt-4o-mini",   # small, inexpensive chat model
    temperature=0.7,       # moderate creativity
    max_tokens=1000,       # cap on completion length
)

# Simple one-shot call
response = llm.invoke("What is LangChain?")
print(response.content)
2. Chains: 작업 연결
기본 Chain
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.schema.output_parser import StrOutputParser

# 1. Prompt template: {language} and {text} are filled at invoke time.
prompt = ChatPromptTemplate.from_template(
    "Translate the following to {language}: {text}"
)

# 2. The chat model.
llm = ChatOpenAI(model="gpt-4o-mini")

# 3. Output parser: unwraps the model message into a plain string.
output_parser = StrOutputParser()

# 4. Compose the pieces with LCEL (LangChain Expression Language) pipes.
chain = prompt | llm | output_parser

# 5. Execute the pipeline with concrete template values.
inputs = {"language": "Korean", "text": "Hello, how are you?"}
result = chain.invoke(inputs)
print(result)  # "안녕하세요, 어떻게 지내세요?"
Sequential Chain
from langchain.chains import LLMChain, SequentialChain
from langchain.prompts import PromptTemplate

# Three single-purpose chains wired in sequence:
# keyword -> topic -> outline -> content. Each chain's output_key
# feeds the next chain's input variable.

# Chain 1: generate a topic from a keyword.
topic_prompt = PromptTemplate(
    input_variables=["keyword"],
    template="Generate a blog post topic about {keyword}",
)
topic_chain = LLMChain(llm=llm, prompt=topic_prompt, output_key="topic")

# Chain 2: write an outline for that topic.
outline_prompt = PromptTemplate(
    input_variables=["topic"],
    template="Create an outline for: {topic}",
)
outline_chain = LLMChain(llm=llm, prompt=outline_prompt, output_key="outline")

# Chain 3: write the post body from topic + outline.
content_prompt = PromptTemplate(
    input_variables=["topic", "outline"],
    template="Write a blog post:\nTopic: {topic}\nOutline: {outline}",
)
content_chain = LLMChain(llm=llm, prompt=content_prompt, output_key="content")

# Run all three end-to-end; intermediate results are exposed as outputs too.
overall_chain = SequentialChain(
    chains=[topic_chain, outline_chain, content_chain],
    input_variables=["keyword"],
    output_variables=["topic", "outline", "content"],
)

result = overall_chain.invoke({"keyword": "Python async programming"})
print(result["content"])
3. Prompt Templates
기본 템플릿
from langchain.prompts import PromptTemplate
# 단순 템플릿
template = PromptTemplate(
input_variables=["product"],
template="Write a product description for {product}"
)
prompt = template.format(product="iPhone 15")
Chat Prompt Template
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
system_template = "You are a {role} with {experience} years of experience."
system_message = SystemMessagePromptTemplate.from_template(system_template)
human_template = "{question}"
human_message = HumanMessagePromptTemplate.from_template(human_template)
chat_prompt = ChatPromptTemplate.from_messages([
system_message,
human_message
])
messages = chat_prompt.format_messages(
role="Python developer",
experience=10,
question="How do I optimize database queries?"
)
Few-Shot Prompt Template
from langchain.prompts import FewShotPromptTemplate
examples = [
{"input": "happy", "output": "sad"},
{"input": "hot", "output": "cold"},
{"input": "fast", "output": "slow"}
]
example_template = PromptTemplate(
input_variables=["input", "output"],
template="Input: {input}\nOutput: {output}"
)
few_shot_prompt = FewShotPromptTemplate(
examples=examples,
example_prompt=example_template,
prefix="Find the antonym:",
suffix="Input: {word}\nOutput:",
input_variables=["word"]
)
print(few_shot_prompt.format(word="big"))
4. Memory: 대화 이력
ConversationBufferMemory
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
memory = ConversationBufferMemory()
conversation = ConversationChain(
llm=llm,
memory=memory,
verbose=True
)
print(conversation.predict(input="Hi, I'm John"))
# "Hello John! How can I help you today?"
print(conversation.predict(input="What's my name?"))
# "Your name is John."
# 메모리 확인
print(memory.load_memory_variables({}))
ConversationBufferWindowMemory
from langchain.memory import ConversationBufferWindowMemory
# 최근 5개 메시지만 유지
memory = ConversationBufferWindowMemory(k=5)
conversation = ConversationChain(
llm=llm,
memory=memory
)
ConversationSummaryMemory
from langchain.memory import ConversationSummaryMemory
# 대화를 요약하여 저장 (토큰 절약)
memory = ConversationSummaryMemory(llm=llm)
conversation = ConversationChain(
llm=llm,
memory=memory
)
5. Agents: 자율 도구 사용
기본 Agent
from langchain.agents import initialize_agent, AgentType, Tool
from langchain_openai import ChatOpenAI
# 도구 정의
def search_wikipedia(query):
    """Stub search tool; a real implementation would call the Wikipedia API."""
    return "Wikipedia result for: {}".format(query)
def calculate(expression):
    """Evaluate a math expression string and return the result as a string.

    Returns the literal "Invalid expression" on any evaluation error.

    WARNING: eval() executes arbitrary Python. Never pass it untrusted
    input; for production use ast.literal_eval or a real expression parser.
    """
    try:
        return str(eval(expression))
    except Exception:
        # Narrowed from a bare `except:` so SystemExit / KeyboardInterrupt
        # are not silently swallowed.
        return "Invalid expression"
# Register the callables as agent tools; the natural-language descriptions
# are what the LLM reads to decide which tool to invoke.
tools = [
    Tool(
        name="Wikipedia",
        func=search_wikipedia,
        description="Search Wikipedia for information",
    ),
    Tool(
        name="Calculator",
        func=calculate,
        description="Calculate mathematical expressions. Input should be a valid Python expression.",
    ),
]

# Build a ReAct-style agent; temperature=0 keeps tool selection deterministic.
agent = initialize_agent(
    tools=tools,
    llm=ChatOpenAI(model="gpt-4o-mini", temperature=0),
    agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
)

# Execute. NOTE(review): agent.run() is deprecated in newer LangChain
# releases in favor of agent.invoke(); kept here to match the original API.
result = agent.run("What is the population of Seoul multiplied by 2?")
Custom Tool
from langchain.tools import BaseTool
from typing import Optional

class WeatherTool(BaseTool):
    """Tool that returns the current weather for a location."""

    # Recent LangChain versions build BaseTool on Pydantic v2, which requires
    # class fields to carry type annotations; a plain `name = "weather"`
    # raises a validation error at class-definition time.
    name: str = "weather"
    description: str = "Get current weather for a location"

    def _run(self, location: str) -> str:
        # A real implementation would call a weather API; stubbed here.
        return f"Weather in {location}: Sunny, 20°C"

    async def _arun(self, location: str) -> str:
        # Async entry point; delegates to the synchronous implementation.
        return self._run(location)

tools = [WeatherTool()]
agent = initialize_agent(tools, llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION)
6. RAG 구현
문서 로딩
from langchain.document_loaders import TextLoader, PyPDFLoader, WebBaseLoader
# 텍스트 파일
loader = TextLoader("document.txt")
documents = loader.load()
# PDF
loader = PyPDFLoader("document.pdf")
documents = loader.load()
# 웹 페이지
loader = WebBaseLoader("https://example.com")
documents = loader.load()
문서 분할
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200,
length_function=len
)
chunks = text_splitter.split_documents(documents)
print(f"Split into {len(chunks)} chunks")
벡터 스토어 생성
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
# 임베딩 모델
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
# 벡터 스토어 생성
vectorstore = Chroma.from_documents(
documents=chunks,
embedding=embeddings,
persist_directory="./chroma_db"
)
RAG Chain
from langchain.chains import RetrievalQA
qa_chain = RetrievalQA.from_chain_type(
llm=llm,
chain_type="stuff",
retriever=vectorstore.as_retriever(search_kwargs={"k": 3})
)
# 질문
question = "What is the main topic of the document?"
answer = qa_chain.run(question)
print(answer)
고급 RAG
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
memory = ConversationBufferMemory(
memory_key="chat_history",
return_messages=True
)
qa = ConversationalRetrievalChain.from_llm(
llm=llm,
retriever=vectorstore.as_retriever(),
memory=memory
)
# 대화형 질문
print(qa({"question": "What is RAG?"}))
print(qa({"question": "Can you explain more about it?"}))
7. Vector Stores
Chroma
from langchain.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()
# 생성
vectorstore = Chroma.from_texts(
texts=["LangChain is awesome", "Python is great"],
embedding=embeddings,
persist_directory="./db"
)
# 유사도 검색
results = vectorstore.similarity_search("LangChain", k=2)
for doc in results:
print(doc.page_content)
FAISS
from langchain.vectorstores import FAISS

# Build an index directly from raw texts.
vectorstore = FAISS.from_texts(
    texts=["text1", "text2"],
    embedding=embeddings,
)

# Persist to disk and load it back.
vectorstore.save_local("faiss_index")
# Recent LangChain versions refuse to load pickled FAISS indexes unless you
# opt in, because pickle deserialization can execute arbitrary code.
# Only enable this flag for index files you created yourself.
vectorstore = FAISS.load_local(
    "faiss_index",
    embeddings,
    allow_dangerous_deserialization=True,
)
Pinecone (클라우드)
from langchain.vectorstores import Pinecone
import pinecone
pinecone.init(api_key="...", environment="...")
vectorstore = Pinecone.from_texts(
texts=["text1", "text2"],
embedding=embeddings,
index_name="my-index"
)
8. 실전 예제
예제 1: 문서 Q&A 챗봇
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory


class DocumentQABot:
    """Conversational Q&A bot over a directory of .txt documents."""

    def __init__(self, docs_path):
        # 1. Load every .txt file under docs_path (recursive glob).
        documents = DirectoryLoader(docs_path, glob="**/*.txt").load()

        # 2. Split into overlapping chunks so retrieval keeps local context.
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        chunks = splitter.split_documents(documents)

        # 3. Embed the chunks into a persistent Chroma store.
        self.vectorstore = Chroma.from_documents(
            chunks, OpenAIEmbeddings(), persist_directory="./db"
        )

        # 4. LLM plus a buffer memory keyed the way the chain expects.
        chat_llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
        history = ConversationBufferMemory(
            memory_key="chat_history",
            return_messages=True,
        )

        # 5. Retrieval chain: top-3 chunks per question, with chat history.
        self.qa_chain = ConversationalRetrievalChain.from_llm(
            llm=chat_llm,
            retriever=self.vectorstore.as_retriever(search_kwargs={"k": 3}),
            memory=history,
        )

    def ask(self, question):
        """Ask a question; prior turns are visible to the chain via memory."""
        return self.qa_chain({"question": question})["answer"]


# Usage
bot = DocumentQABot("./docs")
print(bot.ask("What is the main topic?"))
print(bot.ask("Can you explain more?"))
예제 2: 코드 생성 Assistant
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser


class CodeGenerator:
    """Wraps an LLM chain that generates code for a given task and language."""

    def __init__(self):
        # Low temperature keeps generated code focused and reproducible.
        self.llm = ChatOpenAI(model="gpt-4o", temperature=0.2)
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an expert programmer.
Generate clean, well-commented code following best practices.
Include error handling and type hints."""),
            ("user", """Language: {language}
Task: {task}
Generate code:"""),
        ])
        self.chain = self.prompt | self.llm | StrOutputParser()

    def generate(self, task, language="python"):
        """Return generated source code as a plain string."""
        payload = {"language": language, "task": task}
        return self.chain.invoke(payload)


# Usage
generator = CodeGenerator()
code = generator.generate(
    "Read a CSV file and calculate column averages",
    "python",
)
print(code)
예제 3: 다국어 번역기
from langchain.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI


class Translator:
    """Thin wrapper around an LLM translation prompt."""

    def __init__(self):
        self.llm = ChatOpenAI(model="gpt-4o-mini")
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", "You are a professional translator."),
            ("user", "Translate to {target_lang}:\n\n{text}"),
        ])
        self.chain = self.prompt | self.llm

    def translate(self, text, target_lang):
        """Translate `text` into `target_lang`; returns the message content."""
        message = self.chain.invoke({"text": text, "target_lang": target_lang})
        return message.content


# Usage
translator = Translator()
result = translator.translate("Hello, world!", "Korean")
print(result)  # "안녕하세요, 세계!"
RAG 심화
커스텀 Retriever
from langchain.schema import BaseRetriever, Document
from typing import List

class CustomRetriever(BaseRetriever):
    """Retriever backed by a custom search source (database, API, ...)."""

    def _get_relevant_documents(self, query: str) -> List[Document]:
        # Custom lookup logic — e.g. a database query or an external API call.
        hits = search_my_database(query)
        return [Document(page_content=hit) for hit in hits]

    async def _aget_relevant_documents(self, query: str) -> List[Document]:
        # Async entry point; reuses the synchronous implementation.
        return self._get_relevant_documents(query)
Reranking
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
# 기본 retriever
base_retriever = vectorstore.as_retriever(search_kwargs={"k": 10})
# Compressor (관련성 높은 것만 선택)
compressor = LLMChainExtractor.from_llm(llm)
# Compression retriever
compression_retriever = ContextualCompressionRetriever(
base_compressor=compressor,
base_retriever=base_retriever
)
# 사용
docs = compression_retriever.get_relevant_documents("What is LangChain?")
비용 최적화
토큰 계산
import tiktoken
def count_tokens(text, model="gpt-4o-mini"):
encoding = tiktoken.encoding_for_model(model)
return len(encoding.encode(text))
# Chain에서 토큰 추적
from langchain.callbacks import get_openai_callback
with get_openai_callback() as cb:
result = chain.invoke({"input": "..."})
print(f"Total Tokens: {cb.total_tokens}")
print(f"Prompt Tokens: {cb.prompt_tokens}")
print(f"Completion Tokens: {cb.completion_tokens}")
print(f"Total Cost: ${cb.total_cost:.6f}")
캐싱
from langchain.cache import InMemoryCache, SQLiteCache
from langchain.globals import set_llm_cache
# 메모리 캐시
set_llm_cache(InMemoryCache())
# SQLite 캐시 (영구 저장)
set_llm_cache(SQLiteCache(database_path=".langchain.db"))
# 동일한 질문은 캐시에서 반환 (API 호출 없음)
llm.invoke("What is 2+2?") # API 호출
llm.invoke("What is 2+2?") # 캐시에서 반환
에러 처리
Retry 설정
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(
model="gpt-4o-mini",
max_retries=3,
timeout=30
)
Fallback
# Use the langchain_openai package, consistent with the rest of this guide;
# the old `langchain.chat_models` import path is deprecated.
from langchain_openai import ChatOpenAI
from langchain.llms import OpenAI  # NOTE(review): unused here; kept from the original snippet — confirm before removing

# Primary model
primary_llm = ChatOpenAI(model="gpt-4o")
# Fallback model (cheaper)
fallback_llm = ChatOpenAI(model="gpt-4o-mini")

# Requests that fail on the primary are automatically retried on the fallback.
llm = primary_llm.with_fallbacks([fallback_llm])
실전 프로젝트: 문서 분석 API
from fastapi import FastAPI, UploadFile, HTTPException
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
import tempfile
import os
app = FastAPI()
class DocumentAnalyzer:
    """Indexes uploaded PDFs and answers questions per document id."""

    def __init__(self):
        self.embeddings = OpenAIEmbeddings()
        self.llm = ChatOpenAI(model="gpt-4o-mini")
        # Maps doc_id -> Chroma vector store for that document.
        self.vectorstores = {}

    def process_document(self, file_path, doc_id):
        """Load, split, and index one PDF; returns the number of chunks."""
        pages = PyPDFLoader(file_path).load()
        splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
        )
        chunks = splitter.split_documents(pages)
        # One persistent store per document keeps indexes isolated.
        self.vectorstores[doc_id] = Chroma.from_documents(
            chunks,
            self.embeddings,
            persist_directory=f"./db/{doc_id}",
        )
        return len(chunks)

    def query(self, doc_id, question):
        """Answer a question against a previously indexed document."""
        if doc_id not in self.vectorstores:
            raise ValueError("Document not found")
        qa = RetrievalQA.from_chain_type(
            llm=self.llm,
            retriever=self.vectorstores[doc_id].as_retriever(),
        )
        return qa.run(question)
analyzer = DocumentAnalyzer()

@app.post("/upload")
async def upload_document(file: UploadFile):
    """Accept a PDF upload, index it, and report the chunk count."""
    # Spool the upload to a temp file because PyPDFLoader reads from a path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(await file.read())
        tmp_path = tmp.name
    try:
        doc_id = file.filename
        chunk_count = analyzer.process_document(tmp_path, doc_id)
        return {"doc_id": doc_id, "chunks": chunk_count}
    finally:
        # Always remove the temp file, even if indexing fails.
        os.unlink(tmp_path)
@app.post("/query")
async def query_document(doc_id: str, question: str):
    """Answer a question against a previously uploaded document."""
    try:
        return {"answer": analyzer.query(doc_id, question)}
    except Exception as e:
        # Surface lookup/LLM failures to the client as a 400 response.
        raise HTTPException(status_code=400, detail=str(e))
LangChain vs 직접 API 호출
직접 API 호출이 나은 경우
# 간단한 작업
response = client.chat.completions.create(
model="gpt-4o-mini",
messages=[{"role": "user", "content": "Translate to Korean: Hello"}]
)
LangChain이 나은 경우
# 복잡한 워크플로우
chain = (
load_documents
| split_documents
| create_embeddings
| store_in_vectordb
| retrieve_relevant
| generate_answer
)
베스트 프랙티스
1. 환경 변수 관리
# ✅ .env 파일
OPENAI_API_KEY=sk-...
PINECONE_API_KEY=...
# ✅ 코드에서 로드
from dotenv import load_dotenv
load_dotenv()
2. 에러 처리
# ✅ 모든 Chain에 try-except
try:
result = chain.invoke({"input": "..."})
except Exception as e:
logger.error(f"Chain error: {e}")
# 폴백 로직
3. 로깅
# ✅ Verbose 모드로 디버깅
chain = ConversationChain(llm=llm, verbose=True)
# ✅ 커스텀 콜백
from langchain.callbacks import StdOutCallbackHandler
chain.invoke({"input": "..."}, callbacks=[StdOutCallbackHandler()])
4. 테스트
# Unit test: the chain should return a non-empty result for a simple input.
def test_chain():
    chain = create_my_chain()
    output = chain.invoke({"input": "test"})
    assert len(output) > 0
참고 자료: LangChain 공식 문서 (https://python.langchain.com), LangChain GitHub 저장소 (https://github.com/langchain-ai/langchain)
한 줄 요약: LangChain은 LLM 애플리케이션 개발을 단순화하는 프레임워크로, Chains, Agents, Memory, RAG를 쉽게 구현할 수 있으며, 복잡한 AI 워크플로우 구축에 최적화되어 있습니다.