📓 LangChain Quickstart¶
In this quickstart you will create a simple LLM Chain and learn how to log it and get feedback on an LLM response.
In [ ]:
Copied!
# ! pip install trulens_eval openai langchain chromadb langchainhub bs4 tiktoken
# ! pip install trulens_eval openai langchain chromadb langchainhub bs4 tiktoken
In [ ]:
Copied!
import os
os.environ["OPENAI_API_KEY"] = "sk-..."
import os
os.environ["OPENAI_API_KEY"] = "sk-..."
Import from LangChain and TruLens¶
In [ ]:
Copied!
# Imports main tools:
from trulens_eval import TruChain, Tru
tru = Tru()
tru.reset_database()
# Imports from LangChain to build app
import bs4
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
# Imports main tools:
from trulens_eval import TruChain, Tru
tru = Tru()
tru.reset_database()
# Imports from LangChain to build app
import bs4
from langchain import hub
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import StrOutputParser
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnablePassthrough
Load documents¶
In [ ]:
Copied!
loader = WebBaseLoader(
web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
bs_kwargs=dict(
parse_only=bs4.SoupStrainer(
class_=("post-content", "post-title", "post-header")
)
),
)
docs = loader.load()
loader = WebBaseLoader(
web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
bs_kwargs=dict(
parse_only=bs4.SoupStrainer(
class_=("post-content", "post-title", "post-header")
)
),
)
docs = loader.load()
Create Vector Store¶
In [ ]:
Copied!
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200
)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(
documents=splits,
embedding=OpenAIEmbeddings()
)
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200
)
splits = text_splitter.split_documents(docs)
vectorstore = Chroma.from_documents(
documents=splits,
embedding=OpenAIEmbeddings()
)
Create RAG¶
In [ ]:
Copied!
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
def format_docs(docs):
    """Concatenate the page contents of retrieved documents, separated by blank lines."""
    return "\n\n".join(d.page_content for d in docs)
rag_chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)
retriever = vectorstore.as_retriever()
prompt = hub.pull("rlm/rag-prompt")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
def format_docs(docs):
    """Concatenate the page contents of retrieved documents, separated by blank lines."""
    return "\n\n".join(d.page_content for d in docs)
rag_chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)
Send your first request¶
In [ ]:
Copied!
rag_chain.invoke("What is Task Decomposition?")
rag_chain.invoke("What is Task Decomposition?")
Initialize Feedback Function(s)¶
In [ ]:
Copied!
from trulens_eval.feedback.provider import OpenAI
from trulens_eval import Feedback
import numpy as np
# Initialize provider class
provider = OpenAI()
# Select the context to be used in feedback. The location of the context is app-specific.
from trulens_eval.app import App
context = App.select_context(rag_chain)
from trulens_eval.feedback import Groundedness
grounded = Groundedness(groundedness_provider=OpenAI())
# Define a groundedness feedback function
f_groundedness = (
Feedback(grounded.groundedness_measure_with_cot_reasons)
.on(context.collect()) # collect context chunks into a list
.on_output()
.aggregate(grounded.grounded_statements_aggregator)
)
# Question/answer relevance between overall question and answer.
f_answer_relevance = (
Feedback(provider.relevance)
.on_input_output()
)
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
Feedback(provider.context_relevance_with_cot_reasons)
.on_input()
.on(context)
.aggregate(np.mean)
)
from trulens_eval.feedback.provider import OpenAI
from trulens_eval import Feedback
import numpy as np
# Initialize provider class
provider = OpenAI()
# Select the context to be used in feedback. The location of the context is app-specific.
from trulens_eval.app import App
context = App.select_context(rag_chain)
from trulens_eval.feedback import Groundedness
grounded = Groundedness(groundedness_provider=OpenAI())
# Define a groundedness feedback function
f_groundedness = (
Feedback(grounded.groundedness_measure_with_cot_reasons)
.on(context.collect()) # collect context chunks into a list
.on_output()
.aggregate(grounded.grounded_statements_aggregator)
)
# Question/answer relevance between overall question and answer.
f_answer_relevance = (
Feedback(provider.relevance)
.on_input_output()
)
# Question/statement relevance between question and each context chunk.
f_context_relevance = (
Feedback(provider.context_relevance_with_cot_reasons)
.on_input()
.on(context)
.aggregate(np.mean)
)
Instrument chain for logging with TruLens¶
In [ ]:
Copied!
tru_recorder = TruChain(rag_chain,
app_id='Chain1_ChatApplication',
feedbacks=[f_answer_relevance, f_context_relevance, f_groundedness])
tru_recorder = TruChain(rag_chain,
app_id='Chain1_ChatApplication',
feedbacks=[f_answer_relevance, f_context_relevance, f_groundedness])
In [ ]:
Copied!
response, tru_record = tru_recorder.with_record(rag_chain.invoke, "What is Task Decomposition?")
response, tru_record = tru_recorder.with_record(rag_chain.invoke, "What is Task Decomposition?")
In [ ]:
Copied!
json_like = tru_record.layout_calls_as_app()
json_like = tru_record.layout_calls_as_app()
In [ ]:
Copied!
json_like
json_like
In [ ]:
Copied!
from ipytree import Tree, Node
def display_call_stack(data):
    """Build an ipytree Tree visualizing a TruLens record dict.

    Top-level record fields become flat nodes; each call's stack (and any
    'expanded' sub-steps) becomes a nested branch under a 'Calls' node.
    """
    tree = Tree()
    # One node per top-level record field, rendered as "<Label>: <value>".
    metadata_fields = [
        ('Record ID', 'record_id'),
        ('App ID', 'app_id'),
        ('Cost', 'cost'),
        ('Performance', 'perf'),
        ('Timestamp', 'ts'),
        ('Tags', 'tags'),
        ('Main Input', 'main_input'),
        ('Main Output', 'main_output'),
        ('Main Error', 'main_error'),
    ]
    for label, key in metadata_fields:
        tree.add_node(Node('{}: {}'.format(label, data[key])))
    # Nested branch holding every instrumented call and its stack of steps.
    calls_node = Node('Calls')
    tree.add_node(calls_node)
    for call in data['calls']:
        call_node = Node('Call')
        calls_node.add_node(call_node)
        for step in call['stack']:
            step_node = Node('Step: {}'.format(step['path']))
            call_node.add_node(step_node)
            # Steps may carry an optional 'expanded' list of sub-steps.
            if 'expanded' in step:
                expanded_node = Node('Expanded')
                step_node.add_node(expanded_node)
                for sub_step in step['expanded']:
                    expanded_node.add_node(Node('Step: {}'.format(sub_step['path'])))
    return tree
# Usage
tree = display_call_stack(json_like)
tree
from ipytree import Tree, Node
def display_call_stack(data):
    """Build an ipytree Tree visualizing a TruLens record dict.

    Top-level record fields become flat nodes; each call's stack (and any
    'expanded' sub-steps) becomes a nested branch under a 'Calls' node.
    """
    tree = Tree()
    # One node per top-level record field, rendered as "<Label>: <value>".
    metadata_fields = [
        ('Record ID', 'record_id'),
        ('App ID', 'app_id'),
        ('Cost', 'cost'),
        ('Performance', 'perf'),
        ('Timestamp', 'ts'),
        ('Tags', 'tags'),
        ('Main Input', 'main_input'),
        ('Main Output', 'main_output'),
        ('Main Error', 'main_error'),
    ]
    for label, key in metadata_fields:
        tree.add_node(Node('{}: {}'.format(label, data[key])))
    # Nested branch holding every instrumented call and its stack of steps.
    calls_node = Node('Calls')
    tree.add_node(calls_node)
    for call in data['calls']:
        call_node = Node('Call')
        calls_node.add_node(call_node)
        for step in call['stack']:
            step_node = Node('Step: {}'.format(step['path']))
            call_node.add_node(step_node)
            # Steps may carry an optional 'expanded' list of sub-steps.
            if 'expanded' in step:
                expanded_node = Node('Expanded')
                step_node.add_node(expanded_node)
                for sub_step in step['expanded']:
                    expanded_node.add_node(Node('Step: {}'.format(sub_step['path'])))
    return tree
# Usage
tree = display_call_stack(json_like)
tree
In [ ]:
Copied!
tree
tree
In [ ]:
Copied!
with tru_recorder as recording:
llm_response = rag_chain.invoke("What is Task Decomposition?")
display(llm_response)
with tru_recorder as recording:
llm_response = rag_chain.invoke("What is Task Decomposition?")
display(llm_response)
Retrieve records and feedback¶
In [ ]:
Copied!
# The record of the app invocation can be retrieved from the `recording`:
rec = recording.get() # use .get if only one record
# recs = recording.records # use .records if multiple
display(rec)
# The record of the app invocation can be retrieved from the `recording`:
rec = recording.get() # use .get if only one record
# recs = recording.records # use .records if multiple
display(rec)
In [ ]:
Copied!
# The results of the feedback functions can be retrieved from
# `Record.feedback_results` or using the `wait_for_feedback_results` method. The
# results if retrieved directly are `Future` instances (see
# `concurrent.futures`). You can use `as_completed` to wait until they have
# finished evaluating or use the utility method:
for feedback, feedback_result in rec.wait_for_feedback_results().items():
print(feedback.name, feedback_result.result)
# See more about wait_for_feedback_results:
# help(rec.wait_for_feedback_results)
# The results of the feedback functions can be retrieved from
# `Record.feedback_results` or using the `wait_for_feedback_results` method. The
# results if retrieved directly are `Future` instances (see
# `concurrent.futures`). You can use `as_completed` to wait until they have
# finished evaluating or use the utility method:
for feedback, feedback_result in rec.wait_for_feedback_results().items():
print(feedback.name, feedback_result.result)
# See more about wait_for_feedback_results:
# help(rec.wait_for_feedback_results)
In [ ]:
Copied!
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])
records.head()
records, feedback = tru.get_records_and_feedback(app_ids=["Chain1_ChatApplication"])
records.head()
In [ ]:
Copied!
tru.get_leaderboard(app_ids=["Chain1_ChatApplication"])
tru.get_leaderboard(app_ids=["Chain1_ChatApplication"])
Explore in a Dashboard¶
In [ ]:
Copied!
tru.run_dashboard() # open a local streamlit app to explore
# tru.stop_dashboard() # stop if needed
tru.run_dashboard() # open a local streamlit app to explore
# tru.stop_dashboard() # stop if needed
Alternatively, you can run trulens-eval
from a command line in the same folder to start the dashboard.
Note: Feedback functions evaluated in the deferred manner can be seen in the "Progress" page of the TruLens dashboard.