LangChain Async¶
One of the biggest pain-points developers discuss when trying to build useful LLM applications is latency; these applications often make multiple calls to LLM APIs, each one taking a few seconds. It can be quite a frustrating user experience to stare at a loading spinner for more than a couple of seconds. Streaming helps reduce this perceived latency by returning the output of the LLM token by token, instead of all at once.
This notebook demonstrates how to monitor a LangChain streaming app with TruLens.
Import from LangChain and TruLens¶
In [ ]:
Copied!
# !pip install trulens trulens.apps.langchain trulens-providers-huggingface langchain>=0.0.342
# !pip install trulens trulens.apps.langchain trulens-providers-huggingface langchain>=0.0.342
In [ ]:
Copied!
import asyncio
from langchain.callbacks import AsyncIteratorCallbackHandler
from langchain.chains import LLMChain
from langchain.chat_models.openai import ChatOpenAI
from langchain.memory import ConversationSummaryBufferMemory
from langchain.prompts import PromptTemplate
from langchain_community.llms import OpenAI
from trulens.core import Feedback
from trulens.core import TruSession
from trulens.providers.huggingface import Huggingface
import asyncio
from langchain.callbacks import AsyncIteratorCallbackHandler
from langchain.chains import LLMChain
from langchain.chat_models.openai import ChatOpenAI
from langchain.memory import ConversationSummaryBufferMemory
from langchain.prompts import PromptTemplate
from langchain_community.llms import OpenAI
from trulens.core import Feedback
from trulens.core import TruSession
from trulens.providers.huggingface import Huggingface
In [ ]:
Copied!
import os

# Placeholder credentials for the Hugging Face feedback provider and the
# OpenAI models. Replace with real keys before running.
_credentials = {
    "HUGGINGFACE_API_KEY": "hf_...",
    "OPENAI_API_KEY": "sk-...",
}
os.environ.update(_credentials)
import os
# API keys for the Hugging Face feedback provider and the OpenAI models.
# Replace the placeholders with real keys before running.
os.environ["HUGGINGFACE_API_KEY"] = "hf_..."
os.environ["OPENAI_API_KEY"] = "sk-..."
Create Async Application¶
In [ ]:
Copied!
# Async callback handler: exposes the streamed tokens as an async iterator
# that the application code can consume.
callback = AsyncIteratorCallbackHandler()

# Chat model that answers the user; streaming must be enabled so tokens are
# emitted one at a time.
chatllm = ChatOpenAI(
    temperature=0.0,
    streaming=True,  # important
)

# Non-streaming completion model, passed to the memory below (presumably used
# to generate the running summary — confirm against the memory class docs).
llm = OpenAI(
    temperature=0.0,
)

# Summary-buffer memory keyed on the conversation history.
memory = ConversationSummaryBufferMemory(
    memory_key="chat_history",
    input_key="human_input",
    llm=llm,
    max_token_limit=50,
)

# Small-talk prompt; the history and the latest human turn are filled in.
_template = """
You are having a conversation with a person. Make small talk.
{chat_history}
Human: {human_input}
AI:"""

prompt = PromptTemplate(
    input_variables=["human_input", "chat_history"],
    template=_template,
)

# Question/answer chain built on the streaming chat model.
chain = LLMChain(llm=chatllm, prompt=prompt, memory=memory)
# Set up an async callback.
# Exposes streamed tokens as an async iterator for the application to consume.
callback = AsyncIteratorCallbackHandler()
# Streaming chat model that answers the user token-by-token.
chatllm = ChatOpenAI(
temperature=0.0,
streaming=True, # important
)
# Non-streaming completion model, handed to the memory below (presumably used
# for the running summary — confirm against ConversationSummaryBufferMemory docs).
llm = OpenAI(
temperature=0.0,
)
# Summary-buffer memory keyed on the conversation history.
memory = ConversationSummaryBufferMemory(
memory_key="chat_history",
input_key="human_input",
llm=llm,
max_token_limit=50,
)
# Setup a simple question/answer chain with streaming ChatOpenAI.
prompt = PromptTemplate(
input_variables=["human_input", "chat_history"],
template="""
You are having a conversation with a person. Make small talk.
{chat_history}
Human: {human_input}
AI:""",
)
chain = LLMChain(llm=chatllm, prompt=prompt, memory=memory)
Set up a language match feedback function¶
In [ ]:
Copied!
# TruLens session that stores records and feedback results.
session = TruSession()

# Hugging Face provider supplying the language_match feedback function.
hugs = Huggingface()

# Feedback that compares the language of the app's input with its output.
_lang_match = Feedback(hugs.language_match)
f_lang_match = _lang_match.on_input_output()
# TruLens session that stores records and feedback results.
session = TruSession()
# Hugging Face provider supplying the language_match feedback function.
hugs = Huggingface()
# Feedback comparing the language of the app's input with its output.
f_lang_match = Feedback(hugs.language_match).on_input_output()
Set up evaluation and tracking with TruLens¶
In [ ]:
Copied!
# Example of how to also get filled-in prompt templates in timeline:
from trulens.apps.langchain import TruChain
from trulens.core.instruments import instrument

# Instrument PromptTemplate.format so rendered prompts appear in the record.
instrument.method(PromptTemplate, "format")

# Wrap the chain for TruLens recording with the language-match feedback attached.
tc = TruChain(chain, feedbacks=[f_lang_match], app_name="chat_with_memory")
# Example of how to also get filled-in prompt templates in timeline:
from trulens.core.instruments import instrument
from trulens.apps.langchain import TruChain
# Instrument PromptTemplate.format so rendered prompts appear in the record.
instrument.method(PromptTemplate, "format")
# Wrap the chain for TruLens recording with the language-match feedback attached.
tc = TruChain(chain, feedbacks=[f_lang_match], app_name="chat_with_memory")
In [ ]:
Copied!
# List the components and methods TruLens instrumented on the wrapped chain.
tc.print_instrumented()
tc.print_instrumented()
Start the TruLens dashboard¶
In [ ]:
Copied!
from trulens.dashboard import run_dashboard
# Launch the TruLens web dashboard for this session.
run_dashboard(session)
from trulens.dashboard import run_dashboard
run_dashboard(session)
Use the application¶
In [ ]:
Copied!
message = "Hi. How are you?"
# Record this invocation with TruLens; the streamed call runs inside the context.
with tc as recording:
# Start the chain call as a background task so tokens can be consumed as they
# stream in through `callback`.
task = asyncio.create_task(
chain.acall(
inputs=dict(human_input=message, chat_history=[]),
callbacks=[callback],
)
)
# Note, you either need to process all of the callback iterations or await task
# for record to be available.
# Print each token as it arrives from the async iterator (notebook-style
# top-level `async for` / `await` — requires a running event loop).
async for token in callback.aiter():
print(token, end="")
# Make sure task was completed:
await task
# Retrieve the TruLens record of the call once the task has finished.
record = recording.get()
message = "Hi. How are you?"
# Record the invocation with TruLens while streaming the response.
with tc as recording:
# Background task; tokens are delivered through `callback` as they stream.
task = asyncio.create_task(
chain.acall(
inputs=dict(human_input=message, chat_history=[]),
callbacks=[callback],
)
)
# Note, you either need to process all of the callback iterations or await task
# for record to be available.
async for token in callback.aiter():
print(token, end="")
# Make sure task was completed:
await task
# The record is available after the task completes.
record = recording.get()