TruLens + OpenAI Agents SDK: Basic Tools Example
This notebook demonstrates how to instrument an OpenAI Agents SDK application with TruLens for observability and evaluation. It uses:
- OpenAI — via the standard OpenAI API
- function_tool — a custom Python function as a tool
- TruLens — with the default SQLite database for tracing and feedback evaluation
Prerequisites: the OPENAI_API_KEY environment variable must be set before running the notebook.
In [ ]:
Copied!
!pip install -q openai-agents trulens-core trulens-providers-openai trulens-otel-semconv trulens-feedback
!pip install -q openai-agents trulens-core trulens-providers-openai trulens-otel-semconv trulens-feedback
In [ ]:
Copied!
from __future__ import annotations
from agents import Agent, AsyncOpenAI, OpenAIChatCompletionsModel, Runner, function_tool, set_tracing_disabled
from trulens.core import Metric, Selector, TruSession
from trulens.core.otel.instrument import instrument
from trulens.otel.semconv.trace import SpanAttributes
from trulens.apps.app import TruApp
from trulens.providers.openai import OpenAI as fOpenAI
from __future__ import annotations
from agents import Agent, AsyncOpenAI, OpenAIChatCompletionsModel, Runner, function_tool, set_tracing_disabled
from trulens.core import Metric, Selector, TruSession
from trulens.core.otel.instrument import instrument
from trulens.otel.semconv.trace import SpanAttributes
from trulens.apps.app import TruApp
from trulens.providers.openai import OpenAI as fOpenAI
In [ ]:
Copied!
# Globally disable the Agents SDK's own tracing. Tracing in this notebook is
# handled by TruLens instead (presumably this also avoids exporting traces to
# OpenAI's tracing backend -- confirm against the Agents SDK docs).
set_tracing_disabled(True)
In [ ]:
Copied!
# Create the TruLens session. It is constructed with no arguments, so it uses
# the default database (SQLite, per the introduction of this notebook).
session = TruSession()
Define Tools and Agent
In [ ]:
Copied!
# NOTE: `function_tool` exposes this function to the agent and uses its
# docstring as the tool description, so the docstring text is part of what the
# model sees. It is intentionally left short and unchanged; implementation
# notes live in `#` comments instead.
@function_tool
@instrument(
    name="get_weather",
    span_type=SpanAttributes.SpanType.TOOL,
)
def get_weather(city: str) -> str:
    """Get the current weather for a city."""
    # Static demo data keyed by lower-case city name; this is not a live
    # weather service.
    weather_data = {
        "new york": "72°F, Partly Cloudy",
        "london": "58°F, Rainy",
        "tokyo": "68°F, Clear",
        "san francisco": "61°F, Foggy",
        "paris": "65°F, Sunny",
    }

    # Model-generated arguments can carry stray whitespace or odd casing, so
    # normalise both before the lookup.
    requested_city = city.strip()
    conditions = weather_data.get(requested_city.lower())
    if conditions is None:
        return f"Weather data not available for {requested_city}"
    return f"{requested_city}: {conditions}"
In [ ]:
Copied!
# Async OpenAI client, built with no explicit arguments; the notebook's
# stated prerequisite is that OPENAI_API_KEY is set in the environment.
openai_client = AsyncOpenAI()

# The agent under test: a single-tool assistant that talks to gpt-4o-mini
# through the Chat Completions API using the client above.
agent = Agent(
    name="Research Assistant",
    instructions=(
        "You are a helpful research assistant. "
        "Use the weather tool for weather queries. "
        "Be concise in your answers."
    ),
    tools=[get_weather],
    model=OpenAIChatCompletionsModel(
        model="gpt-4o-mini",
        openai_client=openai_client,
    ),
)
Instrument with TruLens
In [ ]:
Copied!
class AgentApp:
    """Thin wrapper that gives TruLens an instrumented entry point to the agent."""

    @instrument(
        span_type=SpanAttributes.SpanType.AGENT,
        attributes={
            # Record the `question` argument as the record input and the
            # method's return value as the record output.
            SpanAttributes.RECORD_ROOT.INPUT: "question",
            SpanAttributes.RECORD_ROOT.OUTPUT: "return",
        },
    )
    async def ask(self, question: str) -> str:
        """Run the module-level ``agent`` on ``question`` and return its answer.

        Args:
            question: The user query passed to ``Runner.run``.

        Returns:
            The ``final_output`` of the agent run.
        """
        result = await Runner.run(agent, question)
        return result.final_output
Define Metrics
In [ ]:
Copied!
# Feedback provider: the TruLens OpenAI provider, using gpt-4o-mini as the
# evaluation model.
provider = fOpenAI(model_engine="gpt-4o-mini")

# Answer relevance: compares the recorded input (passed as `prompt`) with the
# recorded output (passed as `response`).
f_answer_relevance = Metric(
    provider.relevance_with_cot_reasons,
    name="Answer Relevance",
    selectors={
        "prompt": Selector.select_record_input(),
        "response": Selector.select_record_output(),
    },
)

# Coherence: looks only at the recorded output (passed as `text`).
f_coherence = Metric(
    provider.coherence_with_cot_reasons,
    name="Coherence",
    selectors={
        "text": Selector.select_record_output(),
    },
)
In [ ]:
Copied!
# Instantiate the wrapped agent and register it with TruLens. `main_method`
# is the instrumented `ask` method of this instance, and `feedbacks` lists
# the metrics attached to the app.
agent_app = AgentApp()
tru_app = TruApp(
    agent_app,
    app_name="Research Assistant",
    app_version="v1",
    main_method=agent_app.ask,
    feedbacks=[f_answer_relevance, f_coherence],
)
Run Example Queries
In [ ]:
Copied!
questions = [
"What is the weather in Tokyo?",
]
for q in questions:
print(f"\nQ: {q}")
with tru_app as recording:
answer = await agent_app.ask(q)
print(f"A: {answer[:500]}")
questions = [
"What is the weather in Tokyo?",
]
for q in questions:
print(f"\nQ: {q}")
with tru_app as recording:
answer = await agent_app.ask(q)
print(f"A: {answer[:500]}")
View Results
In [ ]:
Copied!
# Fetch the leaderboard from the session; as the last expression in the cell
# its value is displayed by the notebook.
session.get_leaderboard()
Launch Dashboard
Launch the TruLens dashboard in a new browser tab:
In [ ]:
Copied!
# NOTE: `trulens.dashboard` is provided by the separate `trulens-dashboard`
# package; install it first if this import fails.
from trulens.dashboard import run_dashboard

run_dashboard()