TruLens + OpenAI Agents SDK: Basic Tools Example

This notebook demonstrates how to instrument an OpenAI Agents SDK application with TruLens for observability and evaluation. It uses:

- OpenAI — via the standard OpenAI API
- function_tool — a custom Python function exposed to the agent as a tool
- TruLens — with the default SQLite database for tracing and feedback evaluation

Prerequisites: the OPENAI_API_KEY environment variable must be set.

In [ ]:

Copied!

!pip install -q openai-agents trulens-core trulens-providers-openai trulens-otel-semconv trulens-feedback
!pip install -q openai-agents trulens-core trulens-providers-openai trulens-otel-semconv trulens-feedback

In [ ]:

Copied!





from __future__ import annotations

from agents import Agent, AsyncOpenAI, OpenAIChatCompletionsModel, Runner, function_tool, set_tracing_disabled
from trulens.core import Metric, Selector, TruSession
from trulens.core.otel.instrument import instrument
from trulens.otel.semconv.trace import SpanAttributes
from trulens.apps.app import TruApp
from trulens.providers.openai import OpenAI as fOpenAI
from __future__ import annotations

from agents import Agent, AsyncOpenAI, OpenAIChatCompletionsModel, Runner, function_tool, set_tracing_disabled
from trulens.core import Metric, Selector, TruSession
from trulens.core.otel.instrument import instrument
from trulens.otel.semconv.trace import SpanAttributes
from trulens.apps.app import TruApp
from trulens.providers.openai import OpenAI as fOpenAI

In [ ]:

Copied!

# Turn off the Agents SDK's own tracing. NOTE(review): presumably so that
# spans are collected only through the TruLens instrumentation set up
# later in this notebook; confirm.
set_tracing_disabled(True)

In [ ]:

Copied!

# Single TruLens session for the notebook; constructed with no arguments,
# so it uses TruLens' default configuration.
session = TruSession()

Define Tools and Agent

In [ ]:

Copied!





# Demo tool: looks up canned weather strings from an in-memory table.
# `@instrument` wraps the raw function as a TOOL span; `@function_tool` is
# applied last so the agent receives the instrumented callable.
# The docstring is intentionally left as a single line: `function_tool`
# uses it as the tool description, so editing it would change what the
# model sees.
@function_tool
@instrument(
    name="get_weather",
    span_type=SpanAttributes.SpanType.TOOL,
)
def get_weather(city: str) -> str:
    """Get the current weather for a city."""
    weather_data = {
        "new york": "72°F, Partly Cloudy",
        "london": "58°F, Rainy",
        "tokyo": "68°F, Clear",
        "san francisco": "61°F, Foggy",
        "paris": "65°F, Sunny",
    }
    # Normalise the key so that case and stray surrounding whitespace
    # (e.g. " Tokyo ") do not cause a spurious miss; the reply still echoes
    # the city exactly as it was passed in.
    result = weather_data.get(city.strip().lower())
    if result:
        return f"{city}: {result}"
    return f"Weather data not available for {city}"

In [ ]:

Copied!





# One async OpenAI client, created with no arguments (credentials are not
# passed here; see the OPENAI_API_KEY prerequisite).
openai_client = AsyncOpenAI()

# The agent under test: a chat-completions model plus the single
# `get_weather` tool defined above.
agent = Agent(
    name="Research Assistant",
    instructions=(
        "You are a helpful research assistant. "
        "Use the weather tool for weather queries. "
        "Be concise in your answers."
    ),
    tools=[get_weather],
    model=OpenAIChatCompletionsModel(
        model="gpt-4o-mini",
        openai_client=openai_client,
    ),
)

Instrument with TruLens

In [ ]:

Copied!





class AgentApp:
    """Thin wrapper that exposes the module-level `agent` through one
    instrumented entry point, `ask`."""

    # The span is tagged as an AGENT span. The attribute mapping names the
    # `question` argument as the record input and "return" as the record
    # output. NOTE(review): "return" presumably refers to the method's
    # return value; confirm against the TruLens `instrument` docs.
    @instrument(
        span_type=SpanAttributes.SpanType.AGENT,
        attributes={
            SpanAttributes.RECORD_ROOT.INPUT: "question",
            SpanAttributes.RECORD_ROOT.OUTPUT: "return",
        },
    )
    async def ask(self, question: str) -> str:
        """Run the agent on `question` and return its final output."""
        result = await Runner.run(agent, question)
        return result.final_output

Define Metrics

In [ ]:

Copied!





# Feedback provider whose methods implement the metrics below.
provider = fOpenAI(model_engine="gpt-4o-mini")

# Relevance of the answer to the question: fed the record's input as
# `prompt` and the record's output as `response`.
f_answer_relevance = Metric(
    provider.relevance_with_cot_reasons,
    name="Answer Relevance",
    selectors={
        "prompt": Selector.select_record_input(),
        "response": Selector.select_record_output(),
    },
)

# Coherence of the answer on its own: fed only the record's output.
f_coherence = Metric(
    provider.coherence_with_cot_reasons,
    name="Coherence",
    selectors={
        "text": Selector.select_record_output(),
    },
)

In [ ]:

Copied!





agent_app = AgentApp()

# Register the app with TruLens once, under a fixed name and version, with
# `ask` as its main method and both metrics attached as feedbacks.
tru_app = TruApp(
    agent_app,
    app_name="Research Assistant",
    app_version="v1",
    main_method=agent_app.ask,
    feedbacks=[f_answer_relevance, f_coherence],
)

Run Example Queries

In [ ]:

Copied!





questions = [
    "What is the weather in Tokyo?",
]

for q in questions:
    print(f"\nQ: {q}")
    with tru_app as recording:
        answer = await agent_app.ask(q)
    print(f"A: {answer[:500]}")
questions = [
    "What is the weather in Tokyo?",
]

for q in questions:
    print(f"\nQ: {q}")
    with tru_app as recording:
        answer = await agent_app.ask(q)
    print(f"A: {answer[:500]}")

View Results

In [ ]:

Copied!

# Last expression in the cell, so the notebook displays the returned
# leaderboard.
session.get_leaderboard()

Launch Dashboard

Launch the TruLens dashboard in a new browser tab:

In [ ]:

Copied!

from trulens.dashboard import run_dashboard

# Launch the dashboard once; called with no arguments, so TruLens'
# defaults apply.
run_dashboard()