📓 Build and Evaluate a Web Search Agent¶
Build a web-enabled data agent that can perform web research, answer questions, and generate charts. Then evaluate it to identify failure modes.
For this example you will need access to an LLM provider (OpenAI) and a web search provider (Tavily).
!pip install trulens langgraph trulens-apps-langgraph trulens-providers-openai openai matplotlib langchain_openai langchain_tavily langchain_experimental -q
import os
# Register both provider credentials for this process in a single call.
# The values are placeholders: substitute real keys before running.
os.environ.update(
    {
        "OPENAI_API_KEY": "sk-proj-...",
        "TAVILY_API_KEY": "tvly-dev-...",
    }
)
1. Initialize the agent's state¶
State gives the agents a shared, evolving memory across nodes, so that each agent has the context and instructions it needs to act coherently and achieve the goal.
In addition to the state variables we add below, our State also inherits `messages` from MessagesState to track the conversation.
from typing import Any, Dict, List, Literal, Optional
from langgraph.graph import MessagesState
# Custom State class with specific keys
class State(MessagesState):
    """Shared graph state: the inherited ``messages`` plus planning/execution fields.

    Every key that a node writes through ``Command(update=...)`` is declared
    here, including ``final_answer``, which the chart summarizer and the
    synthesizer both set.
    """

    # The user's original query.
    user_query: Optional[str]
    # Makes the multi-agent system modular on which agents to include.
    enabled_agents: Optional[List[str]]
    # The plan as parsed from the planner's JSON reply: step number (as a
    # string key such as "1") mapped to that step's specification.
    plan: Optional[Dict[str, Dict[str, Any]]]
    # Marks the current step in the plan.
    current_step: int
    # Inbox note: tells the next agent exactly what to do at the current step.
    agent_query: Optional[str]
    # Explains the executor's decision, for continuity and traceability.
    last_reason: Optional[str]
    # Set by the executor to indicate that the planner should revise the plan.
    replan_flag: Optional[bool]
    # Replan attempts tracked per step number (step -> attempt count).
    replan_attempts: Optional[Dict[int, int]]
    # Final text answer produced by the terminal node of a run.
    final_answer: Optional[str]
2. Create planner¶
import json
from typing import Any, Dict, List
from langchain_core.messages import HumanMessage
# Maximum number of planner revisions allowed for a single plan step.
# Kept equal to the value assigned next to the executor node, which is the
# binding actually in effect when the prompts and the executor run.
MAX_REPLANS = 3
def get_agent_descriptions() -> Dict[str, Dict[str, Any]]:
    """
    Describe every agent the planner and executor can reason about.

    Returns a fresh mapping from agent key to a dict of descriptive fields
    (name, capability, use_when, limitations, output_format and, for some
    agents, position_requirement). Edit the entries below to change how the
    planner/executor prompts present the agents.
    """
    web_researcher = {
        "name": "Web Researcher",
        "capability": "Fetch public data via Tavily web search",
        "use_when": "Public information, news, current events, or external facts are needed",
        "limitations": "Cannot access private/internal company data",
        "output_format": "Raw research data and findings from public sources",
    }
    chart_generator = {
        "name": "Chart Generator",
        "capability": "Build visualizations from structured data",
        "use_when": "User explicitly requests charts, graphs, plots, visualizations (keywords: chart, graph, plot, visualise, bar-chart, line-chart, histogram, etc.)",
        "limitations": "Requires structured data input from previous steps",
        "output_format": "Visual charts and graphs",
        "position_requirement": "Must be used as final step after data gathering is complete",
    }
    chart_summarizer = {
        "name": "Chart Summarizer",
        "capability": "Summarize and explain chart visualizations",
        "use_when": "After chart_generator has created a visualization",
        "limitations": "Requires a chart as input",
        "output_format": "Written summary and analysis of chart content",
    }
    synthesizer = {
        "name": "Synthesizer",
        "capability": "Write comprehensive prose summaries of findings",
        "use_when": "Final step when no visualization is requested - combines all previous research",
        "limitations": "Requires research data from previous steps",
        "output_format": "Coherent written summary incorporating all findings",
        "position_requirement": "Should be used as final step when no chart is needed",
    }
    # Insertion order matters: prompt builders iterate this mapping in order.
    return {
        "web_researcher": web_researcher,
        "chart_generator": chart_generator,
        "chart_summarizer": chart_summarizer,
        "synthesizer": synthesizer,
    }
def _get_enabled_agents(state: State | None = None) -> List[str]:
"""Return enabled agents; if absent, use baseline/default.
Supports both dict-style and attribute-style state objects.
"""
baseline = [
"web_researcher",
"chart_generator",
"chart_summarizer",
"synthesizer",
]
if not state:
return baseline
val = (
state.get("enabled_agents")
if hasattr(state, "get")
else getattr(state, "enabled_agents", None)
)
if isinstance(val, list) and val:
allowed = {
"web_researcher",
"chart_generator",
"chart_summarizer",
"synthesizer",
}
filtered = [a for a in val if a in allowed]
return filtered
return baseline
def format_agent_list_for_planning(state: State | None = None) -> str:
    """
    Format agent descriptions for the planning prompt.

    Produces one bullet line per enabled agent, in the order of
    ``get_agent_descriptions()``, pairing the agent key with its
    ``capability`` text. Returns an empty string when no described agent is
    enabled.
    """
    descriptions = get_agent_descriptions()
    enabled = set(_get_enabled_agents(state))
    # The bullet and dash were mis-encoded in the original prompt text; they
    # are written here as the intended characters.
    return "\n".join(
        f" • `{agent_key}` – {details['capability']}"
        for agent_key, details in descriptions.items()
        if agent_key in enabled
    )
def format_agent_guidelines_for_planning(state: State | None = None) -> str:
    """
    Format agent usage guidelines for the planning prompt.

    Emits one guideline line per enabled agent among ``web_researcher``,
    ``chart_generator`` and ``synthesizer``, built from the ``use_when`` and
    ``position_requirement`` texts of ``get_agent_descriptions()``. The
    sentences are phrased so the interpolated texts read grammatically (the
    position requirements already start with "Must be"/"Should be").
    """
    descriptions = get_agent_descriptions()
    enabled = set(_get_enabled_agents(state))
    guidelines = []
    if "web_researcher" in enabled:
        use_when = descriptions["web_researcher"]["use_when"].lower()
        guidelines.append(f"- Use `web_researcher` when {use_when}.")
    # Chart generator specific rules
    if "chart_generator" in enabled:
        chart_desc = descriptions["chart_generator"]
        cs_hint = (
            " A `chart_summarizer` should be used to summarize the chart."
            if "chart_summarizer" in enabled
            else ""
        )
        guidelines.append(
            f"- **Include `chart_generator` _only_ if {chart_desc['use_when'].lower()}**. "
            f"If included, `chart_generator` {chart_desc['position_requirement'].lower()}. "
            "Visualizations should include all of the data from the previous steps "
            f"that is reasonable for the chart type.{cs_hint}"
        )
    # Synthesizer default
    if "synthesizer" in enabled:
        synth_desc = descriptions["synthesizer"]
        guidelines.append(
            f"- Otherwise use `synthesizer` ({synth_desc['position_requirement'].lower()}), "
            "and be sure to include all of the data from the previous steps."
        )
    return "\n".join(guidelines)
def plan_prompt(state: State) -> HumanMessage:
    """
    Build the prompt that instructs the LLM to return a high-level plan.

    Reads ``replan_flag``, ``user_query``, ``messages``, ``plan`` and
    ``last_reason`` from ``state``. When ``replan_flag`` is truthy the prior
    plan and the replan reason are appended so the model revises instead of
    starting over.

    Returns a ``HumanMessage`` whose content is the full prompt text.
    """
    replan_flag = state.get("replan_flag", False)
    # Fall back to the first message only when no query is stored, so a
    # state without messages is fine as long as `user_query` is set.
    user_query = state.get("user_query")
    if user_query is None:
        messages = state.get("messages") or []
        user_query = messages[0].content if messages else ""
    prior_plan = state.get("plan") or {}
    replan_reason = state.get("last_reason", "")
    # Get agent descriptions dynamically
    agent_list = format_agent_list_for_planning(state)
    agent_guidelines = format_agent_guidelines_for_planning(state)
    # Build planner agent enum based on enabled agents
    plannable = (
        "web_researcher",
        "cortex_researcher",
        "chart_generator",
        "synthesizer",
    )
    enabled_for_planner = [
        agent for agent in _get_enabled_agents(state) if agent in plannable
    ]
    planner_agent_enum = (
        " | ".join(enabled_for_planner)
        or "web_researcher | chart_generator | synthesizer"
    )
    prompt = f"""
You are the **Planner** in a multi-agent system. Break the user's request
into a sequence of numbered steps (1, 2, 3, …). **There is no hard limit on
step count** as long as the plan is concise and each step has a clear goal.
You may decompose the user's query into sub-queries, each of which is a
separate step. Break the query into the smallest possible sub-queries
so that each sub-query is answerable with a single data source.
For example, if the user's query is "What were the key
action items in the last quarter, and what was a recent news story for
each of them?", you may break it into steps:
1. Fetch the key action items in the last quarter.
2. Fetch a recent news story for the first action item.
3. Fetch a recent news story for the second action item.
4. Fetch a recent news story for the last action item.
Here is a list of available agents you can call upon to execute the tasks in your plan. You may call only one agent per step.
{agent_list}
Return **ONLY** valid JSON (no markdown, no explanations) in this form:
{{
"1": {{
"agent": "{planner_agent_enum}",
"action": "string",
"goal": "string",
"pre_conditions": ["string", ...],
"post_conditions": ["string", ...]
}},
"2": {{ ... }},
"3": {{ ... }}
}}
Guidelines:
{agent_guidelines}
"""
    if replan_flag:
        prompt += f"""
The current plan needs revision because: {replan_reason}
Current plan:
{json.dumps(prior_plan, indent=2)}
When replanning:
- Focus on UNBLOCKING the workflow rather than perfecting it.
- Only modify steps that are truly preventing progress.
- Prefer simpler, more achievable alternatives over complex rewrites.
"""
    else:
        prompt += "\nGenerate a new plan from scratch."
    prompt += f'\nUser query: "{user_query}"'
    return HumanMessage(content=prompt)
from langchain_core.messages import HumanMessage
from langchain_openai import ChatOpenAI
from langgraph.types import Command
# -- LLMs ----------------------------------------------------------------
# Reasoning model used by both the planner and the executor nodes. The
# `response_format` of type "json_object" is passed through `model_kwargs`;
# both nodes parse the reply with `json.loads`.
reasoning_llm = ChatOpenAI(
    model="o3",
    model_kwargs={"response_format": {"type": "json_object"}},
)
def planner_node(state: State) -> Command[Literal["executor"]]:
    """
    Runs the planning LLM and stores the resulting plan in state.

    Returns a ``Command`` that routes to ``"executor"`` and updates ``plan``,
    ``messages`` (the raw plan, named ``"replan"`` or ``"initial_plan"``),
    ``user_query``, ``current_step``, ``replan_flag``, ``last_reason`` and
    ``enabled_agents``.

    Raises:
        ValueError: if the reply is not valid JSON, or is valid JSON but not
            an object (the executor looks steps up by key).
    """
    # 1. Invoke LLM with the planner prompt
    llm_reply = reasoning_llm.invoke([plan_prompt(state)])
    raw_content = llm_reply.content
    content_str = raw_content if isinstance(raw_content, str) else str(raw_content)
    # 2. Validate JSON
    try:
        parsed_plan = json.loads(content_str)
    except json.JSONDecodeError as exc:
        raise ValueError(
            f"Planner returned invalid JSON:\n{raw_content}"
        ) from exc
    if not isinstance(parsed_plan, dict):
        raise ValueError(
            f"Planner returned JSON that is not an object:\n{raw_content}"
        )
    # 3. Store as current plan only
    replan = state.get("replan_flag", False)
    updated_plan: Dict[str, Any] = parsed_plan
    # Only look at the first message when no query has been stored yet.
    user_query = state.get("user_query")
    if user_query is None:
        user_query = state["messages"][0].content
    # A replan stays on the step being revised; a fresh plan starts at 1.
    current_step = (state.get("current_step") or 1) if replan else 1
    return Command(
        update={
            "plan": updated_plan,
            "messages": [
                HumanMessage(
                    content=raw_content,
                    name="replan" if replan else "initial_plan",
                )
            ],
            "user_query": user_query,
            "current_step": current_step,
            # Preserve replan flag so executor runs planned agent once before reconsidering
            "replan_flag": state.get("replan_flag", False),
            "last_reason": "",
            "enabled_agents": state.get("enabled_agents"),
        },
        goto="executor",
    )
3. Create executor¶
def format_agent_guidelines_for_executor(state: State | None = None) -> str:
    """
    Format agent usage guidelines for the executor prompt.

    Emits one line per enabled research agent that has an entry in
    ``get_agent_descriptions()``. An agent without a description entry
    (``cortex_researcher`` has none here) is skipped rather than raising
    ``KeyError``.
    """
    descriptions = get_agent_descriptions()
    enabled = _get_enabled_agents(state)
    # (agent key, connective placed before the lower-cased `use_when` text)
    research_agents = (
        ("web_researcher", "when"),
        ("cortex_researcher", "for"),
    )
    guidelines = []
    for agent_key, connective in research_agents:
        description = descriptions.get(agent_key)
        if agent_key not in enabled or description is None:
            continue
        guidelines.append(
            f'- Use `"{agent_key}"` {connective} {description["use_when"].lower()}.'
        )
    return "\n".join(guidelines)
def executor_prompt(state: State) -> HumanMessage:
    """
    Build the single-turn JSON prompt that drives the executor LLM.

    Reads ``current_step``, ``plan``, ``replan_attempts``, ``messages``,
    ``user_query`` and ``replan_flag`` from ``state``. The prompt asks for a
    JSON object with the keys ``replan``, ``goto``, ``reason`` and ``query``,
    which are the keys ``executor_node`` parses.

    Returns a ``HumanMessage`` whose content is the full prompt text.
    """
    # Steps are 1-based in the plan, matching the executor node's default.
    step = int(state.get("current_step") or 1)
    latest_plan: Dict[str, Any] = state.get("plan") or {}
    plan_block: Dict[str, Any] = latest_plan.get(str(step), {})
    max_replans = MAX_REPLANS
    # The prompt's rules compare `attempts` to the cap, so state the count.
    attempts = (state.get("replan_attempts") or {}).get(step, 0)
    # Get agent guidelines dynamically
    executor_guidelines = format_agent_guidelines_for_executor(state)
    plan_agent = plan_block.get("agent", "web_researcher")
    messages_tail = (state.get("messages") or [])[-4:]
    user_query = state.get("user_query")
    just_replanned = state.get("replan_flag")
    known_agents = (
        "web_researcher",
        "cortex_researcher",
        "chart_generator",
        "chart_summarizer",
        "synthesizer",
    )
    routable = [
        agent for agent in _get_enabled_agents(state) if agent in known_agents
    ] + ["planner"]
    agent_roster = "`, `".join(sorted(set(routable)))
    goto_options = "|".join(routable)
    prompt_text = f"""
You are the **executor** in a multi-agent system with these agents:
`{agent_roster}`.
**Tasks**
1. Decide if the current plan needs revision. → `"replan": true|false`
2. Decide which agent to run next. → `"goto": "<agent_name>"`
3. Give one-sentence justification. → `"reason": "<text>"`
4. Write the exact question that the chosen agent should answer
→ "query": "<text>"
**Guidelines**
{executor_guidelines}
- After **{max_replans}** failed replans for the same step, move on.
- If you *just replanned* (replan_flag is true) let the assigned agent try before
requesting another replan.
Respond **only** with valid JSON (no additional text):
{{
"replan": <true|false>,
"goto": "<{goto_options}>",
"reason": "<1 sentence>",
"query": "<text>"
}}
**PRIORITIZE FORWARD PROGRESS:** Only replan if the current step is completely blocked.
1. If any reasonable data was obtained that addresses the step's core goal, set `"replan": false` and proceed.
2. Set `"replan": true` **only if** ALL of these conditions are met:
• The step has produced zero useful information
• The missing information cannot be approximated or obtained by remaining steps
• `attempts < {max_replans}`
3. When `attempts == {max_replans}`, always move forward (`"replan": false`).
### Decide `"goto"`
- If `"replan": true` → `"goto": "planner"`.
- If current step has made reasonable progress → move to next step's agent.
- Otherwise execute the current step's assigned agent (`{plan_agent}`).
### Build `"query"`
Write a clear, standalone instruction for the chosen agent. If the chosen agent
is `web_researcher` or `cortex_researcher`, the query should be a standalone question,
written in plain english, and answerable by the agent.
Ensure that the query uses consistent language as the user's query.
Context you can rely on
- User query ..............: {user_query}
- Current step index ......: {step}
- Current plan step .......: {plan_block}
- Replan attempts so far ..: {attempts}
- Just-replanned flag .....: {just_replanned}
- Previous messages .......: {messages_tail}
Respond **only** with JSON, no extra text.
"""
    return HumanMessage(content=prompt_text)
from langgraph.graph import END
# Cap on planner revisions per plan step. This assignment rebinds the
# module-level name that is also assigned in the planner section; the
# executor node and the executor prompt read the name at call time.
MAX_REPLANS = 3
# ------------------------------------------------------------------------
# Executor node
# ------------------------------------------------------------------------
def executor_node(
    state: State,
) -> Command[
    Literal["web_researcher", "chart_generator", "synthesizer", "planner"]
]:
    """Decide which node runs next and record that decision in state.

    Right after a replan (``replan_flag`` set) the planned agent for the
    current step runs once without consulting the LLM. Otherwise the executor
    LLM is asked for a JSON decision (``replan``, ``goto``, ``reason``,
    ``query``) and the node either routes back to the planner, skips the step
    once the replan cap is reached, or routes to the chosen agent.

    Raises:
        ValueError: if the executor reply is not a JSON object containing
            the expected keys.
    """
    plan: Dict[str, Any] = state.get("plan") or {}
    step: int = state.get("current_step", 1)
    # 0) If we *just* replanned, run the planned agent once before reconsidering.
    if state.get("replan_flag"):
        # Fall back to the synthesizer (as the cap-hit branch below does)
        # when the revised plan has no agent for this step.
        planned_agent = plan.get(str(step), {}).get("agent") or "synthesizer"
        return Command(
            update={
                "replan_flag": False,
                # advance because we executed the planned agent
                "current_step": step + 1,
            },
            goto=planned_agent,
        )
    # 1) Build prompt & call LLM
    llm_reply = reasoning_llm.invoke([executor_prompt(state)])
    raw_content = llm_reply.content
    try:
        parsed = json.loads(
            raw_content if isinstance(raw_content, str) else str(raw_content)
        )
        # The task list in the prompt has named this key "replan_flag" while
        # the response schema names it "replan"; accept either spelling.
        replan: bool = (
            parsed["replan"] if "replan" in parsed else parsed["replan_flag"]
        )
        goto: str = parsed["goto"]
        reason: str = parsed["reason"]
        query: str = parsed["query"]
    except (ValueError, KeyError, TypeError) as exc:
        # ValueError covers json.JSONDecodeError; TypeError a non-object reply.
        raise ValueError(
            f"Invalid executor JSON:\n{llm_reply.content}"
        ) from exc
    # Update the state
    updates: Dict[str, Any] = {
        "messages": [HumanMessage(content=llm_reply.content, name="executor")],
        "last_reason": reason,
        "agent_query": query,
    }
    # Replan accounting: work on a copy so the incoming state is not mutated.
    replans: Dict[int, int] = dict(state.get("replan_attempts") or {})
    step_replans = replans.get(step, 0)
    # 2) Replan decision
    if replan:
        if step_replans < MAX_REPLANS:
            replans[step] = step_replans + 1
            updates.update({
                "replan_attempts": replans,
                "replan_flag": True,  # ensure next turn executes the planned agent once
                "current_step": step,  # stay on same step for the new plan
            })
            return Command(update=updates, goto="planner")
        # Cap hit: skip this step; let next step (or synthesizer) handle termination
        next_agent = plan.get(str(step + 1), {}).get("agent", "synthesizer")
        updates["current_step"] = step + 1
        return Command(update=updates, goto=next_agent)
    # 3) Happy path: run chosen agent; advance only if following the plan
    planned_agent = plan.get(str(step), {}).get("agent")
    updates["current_step"] = step + 1 if goto == planned_agent else step
    updates["replan_flag"] = False
    return Command(update=updates, goto=goto)
4. Create Web Search Agent¶
from typing import Literal
from langchain_openai import ChatOpenAI
from langchain_tavily import TavilySearch
from langgraph.prebuilt import create_react_agent
# Web search tool handed to the research agent below; constructed with
# `max_results=5`.
tavily_tool = TavilySearch(max_results=5)
# Notebook smoke test: runs one live search and leaves the "results" entry
# of the response as the cell's value.
tavily_tool.invoke("What is JP Morgan's stock price?")["results"]
def agent_system_prompt(suffix: str) -> str:
    """Return the shared collaboration preamble followed by ``suffix``.

    The preamble tells an agent to use its tools, to hand off when it cannot
    finish, and to prefix a finished deliverable with FINAL ANSWER. ``suffix``
    carries the agent-specific instructions and is appended on a new line.
    """
    preamble_parts = [
        "You are a helpful AI assistant, collaborating with other assistants.",
        " Use the provided tools to progress towards answering the question.",
        " If you are unable to fully answer, that's OK, another assistant with different tools ",
        " will help where you left off. Execute what you can to make progress.",
        " If you or any of the other assistants have the final answer or deliverable,",
        " prefix your response with FINAL ANSWER so the team knows to stop.",
    ]
    preamble = "".join(preamble_parts)
    return f"{preamble}\n{suffix}"
# Chat model shared by the tool-using agents defined in this notebook.
llm = ChatOpenAI(model="gpt-4o")
# Research agent and node
# A ReAct agent whose only tool is the Tavily search tool; its system prompt
# is the shared preamble plus the researcher-specific rules below.
web_search_agent = create_react_agent(
    llm,
    tools=[tavily_tool],
    prompt=agent_system_prompt("""
You are the Researcher. You can ONLY perform research by using the provided search tool (tavily_tool).
When you have found the necessary information, end your output.
Do NOT attempt to take further actions.
"""),
)
# Notebook smoke test: one live run of the agent; the content of the last
# returned message is left as the cell's value.
agent_response = web_search_agent.invoke({
    "messages": "what is jp morgan's current market cap?"
})
agent_response["messages"][-1].content
from trulens.core.otel.instrument import instrument
from trulens.otel.semconv.trace import SpanAttributes
def _web_research_span_attributes(ret, exception, *args, **kwargs):
    """Compute the retrieval span attributes for one ``web_research_node`` call.

    ``args[0]`` is the state the node was called with and ``ret`` is the
    node's return value.
    """
    node_state = args[0]
    # An empty or missing query is recorded as None.
    query_text = node_state.get("agent_query")
    if not query_text:
        query_text = None
    if hasattr(ret, "update"):
        # Record the final message of the update as the retrieved context.
        contexts = [ret.update["messages"][-1].content]
    else:
        contexts = "No tool call"
    return {
        SpanAttributes.RETRIEVAL.QUERY_TEXT: query_text,
        SpanAttributes.RETRIEVAL.RETRIEVED_CONTEXTS: contexts,
    }


@instrument(
    span_type=SpanAttributes.SpanType.RETRIEVAL,
    attributes=_web_research_span_attributes,
)
def web_research_node(
    state: State,
) -> Command[Literal["executor"]]:
    """Run the web search agent on ``agent_query`` and hand back to the executor.

    The agent's whole message history is written to ``messages``, with its
    last message re-wrapped as a ``HumanMessage`` named ``"web_researcher"``.
    """
    research = web_search_agent.invoke({"messages": state.get("agent_query")})
    history = research["messages"]
    # wrap in a human message, as not all providers allow
    # AI message at the last position of the input messages list
    history[-1] = HumanMessage(content=history[-1].content, name="web_researcher")
    # share internal message history of research agent with other agents
    return Command(update={"messages": history}, goto="executor")
5. Create Charting Agent¶
from typing import Annotated, Literal
from langchain_core.tools import tool
from langchain_experimental.utilities import PythonREPL
# Single REPL instance shared by every call of the tool below.
repl = PythonREPL()


@tool
def python_repl_tool(
    code: Annotated[str, "The python code to execute to generate your chart."],
):
    """Use this to execute python code. If you want to see the output of a value,
    you should print it out with `print(...)`. This is visible to the user."""
    # NOTE: the docstring above is left untouched on purpose; keep it as is
    # unless you intend to change what the tool-calling model is told.
    try:
        stdout = repl.run(code)
    except BaseException as e:
        # Any failure is reported back as text instead of being raised.
        return f"Failed to execute. Error: {repr(e)}"
    else:
        return (
            f"Successfully executed:\n```python\n{code}\n```\nStdout: {stdout}"
            "\n\nIf you have completed all tasks, respond with FINAL ANSWER."
        )
# Chart generator agent and node
# NOTE: THIS PERFORMS ARBITRARY CODE EXECUTION, WHICH CAN BE UNSAFE WHEN NOT SANDBOXED
# A ReAct agent whose only tool is `python_repl_tool`. Its prompt asks for
# two trailing lines, CHART_PATH and CHART_NOTES, which it describes as the
# way the summarizer finds the chart.
chart_agent = create_react_agent(
    llm,
    [python_repl_tool],
    prompt=agent_system_prompt(
        """
You can only generate charts. You are working with a researcher colleague.
1) Print the chart first.
2) Save the chart to a file in the current working directory.
3) At the very end of your message, output EXACTLY two lines so the summarizer can find them:
CHART_PATH: <relative_path_to_chart_file>
CHART_NOTES: <one concise sentence summarizing the main insight in the chart>
Do not include any other trailing text after these two lines.
"""
    ),
)
def chart_node(state: State) -> Command[Literal["chart_summarizer"]]:
    """Run the chart agent on the current state and route to the chart summarizer.

    The agent's message history is written to ``messages``, with its last
    message re-wrapped as a ``HumanMessage`` named ``"chart_generator"``.
    """
    outcome = chart_agent.invoke(state)
    history = outcome["messages"]
    # wrap in a human message, as not all providers allow
    # AI message at the last position of the input messages list
    history[-1] = HumanMessage(content=history[-1].content, name="chart_generator")
    # share internal message history of chart agent with other agents
    return Command(update={"messages": history}, goto="chart_summarizer")
6. Create Chart Summary Agent¶
# Tool-less ReAct agent that writes a short caption for the chart. The
# prompt tells it to take the chart's PATH only from the chart generator.
chart_summary_agent = create_react_agent(
    llm,
    tools=[],  # Add image processing tools if available/needed.
    prompt=agent_system_prompt(
        "You can only generate image captions. You are working with a researcher colleague and a chart generator colleague. "
        + "Your task is to generate a standalone, concise summary for the provided chart image saved at a local PATH, where the PATH should be and only be provided by your chart generator colleague. The summary should be no more than 3 sentences and should not mention the chart itself."
    ),
)
def chart_summary_node(
    state: State,
) -> Command[Literal[END]]:
    """Summarize the chart, print the summary, and end the run.

    Writes the agent's message history to ``messages`` and the text of its
    last message to ``final_answer``.
    """
    outcome = chart_summary_agent.invoke(state)
    history = outcome["messages"]
    summary = history[-1].content
    print(f"Chart summarizer answer: {summary}")
    # Send to the end node, sharing the agent's internal message history
    state_update = {
        "messages": history,
        "final_answer": summary,
    }
    return Command(update=state_update, goto=END)
7. Create a Synthesizer (Text Summarizer) Agent¶
# Rebinds `llm` with the same model name used when the agents were created;
# the synthesizer node below calls this model directly.
llm = ChatOpenAI(model="gpt-4o")
def synthesizer_node(state: State) -> Command[Literal[END]]:
    """
    Creates a concise, human-readable summary of the entire interaction,
    **purely in prose**.
    It ignores structured tables or chart IDs and instead rewrites the
    relevant agent messages (research results, chart commentary, etc.)
    into a short final answer.

    Only messages named ``web_researcher``, ``chart_generator`` or
    ``chart_summarizer`` are used as context. The answer is printed, stored
    in ``final_answer``, appended to ``messages`` under the name
    ``"synthesizer"``, and the run is routed to ``END``.
    """
    messages = state.get("messages") or []
    # Gather informative messages for final synthesis. Content is coerced to
    # text because message content is not guaranteed to be a plain string.
    context_sources = ("web_researcher", "chart_generator", "chart_summarizer")
    relevant_msgs = [
        m.content if isinstance(m.content, str) else str(m.content)
        for m in messages
        if getattr(m, "name", None) in context_sources
    ]
    # Prefer the stored query; otherwise fall back to the first message.
    user_question = state.get("user_query")
    if user_question is None:
        user_question = messages[0].content if messages else ""
    synthesis_instructions = (
        "You are the Synthesizer. Use the context below to directly answer the user's question. "
        "Perform any lightweight calculations, comparisons, or inferences required. "
        "Do not invent facts not supported by the context. If data is missing, say what's missing and, if helpful, "
        "offer a clearly labeled best-effort estimate with assumptions.\n\n"
        "Produce a concise response that fully answers the question, with the following guidance:\n"
        "- Start with the direct answer (one short paragraph or a tight bullet list).\n"
        "- Include key figures from any 'Results:' tables (e.g., totals, top items).\n"
        "- If any message contains citations, include them as a brief 'Citations: [...]' line.\n"
        "- Keep the output crisp; avoid meta commentary or tool instructions."
    )
    summary_prompt = [
        HumanMessage(
            content=(
                f"User question: {user_question}\n\n"
                f"{synthesis_instructions}\n\n"
                f"Context:\n\n" + "\n\n---\n\n".join(relevant_msgs)
            )
        )
    ]
    llm_reply = llm.invoke(summary_prompt)
    # Same non-string guard the planner and executor apply to LLM replies.
    reply_content = llm_reply.content
    answer = (
        reply_content if isinstance(reply_content, str) else str(reply_content)
    ).strip()
    print(f"Sythesizer answer: {answer}")
    return Command(
        update={
            "final_answer": answer,
            "messages": [HumanMessage(content=answer, name="synthesizer")],
        },
        goto=END,  # hand off to the END node
    )
8. Build the Agent Graph¶
from langgraph.graph import START
from langgraph.graph import StateGraph
# Assemble the graph: register each node under its routing name, then wire
# the single static edge. All other routing happens through the `Command`
# objects the nodes return.
workflow = StateGraph(State)
for _node_name, _node_fn in (
    ("planner", planner_node),
    ("executor", executor_node),
    ("web_researcher", web_research_node),
    ("chart_generator", chart_node),
    ("chart_summarizer", chart_summary_node),
    ("synthesizer", synthesizer_node),
):
    workflow.add_node(_node_name, _node_fn)
workflow.add_edge(START, "planner")
graph = workflow.compile()
from IPython.display import Image
try:
    # `display` is only predefined inside IPython/Jupyter sessions, so it is
    # imported explicitly here.
    from IPython.display import display

    display(Image(graph.get_graph().draw_mermaid_png()))
except Exception as exc:
    # This requires some extra dependencies and is optional, so a failure is
    # reported instead of being raised or silently dropped.
    print(f"Skipping graph rendering: {exc!r}")
9. Set up TruLens logging¶
from trulens.core.database.connector.default import DefaultDBConnector
from trulens.core.session import TruSession
# Initialize connector with SQLite database with custom name
connector = DefaultDBConnector(database_url="sqlite:///data_agent.sqlite")
# Create TruSession with the custom connector
session = TruSession(connector=connector)
# NOTE(review): presumably this clears previously logged data in
# data_agent.sqlite; confirm before running against a database you want to keep.
session.reset_database()
10. Add evaluations¶
Here we add RAG triad evaluations to assess goal completion for data tasks (such as web search). We also add trace-level metrics that aim to surface specific issues at each step.
from trulens.providers.openai import OpenAI
# Use GPT-4o for RAG Triad Evaluations
# (groundedness, answer relevance and context relevance, defined below).
provider = OpenAI(model_engine="gpt-4o")
import numpy as np
from trulens.core import Feedback
from trulens.core.feedback.selector import Selector
from trulens.otel.semconv.trace import SpanAttributes
# Define a groundedness feedback function
# "source" is selected from the RETRIEVED_CONTEXTS attribute of RETRIEVAL
# spans with collect_list=True; the statement being checked is the app output.
f_groundedness = (
    Feedback(
        provider.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    .on({
        "source": Selector(
            span_type=SpanAttributes.SpanType.RETRIEVAL,
            span_attribute=SpanAttributes.RETRIEVAL.RETRIEVED_CONTEXTS,
            collect_list=True,
        )
    })
    .on_output()
)
# Question/answer relevance between overall question and answer.
f_answer_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input()
    .on_output()
)
# Context relevance between question and each context chunk.
# "question" comes from the QUERY_TEXT attribute and "context" from the
# RETRIEVED_CONTEXTS attribute of RETRIEVAL spans (collect_list=False);
# the resulting scores are averaged with np.mean.
f_context_relevance = (
    Feedback(
        provider.context_relevance_with_cot_reasons, name="Context Relevance"
    )
    .on({
        "question": Selector(
            span_type=SpanAttributes.SpanType.RETRIEVAL,
            span_attribute=SpanAttributes.RETRIEVAL.QUERY_TEXT,
        )
    })
    .on({
        "context": Selector(
            span_type=SpanAttributes.SpanType.RETRIEVAL,
            span_attribute=SpanAttributes.RETRIEVAL.RETRIEVED_CONTEXTS,
            collect_list=False,
        )
    })
    .aggregate(np.mean)
)
# Separate judge model for the trace-level metrics.
trace_eval_provider = OpenAI(model_engine="gpt-4.1")


def _trace_level_feedback(evaluator, label):
    """Wrap a provider method as a Feedback whose "trace" argument is trace-level."""
    trace_selector = Selector(trace_level=True)
    return Feedback(evaluator, name=label).on({"trace": trace_selector})


f_plan_quality = _trace_level_feedback(
    trace_eval_provider.plan_quality_with_cot_reasons, "Plan Quality"
)
f_plan_adherence = _trace_level_feedback(
    trace_eval_provider.plan_adherence_with_cot_reasons, "Plan Adherence"
)
f_execution_efficiency = _trace_level_feedback(
    trace_eval_provider.execution_efficiency_with_cot_reasons,
    "Execution Efficiency",
)
f_logical_consistency = _trace_level_feedback(
    trace_eval_provider.logical_consistency_with_cot_reasons,
    "Logical Consistency",
)
11. Register the agent¶
from trulens.apps.langgraph import TruGraph
# Wrap the compiled graph in a TruLens recorder and attach all seven
# feedback functions: the RAG triad first, then the four trace-level metrics.
tru_recorder = TruGraph(
    graph,
    app_name="Web Search Data Agent",
    app_version="Base",
    feedbacks=[
        f_answer_relevance,
        f_context_relevance,
        f_groundedness,
        f_plan_quality,
        f_plan_adherence,
        f_execution_efficiency,
        f_logical_consistency,
    ],
)
12. Use the Agent¶
from langchain_core.messages import HumanMessage
# Run the recorded graph once per demo query. Each run starts from a fresh
# state with all four agents enabled and is followed by a separator line.
_all_agents = (
    "web_researcher",
    "chart_generator",
    "chart_summarizer",
    "synthesizer",
)
for query in (
    "Chart the current market capitalization of the top 5 banks in the US?",
    "Identify current regulatory changes for the financial services industry in the US.",
):
    print(f"Query: {query}")
    state = {
        "messages": [HumanMessage(content=query)],
        "user_query": query,
        "enabled_agents": list(_all_agents),
    }
    with tru_recorder as recording:
        graph.invoke(state)
    print("--------------------------------")
13. See evaluation results¶
You may need to run this step multiple times to see full results, as the LLM judge evaluations take time to compute.
# Notebook cell: the returned leaderboard is left as the cell's value.
session.get_leaderboard()
14. Launch TruLens Dashboard¶
from trulens.dashboard import run_dashboard
# Start the TruLens dashboard with its default arguments.
run_dashboard()