Evaluating Sentence Window RAG¶
In this notebook, we use the SentenceWindowNodeParser to parse documents into single sentences per node. Each node also contains a "window" with the sentences on either side of the node sentence.
Then, after retrieval and before the retrieved sentences are passed to the LLM, each single sentence is replaced with a window containing its surrounding sentences using the MetadataReplacementPostProcessor.
Finally, we show how to evaluate retrieval in this setting and compare it against a baseline VectorStoreIndex.
In [ ]:
Copied!
# Uncomment the next line to install the packages this notebook depends on.
# !pip install trulens trulens-apps-llamaindex trulens-providers-openai llama_index==0.10.11 sentence-transformers transformers pypdf gdown
Query Engine Construction¶
In [ ]:
Copied!
import os

import openai
from trulens.apps.llamaindex import TruLlama
from trulens.core import Feedback
from trulens.core import FeedbackMode
from trulens.core import Select
from trulens.core import TruSession
from trulens.providers.openai import OpenAI as fOpenAI

# Create the TruLens session and reset its database
# (presumably this discards records logged by earlier runs - confirm before
# pointing the session at a database you want to keep).
session = TruSession()
session.reset_database()

# Keep a key that is already exported in the environment; the placeholder is
# only used as a fallback and must be replaced with a real key before running.
os.environ.setdefault("OPENAI_API_KEY", "sk-proj-...")
openai.api_key = os.environ["OPENAI_API_KEY"]
In [ ]:
Copied!
!curl https://www.ipcc.ch/report/ar6/wg2/downloads/report/IPCC_AR6_WGII_Chapter03.pdf --output IPCC_AR6_WGII_Chapter03.pdf
!curl https://www.ipcc.ch/report/ar6/wg2/downloads/report/IPCC_AR6_WGII_Chapter03.pdf --output IPCC_AR6_WGII_Chapter03.pdf
In [ ]:
Copied!
from llama_index.core import SimpleDirectoryReader

# Load the downloaded PDF once; the result is a list of documents.
reader = SimpleDirectoryReader(
    input_files=["./IPCC_AR6_WGII_Chapter03.pdf"],
)
documents = reader.load_data()
In [ ]:
Copied!
# sentence-window index
!gdown "https://drive.google.com/uc?id=16pH4NETEs43dwJUvYnJ9Z-bsR9_krkrP"
!tar -xzf sentence_index.tar.gz
# sentence-window index
!gdown "https://drive.google.com/uc?id=16pH4NETEs43dwJUvYnJ9Z-bsR9_krkrP"
!tar -xzf sentence_index.tar.gz
In [ ]:
Copied!
from llama_index.core import Document

# Merge into a single large document rather than one document per-page:
# the texts of all loaded documents are joined with a blank line between them.
merged_text = "\n\n".join(doc.text for doc in documents)
document = Document(text=merged_text)
In [ ]:
Copied!
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.settings import Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

# Sentence-window parser: one sentence per node. The surrounding window
# (window_size=3) is stored under the "window" metadata key and the sentence
# itself under "original_text".
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

# Plain sentence splitter used for the baseline index.
text_splitter = SentenceSplitter()

llm = OpenAI(model="gpt-4o-mini", temperature=0.1)

# Register the components on the global LlamaIndex settings object.
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")
Settings.node_parser = node_parser
Settings.text_splitter = text_splitter
Settings.llm = llm
In [ ]:
Copied!
from llama_index.core import VectorStoreIndex

# Parse the merged `document` built earlier in this notebook rather than the
# per-page `documents`, so chunks and sentence windows are not cut off at page
# boundaries. Both parsers get the same input to keep the comparison fair.
nodes = node_parser.get_nodes_from_documents([document])
base_nodes = text_splitter.get_nodes_from_documents([document])

# One index per parsing strategy: sentence-window nodes vs. baseline chunks.
sentence_index = VectorStoreIndex(nodes)
base_index = VectorStoreIndex(base_nodes)
Define Evals¶
In [ ]:
Copied!
import numpy as np

# Initialize the OpenAI feedback provider.
provider = fOpenAI()

# Helpfulness, evaluated on the app output.
f_helpfulness = Feedback(provider.helpfulness).on_output()

# Question/answer relevance between the overall question and the answer.
f_qa_relevance = Feedback(provider.relevance_with_cot_reasons).on_input_output()

# Question/statement relevance between the question and each context chunk,
# with chain-of-thought reasons; per-chunk scores are averaged.
# The context is located in a different place for the sub-questions, so that
# feedback is defined separately with an explicit record selector.
f_context_relevance_subquestions = (
    Feedback(provider.context_relevance_with_cot_reasons)
    .on_input()
    .on(Select.Record.calls[0].rets.source_nodes[:].node.text)
    .aggregate(np.mean)
)

f_context_relevance = (
    Feedback(provider.context_relevance_with_cot_reasons)
    .on_input()
    .on(TruLlama.select_context())
    .aggregate(np.mean)
)

# Groundedness with chain-of-thought reasons.
# As with context relevance, it is defined twice: once for the sub-questions
# (context chunks collected into a single argument) and once for the overall
# question.
f_groundedness_subquestions = (
    Feedback(provider.groundedness_measure_with_cot_reasons)
    .on(Select.Record.calls[0].rets.source_nodes[:].node.text.collect())
    .on_output()
)

f_groundedness = (
    Feedback(provider.groundedness_measure_with_cot_reasons)
    .on(TruLlama.select_context())
    .on_output()
)
Querying¶
In [ ]:
Copied!
from llama_index.core.postprocessor import MetadataReplacementPostProcessor
from trulens.apps.llamaindex import TruLlama

# Query engine over the sentence-window index (similarity_top_k=2). The
# post-processor swaps each retrieved sentence for the text stored under its
# "window" metadata key before the LLM sees it.
window_postprocessor = MetadataReplacementPostProcessor(
    target_metadata_key="window",
)
sentence_query_engine = sentence_index.as_query_engine(
    similarity_top_k=2,
    node_postprocessors=[window_postprocessor],
)

# Wrap the engine in a TruLens recorder with the feedback functions attached.
tru_sentence_query_engine_recorder = TruLlama(
    sentence_query_engine,
    app_name="climate query engine",
    app_version="sentence_window_index",
    feedbacks=[
        f_qa_relevance,
        f_context_relevance,
        f_groundedness,
        f_helpfulness,
    ],
)

# Issue the query once inside the recorder context so it is logged once.
with tru_sentence_query_engine_recorder:
    sentence_query_engine.query("What are the concerns surrounding the AMOC?")
Contrast with normal VectorStoreIndex¶
In [ ]:
Copied!
# Baseline: query engine over the plain VectorStoreIndex, with the same
# similarity_top_k as the sentence-window engine.
query_engine = base_index.as_query_engine(similarity_top_k=2)

# Same app name and feedbacks as the sentence-window recorder; only the
# app_version differs.
tru_query_engine_recorder = TruLlama(
    query_engine,
    app_name="climate query engine",
    app_version="vector_store_index",
    feedbacks=[
        f_qa_relevance,
        f_context_relevance,
        f_groundedness,
        f_helpfulness,
    ],
)

# Issue the query once inside the recorder context so it is logged once.
with tru_query_engine_recorder:
    query_engine.query("What are the concerns surrounding the AMOC?")
Also Compare with Sub-Question Query Engine + Sentence Window Engine¶
In [ ]:
Copied!
from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool
from llama_index.core.tools import ToolMetadata

# Expose the sentence-window engine as the single tool available to the
# sub-question engine.
climate_report_tool = QueryEngineTool(
    query_engine=sentence_query_engine,
    metadata=ToolMetadata(
        name="climate_report", description="Climate Report on Oceans."
    ),
)
subquestion_query_engine = SubQuestionQueryEngine.from_defaults(
    [climate_report_tool],
    verbose=False,
)

# In addition to the feedbacks used for the other engines, attach the
# sub-question variants that select the context with an explicit record path.
tru_subquestion_query_engine_recorder = TruLlama(
    subquestion_query_engine,
    app_name="climate query engine",
    app_version="sub_question_query_engine",
    feedbacks=[
        f_qa_relevance,
        f_context_relevance,
        f_context_relevance_subquestions,
        f_groundedness,
        f_groundedness_subquestions,
        f_helpfulness,
    ],
)

# Issue the query once inside the recorder context so it is logged once.
with tru_subquestion_query_engine_recorder:
    subquestion_query_engine.query("What are the concerns surrounding the AMOC?")
In [ ]:
Copied!
from trulens.dashboard import run_dashboard

# Start the TruLens dashboard for this session (a single call is enough).
run_dashboard(session)
In [ ]:
Copied!
# Evaluation questions about the report. The last entry is unrelated to the
# report's subject (presumably to probe how the engines handle an
# out-of-scope query - confirm intent).
questions = [
    "Based on the provided text, discuss the impact of human activities on the natural carbon dynamics of estuaries, shelf seas, and other intertidal and shallow-water habitats. Provide examples from the text to support your answer.",
    "Analyze the combined effects of exploitation and multi-decadal climate fluctuations on global fisheries yields. How do these factors make it difficult to assess the impacts of global climate change on fisheries yields? Use specific examples from the text to support your analysis.",
    "Based on the study by Gutiérrez-Rodríguez, A.G., et al., 2018, what potential benefits do seaweeds have in the field of medicine, specifically in relation to cancer treatment?",
    "According to the research conducted by Haasnoot, M., et al., 2020, how does the uncertainty in Antarctic mass-loss impact the coastal adaptation strategy of the Netherlands?",
    "Based on the context, explain how the decline in warm water coral reefs is projected to impact the services they provide to society, particularly in terms of coastal protection.",
    "Tell me something about the intricacies of tying a tie.",
]