# Imports main tools:
from trulens_eval import Feedback
from trulens_eval import Huggingface
from trulens_eval import Tru
from trulens_eval import TruChain
# Global TruLens workspace handle; owns the local record/feedback database.
tru = Tru()
# Migrate the database on the instance we just created, rather than
# constructing a second `Tru()` only to call migrate_database() on it.
tru.migrate_database()
from langchain.chains import LLMChain
from langchain_community.llms import OpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate
from langchain.prompts import PromptTemplate
# Build the chat prompt: a single human message asking for a helpful,
# context-rich answer to the user's input (the {prompt} variable).
base_template = PromptTemplate(
    template="Provide a helpful response with relevant background information for the following: {prompt}",
    input_variables=["prompt"],
)
full_prompt = HumanMessagePromptTemplate(prompt=base_template)
chat_prompt_template = ChatPromptTemplate.from_messages([full_prompt])

# OpenAI completion model; the higher temperature yields more varied answers.
llm = OpenAI(temperature=0.9, max_tokens=128)

# Verbose LangChain chain tying the LLM to the prompt template.
chain = LLMChain(llm=llm, prompt=chat_prompt_template, verbose=True)
# Wrap the chain with TruLens instrumentation so calls are recorded under
# the given app_id in the Tru database.
truchain = TruChain(chain, app_id='Chain1_ChatApplication', tru=tru)

# Calls made inside the context manager are logged automatically.
with truchain:
    chain("This will be automatically logged.")
Feedback functions can also be logged automatically by providing them in a list to the `feedbacks` argument of `TruChain`.
# Initialize the Hugging Face-based feedback function collection class:
hugs = Huggingface()
# Define a language-match feedback function using Hugging Face.
# `on_input_output()` selects which texts the function compares.
f_lang_match = Feedback(hugs.language_match).on_input_output()
# By default this will check language match on the main app input and main app
# output.
# Re-wrap the chain, this time attaching the language-match feedback so it
# is computed and logged automatically for every recorded call.
truchain = TruChain(
    chain,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_lang_match],  # feedback functions to run on each record
    tru=tru,
)
with truchain:
    chain("This will be automatically logged.")
# Instrumented chain handle used below for manual record/feedback logging.
tc = TruChain(chain, app_id='Chain1_ChatApplication')
Set up logging and instrumentation
Making the first call to your wrapped LLM application will now also produce a log or "record" of the chain execution.
# A Spanish input — useful for exercising the language-match feedback.
prompt_input = "que hora es?"
# Run the chain through the recorder to capture both the response and a Record.
gpt3_response, record = tc.with_record(chain.__call__, prompt_input)
We can log the records, but first we need to log the chain itself.
# Register the app with the Tru database before logging its records.
tru.add_app(app=truchain)
Then we can log the record:
# Persist the captured record to the local database.
tru.add_record(record)
Log App Feedback
Capturing app feedback, such as user feedback on the responses, can be added with one call.
# Log explicit user feedback (a thumbs-up) against the captured record.
thumb_result = True
tru.add_feedback(
    result=thumb_result,
    record_id=record.record_id,
    name="👍 (1) or 👎 (0)",
)
Evaluate Quality
Following the request to your app, you can then evaluate LLM quality using feedback functions. This is completed in a sequential call to minimize latency for your application, and evaluations will also be logged to your local machine.
To get feedback on the quality of your LLM, you can use any of the provided feedback functions or add your own.
To assess your LLM quality, provide the feedback functions in a list to the `feedback_functions` argument of `tru.run_feedback_functions()`.
# Run the language-match feedback synchronously against the captured record.
feedback_results = tru.run_feedback_functions(
    record=record, feedback_functions=[f_lang_match]
)
for fb in feedback_results:
    display(fb)
After capturing feedback, you can then log it to your local database.
# Persist the computed feedback results to the local database.
tru.add_feedbacks(feedback_results)
Out-of-band Feedback evaluation
In the above example, the feedback function evaluation is done in the same process as the chain evaluation. The alternative approach is to use the provided persistent evaluator, started via `tru.start_deferred_feedback_evaluator`. Then specify the `feedback_mode` for `TruChain` as `deferred` to let the evaluator handle the feedback functions.
For demonstration purposes, we start the evaluator here, but it can be started in another process.
# Deferred mode: records are logged immediately, but feedback functions are
# left for a background evaluator process to compute.
truchain: TruChain = TruChain(
    chain,
    app_id='Chain1_ChatApplication',
    feedbacks=[f_lang_match],
    tru=tru,
    feedback_mode="deferred",
)
with truchain:
    chain("This will be logged by deferred evaluator.")

# Start the deferred-feedback evaluator in this process (for demonstration;
# it could equally run in a separate process).
tru.start_evaluator()
# tru.stop_evaluator()