# Imports main tools:
from langchain.chains import LLMChain
from langchain.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate
from langchain.prompts import PromptTemplate
from langchain_community.llms import OpenAI
from trulens.apps.langchain import TruChain
from trulens.core import Feedback
from trulens.core import TruSession
from trulens.providers.huggingface import Huggingface
session = TruSession()
session.migrate_database()
full_prompt = HumanMessagePromptTemplate(
    prompt=PromptTemplate(
        template="Provide a helpful response with relevant background information for the following: {prompt}",
        input_variables=["prompt"],
    )
)
chat_prompt_template = ChatPromptTemplate.from_messages([full_prompt])
llm = OpenAI(temperature=0.9, max_tokens=128)
chain = LLMChain(llm=llm, prompt=chat_prompt_template, verbose=True)
truchain = TruChain(chain, app_name="ChatApplication", app_version="Chain1")
with truchain:
    chain("This will be automatically logged.")
Feedback functions can also be logged automatically by providing them in a list to the feedbacks arg.
# Initialize Huggingface-based feedback function collection class:
hugs = Huggingface()
# Define a language match feedback function using HuggingFace.
f_lang_match = Feedback(hugs.language_match).on_input_output()
# By default this will check language match on the main app input and main app
# output.
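# The on_input_output() call above is shorthand for selecting the main app
# input and output explicitly. A sketch of the equivalent explicit form,
# assuming the Select helper exported by trulens.core:
from trulens.core import Select

f_lang_match_explicit = (
    Feedback(hugs.language_match).on(Select.RecordInput).on(Select.RecordOutput)
)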
truchain = TruChain(
    chain,
    app_name="ChatApplication",
    app_version="Chain1",
    feedbacks=[f_lang_match],  # feedback functions
)
with truchain:
    chain("This will be automatically logged.")
tc = TruChain(chain, app_name="ChatApplication", app_version="Chain2")
Set up logging and instrumentation
Making the first call to your wrapped LLM Application will now also produce a log or "record" of the chain execution.
prompt_input = "que hora es?"
gpt3_response, record = tc.with_record(chain.__call__, prompt_input)
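Besides the chain's response, with_record returns a Record object describing the call. A quick way to inspect it (a sketch; record_id is used later in this walkthrough, while main_input and main_output are assumed attribute names in recent TruLens versions):
print(record.record_id)    # unique id, used below when attaching feedback
print(record.main_input)   # the prompt that was sent, "que hora es?"
print(record.main_output)  # the chain's response text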
We can log the records, but first we need to register the chain itself with the session.
session.add_app(app=truchain)
Then we can log the record:
session.add_record(record)
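To confirm that the app and record landed in the local database, you can query them back. A sketch, assuming TruSession.get_records_and_feedback from recent TruLens releases, which returns a pandas DataFrame of records plus the names of any feedback columns:
records_df, feedback_cols = session.get_records_and_feedback()
print(records_df.head())  # one row per logged record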
Log App Feedback
App feedback, such as user feedback on the responses, can be captured and logged with a single call.
thumb_result = True
session.add_feedback(
    name="👍 (1) or 👎 (0)", record_id=record.record_id, result=thumb_result
)
Evaluate Quality
Following the request to your app, you can evaluate LLM quality using feedback functions. This is done in a separate call after the response is produced, minimizing the latency added to your application, and the evaluations are also logged to your local database.
To get feedback on the quality of your LLM, you can use any of the provided feedback functions or add your own.
To assess your LLM quality, pass the feedback functions to session.run_feedback_functions() in a list via the feedback_functions argument.
feedback_results = session.run_feedback_functions(
    record=record, feedback_functions=[f_lang_match]
)
for result in feedback_results:
    display(result)  # display() renders results in a notebook; use print() outside IPython
After capturing feedback, you can then log it to your local database.
session.add_feedbacks(feedback_results)
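With feedback results stored, per-app aggregate scores can also be read back. A sketch, assuming TruSession.get_leaderboard from recent TruLens releases, which averages feedback scores for each app version:
leaderboard = session.get_leaderboard()  # assumed API; averages feedback scores per app
print(leaderboard)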
Out-of-band Feedback evaluation
In the above example, the feedback function evaluation is done in the same process as the chain evaluation. The alternative approach is to use the provided persistent evaluator, started via session.start_evaluator(). Then specify the feedback_mode for TruChain as "deferred" to let the evaluator handle the feedback functions.
For demonstration purposes, we start the evaluator here but it can be started in another process.
truchain: TruChain = TruChain(
    chain,
    app_name="ChatApplication",
    app_version="chain_1",
    feedbacks=[f_lang_match],
    feedback_mode="deferred",
)
with truchain:
    chain("This will be logged by deferred evaluator.")
session.start_evaluator()
# session.stop_evaluator()