Cortex Chat + TruLens¶
This quickstart assumes you already have a Cortex Search Service started, a JWT token created, and Cortex Chat Private Preview enabled for your account. If you need assistance getting started with Cortex Chat, or with having Cortex Chat Private Preview enabled, please contact your Snowflake account team.
Install required packages¶
! pip install trulens-core trulens-providers-cortex trulens-connectors-snowflake snowflake-sqlalchemy
Set JWT Token, Chat URL, and Search Service¶
import os

# Credentials and endpoints for the Cortex Chat API.
# Replace each placeholder with your own values before running.
os.environ["SNOWFLAKE_JWT"] = "..."  # JWT minted for your Snowflake user
os.environ["SNOWFLAKE_CHAT_URL"] = ".../api/v2/cortex/chat"  # account-specific chat endpoint
os.environ["SNOWFLAKE_CORTEX_SEARCH_SERVICE"] = "<database>.<schema>.<cortex search service name>"
Create a Cortex Chat App¶
The CortexChat class below can be configured with your URL and model selection.
It contains two methods: _handle_cortex_chat_response and chat.
_handle_cortex_chat_response serves to handle the streaming response and expose the debugging information.
chat is a user-facing method that allows you to input a query and receive a response and citation.
import requests
import json
from trulens.apps.custom import instrument
class CortexChat:
    """Thin client for the Cortex Chat API, instrumented for TruLens.

    Streams a chat completion from the Cortex Chat REST endpoint, grounding
    answers with a Cortex Search Service, and surfaces the API's debug
    payload so TruLens feedback functions can inspect retrieved context.
    """

    def __init__(self, url: str, cortex_search_service: str, model: str = "mistral-large"):
        """
        Initializes a new instance of the CortexChat class.

        Parameters:
            url (str): The URL of the chat service.
            cortex_search_service (str): The search service to be used for chat,
                as "<database>.<schema>.<service name>".
            model (str): The model to be used for chat. Defaults to "mistral-large".
        """
        self.url = url
        self.model = model
        self.cortex_search_service = cortex_search_service

    @instrument
    def _handle_cortex_chat_response(self, response: requests.Response) -> tuple[str, str, str]:
        """
        Process the streaming (server-sent-event) response from the Cortex Chat API.

        Args:
            response: The response object from the Cortex Chat API.

        Returns:
            A tuple containing the extracted text, citation, and debug
            information from the response.

        Raises:
            Exception: If the API emits an error event, if a data payload is
                not valid JSON, if the stream finishes without debug
                information (required for TruLens feedback), or if the stream
                ends without a terminating "done" event.
        """
        text = ""
        citation = ""
        debug_info = ""
        previous_line = ""
        for line in response.iter_lines():
            if not line:  # skip keep-alive blank lines between events
                continue
            decoded_line = line.decode('utf-8')
            if decoded_line.startswith("event: done"):
                if debug_info == "":
                    # TruLens feedback selectors read retrieval results out of
                    # debug_info, so a missing payload is unrecoverable here.
                    raise Exception("No debug information, required for TruLens feedback, provided by Cortex Chat API.")
                return text, citation, debug_info
            if previous_line.startswith("event: error"):
                # The line following an error event carries the JSON error body.
                error_data = json.loads(decoded_line[5:])
                error_code = error_data["code"]
                error_message = error_data["message"]
                raise Exception(f"Error event received from Cortex Chat API. Error code: {error_code}, Error message: {error_message}")
            if decoded_line.startswith('data:'):
                try:
                    # Strip the "data:" prefix before parsing the JSON payload.
                    data = json.loads(decoded_line[5:])
                    # NOTE(review): assumes each delta carries exactly one
                    # content entry -- confirm against the Chat API schema.
                    content = data['delta']['content'][0]
                    if content['type'] == "text":
                        print(content['text']['value'], end = '')
                        text += content['text']['value']
                    if content['type'] == "citation":
                        citation = content['citation']
                    if content['type'] == "debug_info":
                        debug_info = content['debug_info']
                except json.JSONDecodeError:
                    raise Exception(f"Error decoding JSON: {decoded_line} from {previous_line}")
            previous_line = decoded_line
        # BUG FIX: previously fell off the loop and implicitly returned None
        # when the stream ended without a "done" event, making the caller fail
        # later while unpacking. Fail loudly at the source instead.
        raise Exception("Cortex Chat API stream ended without a done event.")

    @instrument
    def chat(self, query: str) -> tuple[str, str]:
        """
        Sends a chat query to the Cortex Chat API and returns the response.

        Args:
            query (str): The chat query to send.

        Returns:
            tuple: A tuple containing the text response and citation.

        Raises:
            Exception: If the request fails with a non-200 status code, or if
                the response stream is malformed (see
                _handle_cortex_chat_response).

        Example:
            cortex = CortexChat(url, cortex_search_service)
            response = cortex.chat("Hello, how are you?")
            print(response)
            ("I'm good, thank you!", "Cortex Chat API v1.0")
        """
        headers = {
            'X-Snowflake-Authorization-Token-Type': 'KEYPAIR_JWT',
            'Content-Type': 'application/json',
            'Accept': 'application/json',
            # JWT minted for the Snowflake user; set during quickstart setup.
            'Authorization': f"Bearer {os.environ.get('SNOWFLAKE_JWT')}"
        }
        data = {
            "query": query,
            "model": self.model,
            # debug=True makes the API stream the debug_info event that the
            # TruLens groundedness/context-relevance feedbacks rely on.
            "debug": True,
            "search_services": [{
                "name": self.cortex_search_service,
                "max_results": 10,
            }],
            "prompt": "{{.Question}} {{.Context}}",
        }
        response = requests.post(self.url, headers=headers, json=data, stream=True)
        if response.status_code == 200:
            text, citation, _ = self._handle_cortex_chat_response(response)
            return text, citation
        # BUG FIX: previously only printed the error and implicitly returned
        # None, so callers unpacking the result hit an unrelated TypeError.
        # Keep the diagnostic print for parity, then raise.
        print(f"Error: {response.status_code} - {response.text}")
        raise Exception(f"Cortex Chat API request failed with status code {response.status_code}.")
# BUG FIX: read the search service from the same environment variable set
# above (SNOWFLAKE_CORTEX_SEARCH_SERVICE); the original looked up the
# never-set key "SNOWFLAKE_SEARCH_SERVICE" and raised KeyError.
cortex = CortexChat(
    os.environ["SNOWFLAKE_CHAT_URL"],
    os.environ["SNOWFLAKE_CORTEX_SEARCH_SERVICE"],
)
Start a TruLens session¶
Start a TruLens session connected to Snowflake so we can log traces and evaluations in our Snowflake account.
Learn more about how to log TruLens traces and evaluations in Snowflake.
from trulens.core import TruSession
from trulens.connectors.snowflake import SnowflakeConnector

# Connection details for the Snowflake account where TruLens will log traces
# and evaluations. Replace each "..." with your own values.
connection_params = {
    "account": "...",
    "user": "...",
    "password": "...",
    "database": "...",
    "schema": "...",
    "warehouse": "...",
    "role": "...",
    # NOTE(review): presumably skips server-side setup in the account --
    # confirm against the SnowflakeConnector documentation.
    "init_server_side": False,
}
connector = SnowflakeConnector(**connection_params)
session = TruSession(connector=connector)
# WARNING: clears any previously logged records and feedback in this database.
session.reset_database()
Create Feedback Functions¶
Here we initialize the RAG Triad to provide feedback on the Chat API responses.
If you'd like, you can also choose from a wide variety of stock feedback functions or even create custom feedback functions.
import numpy as np
from trulens.core import Feedback
from trulens.core import Select
from trulens.providers.cortex import Cortex
from snowflake.snowpark.session import Session

# Snowpark session reusing the same connection parameters as the TruLens
# connector; the Cortex provider runs its feedback LLM calls through it.
snowpark_session = Session.builder.configs(connection_params).create()
provider = Cortex(snowpark_session, "llama3.1-8b")

# Question/answer relevance between overall question and answer.
f_answer_relevance = (
    Feedback(provider.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input()
    .on_output()
)

# Define a groundedness feedback function.
# rets[2] of _handle_cortex_chat_response is the debug_info payload; its
# "retrieved_results" hold the search chunks the answer must be grounded in.
f_groundedness = (
    Feedback(
        provider.groundedness_measure_with_cot_reasons, name="Groundedness"
    )
    .on(Select.RecordCalls._handle_cortex_chat_response.rets[2]["retrieved_results"].collect())
    .on_output()
)

# Context relevance between question and each context chunk.
# The [:] selector evaluates relevance per retrieved chunk, then the scores
# are aggregated below.
f_context_relevance = (
    Feedback(
        provider.context_relevance_with_cot_reasons, name="Context Relevance"
    )
    .on_input()
    .on(Select.RecordCalls._handle_cortex_chat_response.rets[2]["retrieved_results"][:])
    .aggregate(np.mean) # choose a different aggregation method if you wish
)
Initialize the TruLens recorder and run the app¶
from trulens.apps.custom import TruCustomApp

# Wrap the CortexChat app so calls to its @instrument-ed methods are recorded,
# and the three feedback functions run on each recorded interaction.
tru_recorder = TruCustomApp(
    cortex,
    app_name="Cortex Chat",
    app_version="mistral-large",
    feedbacks=[f_answer_relevance, f_groundedness, f_context_relevance],
)

# Any chat calls made inside this context are captured as TruLens records.
with tru_recorder as recording:
    # Example usage
    user_query = "Hello! What kind of service does Gregory have?"
    cortex.chat(user_query)
Start the dashboard¶
from trulens.dashboard import run_dashboard

# Launch the TruLens dashboard UI to browse this session's records and
# feedback results.
run_dashboard(session)