from uptrain.framework import Check
from uptrain.operators import ModelGradeScore, OpenAIGradeScore, Histogram

check = Check(
    name="Eval scores",
    operators=[
        ## Add your checks here
        # First is the OpenAI grade eval (pre-built closed-QA correctness check)
        OpenAIGradeScore(
            col_in_input="document_text",
            col_in_completion="answer",
            eval_name="coqa-closedqa-correct",
            col_out="openai_eval_score",
        ),
        # Second is our custom grading eval
        ModelGradeScore(
            grading_prompt_template="""
            You are an evidence-driven LLM that places high importance on supporting facts and references. You diligently verify claims and check for evidence within the document to ensure answers rely on reliable information and align with the documented evidence.
            You are comparing an answer pulled from the document for a given question.
            Here is the data:
            [BEGIN DATA]
            ************
            [Document]: {document}
            ************
            [Question]: {user_query}
            ************
            [Submitted answer]: {generated_answer}
            ************
            [END DATA]
            Compare the factual content of the submitted answer with the document, and evaluate how well it answers the given question. Ignore any differences in style, grammar, or punctuation.
            Answer the question by selecting one of the following options:
            (A) The submitted answer is a subset of the document and answers the question correctly.
            (B) The submitted answer is a subset of the document but is not an appropriate answer for the question.
            (C) The submitted answer is a superset of the document but is consistent with the document and answers the question correctly.
            (D) The submitted answer is a superset of the document and is consistent with the document, but it does not answer the question correctly.
            (E) The submitted answer is a superset of the document and is not consistent with the document.
""",
eval_type="cot_classify",
choice_strings=["A", "B", "C", "D", "E"],
choice_scores={"A": 1.0, "B": 0.2, "C": 0.5, "D": 0.1, "E": 0.0},
context_vars={
"document": "document_text",
"user_query": "question",
"generated_answer": "answer",
},
col_out="custom_eval_score",
),
],
plots = [Histogram(x="custom_eval_score", nbins=5)]
)
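
# To run this check end to end, UpTrain's framework wraps checks in a CheckSet
# together with a data source and settings. The sketch below is a minimal,
# hedged example: the dataset path, the API-key placeholder, and the logs
# folder are assumptions for illustration, and the exact run API may differ
# across UpTrain versions, so verify against the docs for your installed release.
from uptrain.framework import CheckSet, Settings
from uptrain.operators import JsonReader

settings = Settings(
    openai_api_key="sk-...",          # hypothetical placeholder, not a real key
    logs_folder="/tmp/uptrain_logs",  # hypothetical output location for eval logs
)
check_set = CheckSet(
    # Hypothetical dataset: each row must carry the "document_text", "question",
    # and "answer" columns referenced by context_vars and the column args above.
    source=JsonReader(fpath="qna_dataset.jsonl"),
    checks=[check],
)
check_set.setup(settings)
check_set.run()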