
Commit

Clean up notebooks for blog chunking eval
Signed-off-by: Christy Bergman <[email protected]>
christy committed Jul 9, 2024
1 parent e7f9f26 commit 9c6756f
Showing 6 changed files with 3,614 additions and 2,573 deletions.
49 changes: 49 additions & 0 deletions bootcamp/Evaluation/data/blog_eval_answers.csv

Large diffs are not rendered by default.

12 changes: 0 additions & 12 deletions bootcamp/Evaluation/data/ground_truth_answers.csv

This file was deleted.

@@ -16,7 +16,7 @@ def assemble_ragas_dataset(input_df):
     truth_list = input_df.ground_truth_answer.to_list()
 
     # Get all the Milvus Retrieval Contexts as list[list[str]]
-    context_list = input_df.Custom_RAG_context.to_list()
+    context_list = input_df.recursive_context_512_k_2.to_list()
     context_list = [[context] for context in context_list]
 
     # Get all the RAG answers based on contexts.
@@ -34,7 +34,7 @@ def assemble_ragas_dataset(input_df):
 def evaluate_ragas_model(pandas_eval_df,
                          ragas_eval_metrics,
                          what_to_evaluate='CONTEXTS',
-                         cols_to_evaluate=['Custom_RAG_context', 'simple_context']):
+                         cols_to_evaluate=['recursive_context_512_k_2', 'html_context_512_k_2']):
     """Evaluate the RAGAS model using the input pandas df."""
 
     temp_df = pandas_eval_df.copy()
@@ -48,7 +48,7 @@ def evaluate_ragas_model(pandas_eval_df,
         if what_to_evaluate == "CONTEXTS":
             # Keep the Custom_RAG_answer as is.
             # Replace the Custom_RAG_context with the col context.
-            temp_df['Custom_RAG_context'] = temp_df[col]
+            temp_df['recursive_context_512_k_2'] = temp_df[col]
 
         # Replace the Custom_RAG_answer with the LLM answer to evaluate.
         elif what_to_evaluate == "ANSWERS":
@@ -80,7 +80,8 @@ def evaluate_ragas_model(pandas_eval_df,
         elif what_to_evaluate == "ANSWERS":
             print(f"Evaluate LLM: {col}, ",end="")
             # Calculate avg LLM answer scores across all floating point number scores between 0 and 1.
-            temp['avg_answer_score'] = (temp.answer_relevancy + temp.answer_similarity + temp.answer_correctness) / 3
+            # temp['avg_answer_score'] = (temp.answer_relevancy + temp.answer_similarity + temp.answer_correctness) / 3
+            temp['avg_answer_score'] = temp.answer_correctness
             avg_answer_score = np.round(temp.avg_answer_score.mean(),4)
             temp_score = avg_answer_score
             print(f"avg_score: {temp_score}")
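
For reference, a minimal, hypothetical sketch of how the updated helper might be called from the evaluation notebook after this change. The CSV path is the file added in this commit; the metric selection, the presence of these columns in that CSV, and the import location of evaluate_ragas_model are assumptions, not part of the diff.

    # Hypothetical usage sketch, not part of this commit.
    # evaluate_ragas_model is the helper modified above; import it from the
    # repository's evaluation utility module (module name not shown in this excerpt).
    import pandas as pd
    from ragas.metrics import context_precision, context_recall  # assumed metric choice

    # Eval data added in this commit; assumed to contain the new context columns.
    eval_df = pd.read_csv("bootcamp/Evaluation/data/blog_eval_answers.csv")

    # Compare retrieval contexts from the two chunking strategies. The column names
    # suggest 512-token chunks with top-k=2 retrieval, recursive vs. HTML splitting.
    evaluate_ragas_model(
        eval_df,
        ragas_eval_metrics=[context_precision, context_recall],
        what_to_evaluate="CONTEXTS",
        cols_to_evaluate=["recursive_context_512_k_2", "html_context_512_k_2"],
    )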
