Limit metrics in generation stage
aravind10x committed Dec 22, 2024
1 parent 983cd92 commit 149c92e
Showing 3 changed files with 17 additions and 8 deletions.

1 change: 1 addition & 0 deletions pyproject.toml

@@ -12,6 +12,7 @@ license = {text = "Apache-2.0"}
 
 dependencies = [
     "chromadb",
+    "datasets>=2.18.0",
     "fastapi>=0.100.0",
     "jinja2",
     "langchain>=0.1.0",
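
The new datasets dependency presumably supplies the Dataset object consumed by the generation-stage evaluator below. A minimal sketch of building such a dataset (the column names mirror the ones selected in evaluation.py; the variable name and values are illustrative assumptions, not taken from the repository):

    # Illustrative only: a tiny evaluation dataset with the columns the
    # evaluator below reads (question, answer, ground_truth).
    from datasets import Dataset

    eval_dataset = Dataset.from_dict({
        "question": ["What does the retriever return?"],
        "answer": ["The top-k chunks most similar to the query."],
        "ground_truth": ["The k most relevant document chunks."],
    })
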
13 changes: 10 additions & 3 deletions src/ragbuilder/core/telemetry.py

@@ -170,11 +170,15 @@ def eval_datagen_span(self, **attributes):
 
             except Exception as e:
                 self._safe_set_attribute(span, "generation_success", False)
+                self._safe_set_attribute(span, "error_type", e.__class__.__name__)
+                self._safe_set_attribute(span, "error_message", str(e))
                 raise e
 
         except Exception as e:
             logger.debug(f"Error in eval data generation span: {e}")
-            yield None
+            if span is None:
+                yield None
+            raise
 
     @contextmanager
     def optimization_span(self, module: ModuleType, config: Dict[str, Any]):

@@ -293,8 +297,11 @@ def track_error(self, module: ModuleType, error: Exception, context: Dict[str, Any]):
         self._safe_add_counter(self.errors, 1, {"module": module, "error_type": error.__class__.__name__})
 
     def flush(self):
-        if self.enabled and self.meter_provider:
-            self.meter_provider.force_flush()
+        try:
+            if self.enabled and self.meter_provider:
+                self.meter_provider.force_flush()
+        except Exception as e:
+            logger.debug(f"Error flushing telemetry: {e}")
 
     def shutdown(self):
         if self.enabled and self.meter_provider:
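
The flush() change above makes the OpenTelemetry force_flush() call best-effort: failures are logged at debug level instead of propagating to the caller. A minimal sketch of the same pattern in isolation (safe_flush and its parameters are hypothetical names, not part of the repository):

    # Illustrative pattern only: flush metrics without letting telemetry
    # errors reach the caller, mirroring the guarded flush() above.
    import logging

    logger = logging.getLogger(__name__)

    def safe_flush(meter_provider, enabled=True):
        try:
            if enabled and meter_provider:
                meter_provider.force_flush()  # OpenTelemetry SDK MeterProvider call
        except Exception as e:
            logger.debug(f"Error flushing telemetry: {e}")
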
11 changes: 6 additions & 5 deletions src/ragbuilder/generation/evaluation.py

@@ -78,10 +78,10 @@ def evaluate(self, eval_dataset: Dataset, llm=AzureChatOpenAI(model="gpt-4o-mini
             eval_dataset,
             metrics=[
                 answer_correctness,
-                faithfulness,
-                answer_relevancy,
-                context_precision,
-                context_recall,
+                # faithfulness,
+                # answer_relevancy,
+                # context_precision,
+                # context_recall,
             ],
             raise_exceptions=False,
             llm=llm,

@@ -92,7 +92,8 @@
         result_df = result.to_pandas()
         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         output_csv_path = 'rag_eval_results_' + timestamp + '.csv'
-        selected_columns = ["prompt_key", "prompt", "question", "answer", "ground_truth", "answer_correctness", "faithfulness", "answer_relevancy", "context_precision", "context_recall", 'config']
+        # selected_columns = ["prompt_key", "prompt", "question", "answer", "ground_truth", "answer_correctness", "faithfulness", "answer_relevancy", "context_precision", "context_recall", 'config']
+        selected_columns = ["prompt_key", "prompt", "question", "answer", "ground_truth", "answer_correctness", 'config']
         result_df[selected_columns].to_csv(output_csv_path, index=False)
 
         self.logger.debug("Prompt evaluation completed")
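
With the other ragas metrics commented out, the generation stage now scores each prompt on answer_correctness alone, and the results CSV keeps only that score. A minimal sketch of a comparable ragas call under these assumptions (eval_dataset is a datasets.Dataset like the one sketched above; the evaluator LLM is left to ragas defaults rather than the AzureChatOpenAI instance used in the repo):

    # Illustrative only: evaluate with the reduced metric set and keep just
    # the columns the updated selected_columns list retains.
    from ragas import evaluate
    from ragas.metrics import answer_correctness

    result = evaluate(
        eval_dataset,
        metrics=[answer_correctness],
        raise_exceptions=False,
    )
    result_df = result.to_pandas()
    print(result_df[["question", "answer", "ground_truth", "answer_correctness"]])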
