Add examples in evaluation docstring (#156)
* minor changes in docstring

* Update polaris/benchmark/_base.py

Co-authored-by: Andrew Quirke <[email protected]>

* Update polaris/benchmark/_base.py

Co-authored-by: Andrew Quirke <[email protected]>

* update docstrings

* minor changes

* Update polaris/benchmark/_base.py

Co-authored-by: Andrew Quirke <[email protected]>

---------

Co-authored-by: Andrew Quirke <[email protected]>
zhu0619 and Andrewq11 authored Jul 25, 2024
1 parent 653c46d commit 9173e2a
Showing 2 changed files with 18 additions and 0 deletions.
13 changes: 13 additions & 0 deletions polaris/benchmark/_base.py
@@ -406,6 +406,19 @@ def evaluate(
Returns:
    A `BenchmarkResults` object. This object can be directly submitted to the Polaris Hub.

Examples:
    1. For regression benchmarks:
        pred_scores = your_model.predict_score(molecules)  # predict continuous score values
        benchmark.evaluate(y_pred=pred_scores)
    2. For classification benchmarks:
        - If `roc_auc` and `pr_auc` are in the metric list, both class probabilities and label predictions are required:
            pred_probs = your_model.predict_proba(molecules)  # predict probabilities
            pred_labels = your_model.predict_labels(molecules)  # predict class labels
            benchmark.evaluate(y_pred=pred_labels, y_prob=pred_probs)
        - Otherwise:
            benchmark.evaluate(y_pred=pred_labels)
"""

# Instead of having the user pass the ground truth, we extract it from the benchmark spec ourselves.
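
For reference, a minimal end-to-end sketch of the classification flow these docstring examples describe. Only `benchmark.evaluate` with `y_pred` and `y_prob` comes from this change; `po.load_benchmark` follows the Polaris quickstart, while the benchmark slug, `my_model`, and `test_molecules` are hypothetical placeholders.

import polaris as po

# Load a classification benchmark from the Polaris Hub (hypothetical slug).
benchmark = po.load_benchmark("your-org/your-classification-benchmark")

# Produce hard labels and class probabilities with your own fitted model
# (placeholders; any scikit-learn-style classifier works here).
pred_labels = my_model.predict(test_molecules)
pred_probs = my_model.predict_proba(test_molecules)

# Metrics such as roc_auc and pr_auc need probabilities, so pass both arrays.
# For regression benchmarks, a single y_pred array of continuous scores suffices.
results = benchmark.evaluate(y_pred=pred_labels, y_prob=pred_probs)
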
5 changes: 5 additions & 0 deletions tests/test_evaluate.py
@@ -172,6 +172,11 @@ def test_metric_y_types(
test_single_task_benchmark_clf.metrics = [Metric.roc_auc]
test_single_task_benchmark_clf.evaluate(y_pred=predictions)

# If y_type != "y_pred" and y_prob is None, an error is thrown.
with pytest.raises(ValueError, match="Metric.pr_auc requires `y_prob` input"):
    test_single_task_benchmark_clf.metrics = [Metric.pr_auc]
    test_single_task_benchmark_clf.evaluate(y_pred=predictions)

# If y_type != "y_pred" and y_pred is not None and y_prob is not None, it uses y_prob as expected!
test_single_task_benchmark_clf.metrics = [Metric.roc_auc]
result = test_single_task_benchmark_clf.evaluate(y_pred=predictions, y_prob=probabilities)
