asreview · jteijema · Aug 27, 2024 · Aug 27, 2024 · Sep 4, 2024 · Sep 4, 2024
diff --git a/asreviewcontrib/insights/algorithms.py b/asreviewcontrib/insights/algorithms.py
@@ -1,4 +1,5 @@
 import numpy as np
+from sklearn import metrics
 
 
 def _recall_values(labels, x_absolute=False, y_absolute=False):
@@ -19,6 +20,14 @@ def _recall_values(labels, x_absolute=False, y_absolute=False):
     return x.tolist(), y.tolist()
 
 
+def _loss_value(labels):
+    """Return 1 - (half the positive-label ratio + area under recall values)."""
+    half_positive_ratio = (sum(labels) / len(labels)) * 0.5
+    recall_x, recall_y = _recall_values(labels)
+    area_under_recall_curve = metrics.auc(recall_x, recall_y)
+    return 1 - (half_positive_ratio + area_under_recall_curve)
+
+
 def _wss_values(labels, x_absolute=False, y_absolute=False):
     n_docs = len(labels)
     n_pos_docs = sum(labels)

diff --git a/asreviewcontrib/insights/metrics.py b/asreviewcontrib/insights/metrics.py
@@ -6,6 +6,7 @@
 from asreviewcontrib.insights.algorithms import _erf_values
 from asreviewcontrib.insights.algorithms import _fn_values
 from asreviewcontrib.insights.algorithms import _fp_values
+from asreviewcontrib.insights.algorithms import _loss_value
 from asreviewcontrib.insights.algorithms import _recall_values
 from asreviewcontrib.insights.algorithms import _tn_values
 from asreviewcontrib.insights.algorithms import _tp_values
@@ -169,6 +170,24 @@ def _tnr(labels, intercept, x_absolute=False):
 
     return _slice_metric(x, y, intercept)
 
+def loss(state_obj, priors=False):
+    """Return a loss value for how far the recall curve is from perfect recall.
+
+    The value is based on the area over the recall curve and under perfect
+    recall, computed by `_loss` on the labels from `_pad_simulation_labels`.
+
+    Args:
+        state_obj: Object passed to `_pad_simulation_labels`.
+        priors: Forwarded to `_pad_simulation_labels` as `priors`.
+
+    Returns:
+        float: The loss value representing the distance from perfect recall.
+    """
+    return _loss(_pad_simulation_labels(state_obj, priors=priors))
+
+def _loss(labels):
+    return _loss_value(labels)  # pass-through to algorithms._loss_value
+
 
 def get_metrics(
     state_obj,

diff --git a/pyproject.toml b/pyproject.toml
@@ -15,7 +15,7 @@ classifiers = [
     "Programming Language :: Python :: 3.11"
 ]
 license = {text = "Apache-2.0"}
-dependencies = ["numpy", "matplotlib", "asreview>=1,<2"]
+dependencies = ["numpy", "matplotlib", "asreview>=1,<2", "scikit-learn"]
 dynamic = ["version"]
 requires-python = ">=3.7"