# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Youden index metric."""

import datasets
import evaluate
import numpy as np
from sklearn.metrics import (
    precision_score,
    roc_auc_score,
    roc_curve,
)


_DESCRIPTION = """
This metric computes the Youden index from the Receiver Operating Characteristic (ROC) curve and
additionally reports the area under the curve (ROC AUC). The returned values correspond to the
optimal point on the ROC curve, i.e. the point where max(TPR - FPR) holds across all points on
the curve.

This metric only works with binary labels: the case in which there are exactly two label classes
and each example gets exactly one label. The prediction_scores are the probabilities of the
positive class.
"""

_KWARGS_DESCRIPTION = """
Args:
- references (array-like of shape (n_samples,)): Binary ground truth labels.
- prediction_scores (array-like of shape (n_samples,)): Model predictions, probabilities of the positive class.
Returns:
    Dict[str, float]: Returns the classification threshold, sensitivity, specificity, PPV and NPV at the
        optimal Youden index, as well as the ROC AUC score.
"""

_CITATION = """\
@article{youden1950index,
  title={Index for rating diagnostic tests},
  author={Youden, William J},
  journal={Cancer},
  volume={3},
  number={1},
  pages={32--35},
  year={1950},
  publisher={Wiley Online Library}
}
@article{fluss2005estimation,
  title={Estimation of the Youden Index and its associated cutoff point},
  author={Fluss, Ronen and Faraggi, David and Reiser, Benjamin},
  journal={Biometrical Journal: Journal of Mathematical Methods in Biosciences},
  volume={47},
  number={4},
  pages={458--472},
  year={2005},
  publisher={Wiley Online Library}
}
@article{scikit-learn,
  title={Scikit-learn: Machine Learning in {P}ython},
  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
          and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
          and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
          Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
  journal={Journal of Machine Learning Research},
  volume={12},
  pages={2825--2830},
  year={2011}
}
"""


@evaluate.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
class YoudenIndex(evaluate.Metric):
    def _info(self):
        return evaluate.MetricInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            inputs_description=_KWARGS_DESCRIPTION,
            features=datasets.Features(
                {
                    "prediction_scores": datasets.Value("float"),
                    "references": datasets.Value("int32"),
                }
            ),
            reference_urls=[
                "https://en.wikipedia.org/wiki/Youden%27s_J_statistic",
                "https://gist.github.com/twolodzko/4fae2980a1f15f8682d243808e5859bb",
            ],
        )

    def _compute(
        self,
        references,
        prediction_scores,
    ):
        """Compute the Youden index to determine the optimal threshold for classification.

        Args:
            references (array-like of shape (n_samples,)): Binary ground truth labels.
            prediction_scores (array-like of shape (n_samples,)): Model predictions, probabilities of the positive class.

        Returns:
            Dict[str, float]: Returns the classification threshold, sensitivity, specificity, NPV and PPV
                at the optimal Youden index, as well as the ROC AUC score.
        """
        # 1. Compute ROC AUC
        roc_auc = roc_auc_score(references, prediction_scores)

        # 2. Determine the optimal threshold based on the Youden index, i.e. the
        #    point on the ROC curve that maximizes TPR - FPR
        fpr, tpr, thresholds = roc_curve(references, prediction_scores, drop_intermediate=False)
        idx = np.argmax(tpr - fpr)
        optimal_threshold = thresholds[idx]

        # 3. Calculate PPV (precision) and NPV based on the optimal threshold;
        #    NPV is the precision of the negative class (pos_label=0)
        optimal_predictions = np.where(prediction_scores >= optimal_threshold, 1, 0)
        ppv = precision_score(references, optimal_predictions)
        npv = precision_score(references, optimal_predictions, pos_label=0)

        return {
            "youden_threshold": optimal_threshold,
            "sensitivity": tpr[idx],
            "specificity": 1 - fpr[idx],
            "roc_auc": roc_auc,
            "ppv": ppv,
            "npv": npv,
        }
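

if __name__ == "__main__":
    # Minimal usage sketch (illustrative only, not part of the metric): the toy
    # labels and scores below are made up. In a typical workflow this script is
    # loaded via `evaluate.load("<path to this file>")`; instantiating the class
    # directly, as done here, avoids assuming a particular file location.
    metric = YoudenIndex()
    results = metric.compute(
        references=[0, 0, 1, 1, 1],
        prediction_scores=[0.1, 0.4, 0.35, 0.8, 0.7],
    )
    print(results)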