Source code for pyEdgeEval.common.multi_label.calculate_metrics

#!/usr/bin/env python3

import numpy as np

from pyEdgeEval.common.metrics import (
    compute_rec_prec_f1,
    interpolated_max_scores,
)
from pyEdgeEval.common.utils import check_thresholds
from pyEdgeEval.utils import (
    track_parallel_progress,
    track_progress,
)

__all__ = ["calculate_metrics"]


def calculate_metrics(
    eval_single,
    thresholds,
    samples,
    nproc=8,
):
    """Main function to calculate boundary metrics

    Args:
        eval_single (Callable): function that takes a single sample (dict) as
            input and returns the per-threshold counts
            (count_r, sum_r, count_p, sum_p)
        thresholds (int, float, list, np.ndarray): thresholds used for evaluation
        samples (list[dict]): list of dicts containing sample info
            (each dict must provide a "name" key)
        nproc (int): number of processes to spawn
    Returns:
        tuple of (sample_results, threshold_results, overall_result)
    """
    # initial run (process heavy)
    if nproc > 1:
        sample_metrics = track_parallel_progress(
            eval_single,
            samples,
            nproc=nproc,
            keep_order=True,
        )
    else:
        sample_metrics = track_progress(
            eval_single,
            samples,
        )

    # check and convert thresholds
    thresholds = check_thresholds(thresholds)

    # initialize per-threshold accumulators (used for ODS)
    n_thresh = thresholds.shape[0]
    count_r_overall = np.zeros((n_thresh,))
    sum_r_overall = np.zeros((n_thresh,))
    count_p_overall = np.zeros((n_thresh,))
    sum_p_overall = np.zeros((n_thresh,))

    # accumulators for OIS scores (per-sample best threshold)
    count_r_best = 0
    sum_r_best = 0
    count_p_best = 0
    sum_p_best = 0

    # calculate per-sample metrics
    sample_results = []
    for sample_index, sample_data in enumerate(samples):
        count_r, sum_r, count_p, sum_p = sample_metrics[sample_index]
        count_r_overall += count_r
        sum_r_overall += sum_r
        count_p_overall += count_p
        sum_p_overall += sum_p

        # Compute precision, recall and F1
        rec, prec, f1 = compute_rec_prec_f1(count_r, sum_r, count_p, sum_p)

        # best_thresh, best_rec, best_prec, best_f1 = interpolated_max_scores(thresholds, rec, prec)

        # Find best F1 score for this sample
        best_ndx = np.argmax(f1)

        # Gather OIS counts at the per-sample best threshold
        count_r_best += count_r[best_ndx]
        sum_r_best += sum_r[best_ndx]
        count_p_best += count_p[best_ndx]
        sum_p_best += sum_p[best_ndx]

        sample_results.append(
            dict(
                name=sample_data["name"],
                threshold=thresholds[best_ndx],
                recall=rec[best_ndx],
                precision=prec[best_ndx],
                f1=f1[best_ndx],
            )
        )

    # Compute overall precision, recall and F1
    rec_overall, prec_overall, f1_overall = compute_rec_prec_f1(
        count_r_overall, sum_r_overall, count_p_overall, sum_p_overall
    )

    # Interpolated way to find ODS scores
    best_threshold, best_rec, best_prec, best_f1 = interpolated_max_scores(
        thresholds, rec_overall, prec_overall
    )

    # Find best F1 score
    # best_i_ovr = np.argmax(f1_overall)

    threshold_results = []
    for thresh_i in range(n_thresh):
        threshold_results.append(
            dict(
                threshold=thresholds[thresh_i],
                recall=rec_overall[thresh_i],
                precision=prec_overall[thresh_i],
                f1=f1_overall[thresh_i],
            )
        )

    # Calculate AUC of the precision-recall curve
    prec_inc = 0.01  # recall sampling step (hard-coded)
    rec_unique, rec_unique_ndx = np.unique(rec_overall, return_index=True)
    prec_unique = prec_overall[rec_unique_ndx]
    if rec_unique.shape[0] > 1:
        prec_interp = np.interp(
            np.arange(0, 1, prec_inc),
            rec_unique,
            prec_unique,
            left=0.0,
            right=0.0,
        )
        area_pr = prec_interp.sum() * prec_inc
    else:
        area_pr = 0.0

    # Calculate AP over evenly spaced recall thresholds
    ap = 0
    for t in np.arange(0, 1, 0.01):
        _r = rec_overall >= t
        p = np.max(prec_overall[_r], initial=0)
        ap = ap + p / 101

    # Calculate OIS metrics from the accumulated per-sample best counts
    rec_best, prec_best, f1_best = compute_rec_prec_f1(
        float(count_r_best),
        float(sum_r_best),
        float(count_p_best),
        float(sum_p_best),
    )

    overall_result = dict(
        ODS_threshold=best_threshold,
        ODS_recall=best_rec,
        ODS_precision=best_prec,
        ODS_f1=best_f1,
        OIS_recall=rec_best,
        OIS_precision=prec_best,
        OIS_f1=f1_best,
        AUC=area_pr,
        AP=ap,
    )

    return sample_results, threshold_results, overall_result
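
A minimal usage sketch (illustrative only, not part of the library source) follows. It assumes that each sample dict carries a "name" key, as the loop above requires, and that eval_single returns the per-threshold tuple (count_r, sum_r, count_p, sum_p) consumed by the accumulation step; my_eval_single and its dummy counts are hypothetical placeholders for a real edge-matching evaluator.

import numpy as np

from pyEdgeEval.common.multi_label.calculate_metrics import calculate_metrics

# thresholds at which the (hypothetical) evaluator produces counts
thresholds = np.linspace(0.01, 0.99, 99)


def my_eval_single(sample):
    """Hypothetical per-sample evaluator returning dummy counts.

    A real implementation would match the predicted edge map of ``sample``
    against its ground-truth boundaries at every threshold.
    """
    sum_r = np.full_like(thresholds, 1000.0)     # total GT boundary pixels
    count_r = 1000.0 * (1.0 - thresholds)        # matched GT pixels (dummy)
    sum_p = np.full_like(thresholds, 1200.0)     # total predicted pixels
    count_p = 0.8 * 1200.0 * (1.0 - thresholds)  # matched predicted pixels (dummy)
    return count_r, sum_r, count_p, sum_p


samples = [
    dict(name="image_0001"),
    dict(name="image_0002"),
]

sample_results, threshold_results, overall = calculate_metrics(
    eval_single=my_eval_single,
    thresholds=thresholds,
    samples=samples,
    nproc=1,  # single process; values > 1 spawn worker processes
)
print(overall["ODS_f1"], overall["OIS_f1"], overall["AP"])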