diff --git a/examples/_uci_experiments_checking_optim_threshold_modifications.py b/examples/_uci_experiments_checking_optim_threshold_modifications.py index e5ae184..79f7208 100644 --- a/examples/_uci_experiments_checking_optim_threshold_modifications.py +++ b/examples/_uci_experiments_checking_optim_threshold_modifications.py @@ -15,7 +15,7 @@ import itertools import argparse from glob import glob import pandas as pd - +from time import time N_JOBS = -1 @@ -38,10 +38,11 @@ svmperf_params = {'classifier__C': __C_range} def quantification_models(): yield 'acc', ACC(newLR()), lr_params yield 'T50', T50(newLR()), lr_params - #yield 'X', X(newLR()), lr_params - #yield 'MAX', MAX(newLR()), lr_params + yield 'X', X(newLR()), lr_params + yield 'MAX', MAX(newLR()), lr_params yield 'MS', MS(newLR()), lr_params - yield 'MS2', MS2(newLR()), lr_params + yield 'MS+', MS(newLR()), lr_params + # yield 'MS2', MS2(newLR()), lr_params @@ -115,8 +116,10 @@ if __name__ == '__main__': optim_losses = ['mae'] datasets = qp.datasets.UCI_DATASETS + tstart = time() models = quantification_models() qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS) + tend = time() # open all results and show df = pd.DataFrame(columns=('method', 'dataset', 'mae')) @@ -126,6 +129,6 @@ if __name__ == '__main__': dataset = '-'.join(dataset) df.loc[i] = [method, dataset, mae] - print(df.pivot_table(index='dataset', columns='method', values='mae')) - + print(df.pivot_table(index='dataset', columns='method', values='mae', margins=True)) + print(f'took {(tend-tstart)}s') diff --git a/quapy/functional.py b/quapy/functional.py index d39b306..c6dc351 100644 --- a/quapy/functional.py +++ b/quapy/functional.py @@ -66,7 +66,7 @@ def prevalence_from_probabilities(posteriors, binarize: bool = False): return prevalences -def as_binary_prevalence(positive_prevalence: float, clip_if_necessary=False): +def as_binary_prevalence(positive_prevalence: Union[float, np.ndarray], clip_if_necessary=False): """ Helper that, given a float representing the prevalence for the positive class, returns a np.ndarray of two values representing a binary distribution. @@ -80,7 +80,8 @@ def as_binary_prevalence(positive_prevalence: float, clip_if_necessary=False): positive_prevalence = np.clip(positive_prevalence, 0, 1) else: assert 0 <= positive_prevalence <= 1, 'the value provided is not a valid prevalence for the positive class' - return np.asarray([1-positive_prevalence, positive_prevalence]) + return np.asarray([1-positive_prevalence, positive_prevalence]).T + def HellingerDistance(P, Q) -> float: diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py index 8053d47..066f480 100644 --- a/quapy/method/aggregative.py +++ b/quapy/method/aggregative.py @@ -1102,7 +1102,7 @@ class ThresholdOptimization(BinaryAggregativeQuantifier): :param fpr: float, false positive rate :return: true if the combination is to be discarded, false otherwise """ - return (tpr + fpr) == 0 + return (tpr - fpr) == 0 def _eval_candidate_thresholds(self, decision_scores, y): @@ -1119,9 +1119,9 @@ class ThresholdOptimization(BinaryAggregativeQuantifier): candidates = [] scores = [] for candidate_threshold in candidate_thresholds: - y_ = self.classes_[1 * (decision_scores > candidate_threshold)] + y_ = self.classes_[1 * (decision_scores >= candidate_threshold)] TP, FP, FN, TN = self._compute_table(y, y_) - tpr = self._compute_tpr(TP, FP) + tpr = self._compute_tpr(TP, FN) fpr = self._compute_fpr(FP, TN) if not self.discard(tpr, fpr): candidate_score = self.condition(tpr, fpr) @@ -1139,12 +1139,18 @@ class ThresholdOptimization(BinaryAggregativeQuantifier): return candidates - def aggregate_with_threshold(self, classif_predictions, tpr, fpr, threshold): - prevs_estim = np.mean(classif_predictions > threshold) - if tpr - fpr != 0: - prevs_estim = (prevs_estim - fpr) / (tpr - fpr) - prevs_estim = F.as_binary_prevalence(prevs_estim, clip_if_necessary=True) - return prevs_estim + # def aggregate_with_threshold(self, classif_predictions, tpr, fpr, threshold): + # prevs_estim = np.mean(classif_predictions >= threshold) + # if tpr - fpr != 0: + # prevs_estim = (prevs_estim - fpr) / (tpr - fpr) + # prevs_estim = F.as_binary_prevalence(prevs_estim, clip_if_necessary=True) + # return prevs_estim + + def aggregate_with_threshold(self, classif_predictions, tprs, fprs, thresholds): + prevs_estims = np.mean(classif_predictions[:, None] >= thresholds, axis=0) + prevs_estims = (prevs_estims - fprs) / (tprs - fprs) + prevs_estims = F.as_binary_prevalence(prevs_estims, clip_if_necessary=True) + return prevs_estims.squeeze() def _compute_table(self, y, y_): TP = np.logical_and(y == y_, y == self.pos_label).sum()