From bb5763da76b60b980eacdfa47d7fae0ce53587d3 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Thu, 12 Feb 2026 13:14:59 +0100 Subject: [PATCH] Fix broken submodule --- .gitmodules | 3 + BayesianKDEy/TODO.txt | 1 + BayesianKDEy/commons.py | 7 + BayesianKDEy/datasets.py | 4 +- BayesianKDEy/full_experiments.py | 95 +++---- BayesianKDEy/generate_results.py | 359 ++++++++++++++++++------ BayesianKDEy/map_experiments.py | 169 +++++++++++ BayesianKDEy/methods.py | 168 +++++++++++ BayesianKDEy/plot_simplex.py | 45 +-- BayesianKDEy/temperature_calibration.py | 60 +++- CHANGE_LOG.txt | 12 +- TODO.txt | 15 +- quapy/method/_bayesian.py | 42 ++- quapy/method/_kdey.py | 27 +- quapy/method/aggregative.py | 1 + quapy/method/confidence.py | 170 +++++++++-- result_table | 1 + 17 files changed, 969 insertions(+), 210 deletions(-) create mode 100644 .gitmodules create mode 100644 BayesianKDEy/map_experiments.py create mode 100644 BayesianKDEy/methods.py create mode 160000 result_table diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..89cf11c --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "result_table"] + path = result_table + url = gitea@gitea-s2i2s.isti.cnr.it:moreo/result_table.git diff --git a/BayesianKDEy/TODO.txt b/BayesianKDEy/TODO.txt index d4d8d58..59e835e 100644 --- a/BayesianKDEy/TODO.txt +++ b/BayesianKDEy/TODO.txt @@ -1,4 +1,5 @@ - Things to try: + - Why not optimize the calibration of the classifier directly, instead of optimizing the classifier as a component of the quantifier? - init chain helps? [seems irrelevant in MAPLS...] - Aitchison kernel is better? - other classifiers? diff --git a/BayesianKDEy/commons.py b/BayesianKDEy/commons.py index 3a2b73e..c33e3fa 100644 --- a/BayesianKDEy/commons.py +++ b/BayesianKDEy/commons.py @@ -101,6 +101,13 @@ class KDEyCLR(KDEyML): random_state=random_state, kernel='aitchison' ) +class KDEyCLR2(KDEyML): + def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None): + super().__init__( + classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth, + random_state=random_state, kernel='aitchison' + ) + class KDEyILR(KDEyML): def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None): diff --git a/BayesianKDEy/datasets.py b/BayesianKDEy/datasets.py index f4c80f3..0a19dce 100644 --- a/BayesianKDEy/datasets.py +++ b/BayesianKDEy/datasets.py @@ -180,7 +180,7 @@ class VisualDataHandler(DatasetHandler): @classmethod def get_datasets(cls): - datasets = ['cifar10', 'mnist', 'cifar100coarse', 'fashionmnist', 'svhn'] #+ ['cifar100'] + datasets = ['cifar100coarse', 'cifar10', 'mnist', 'fashionmnist', 'svhn'] #+ ['cifar100'] # datasets_feat = [f'{d}-f' for d in datasets] datasets_feat = [f'{d}-l' for d in datasets] return datasets_feat # + datasets @@ -295,7 +295,7 @@ class UCIDatasetHandler(DatasetHandler, ABC): class UCIMulticlassHandler(UCIDatasetHandler): - DATASETS = [d for d in qp.datasets.UCI_MULTICLASS_DATASETS if d not in frozenset(['hcv', 'poker_hand'])] + DATASETS = sorted([d for d in qp.datasets.UCI_MULTICLASS_DATASETS if d not in frozenset(['hcv', 'poker_hand'])]) def __init__(self, name, n_val_samples=100, n_test_samples=100, sample_size=1000, random_state=0): super().__init__(name, n_val_samples, n_test_samples, sample_size, random_state) diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py index a176b59..c643a52 100644 ---
a/BayesianKDEy/full_experiments.py +++ b/BayesianKDEy/full_experiments.py @@ -4,6 +4,7 @@ from sklearn.linear_model import LogisticRegression from copy import deepcopy as cp import quapy as qp from BayesianKDEy.commons import KDEyReduce +from BayesianKDEy.methods import get_experimental_methods, MethodDescriptor from _bayeisan_kdey import BayesianKDEy from _bayesian_mapls import BayesianMAPLS from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh @@ -24,7 +25,7 @@ from time import time -def methods(data_handler: DatasetHandler): +def methods___depr(): """ Yields tuples (name, quantifier, hyperparams, bayesian/bootstrap_constructor, scope), where: - name: is a str representing the name of the method (e.g., 'BayesianKDEy') @@ -33,26 +34,23 @@ - bayesian/bootstrap_constructor: is a function that instantiates the bayesian or bootstrap method with the quantifier's optimized hyperparameters """ - if False: # isinstance(data_handler, VisualDataHandler): - Cls = MockClassifierFromPosteriors - cls_hyper = {} - val_split = data_handler.get_validation().Xy # use this specific collection - pass - else: - Cls = LogisticRegression - cls_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]} - val_split = 5 # k-fold cross-validation + + Cls = LogisticRegression + cls_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]} + val_split = 5 # k-fold cross-validation + cc_hyper = cls_hyper acc_hyper = cls_hyper # emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs'], **cls_hyper} hdy_hyper = {'nbins': [3,4,5,8,16,32], **cls_hyper} kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper} kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper} - + band = {'bandwidth': np.logspace(-3,-1,10)} multiclass_method = 'multiclass' only_binary = 'only_binary' only_multiclass = 'only_multiclass' # surrogate quantifiers + cc = CC(Cls()) acc = ACC(Cls(), val_split=val_split) hdy = DMy(Cls(), val_split=val_split) kde_gau = KDEyML(Cls(), val_split=val_split) @@ -65,22 +63,26 @@ # Bootstrap approaches: # -------------------------------------------------------------------------------------------------------- + # yield 'BootstrapCC', cc, cc_hyper, lambda hyper: AggregativeBootstrap(CC(Cls()), n_test_samples=1000, random_state=0), multiclass_method #yield 'BootstrapACC', acc, acc_hyper, lambda hyper: _AggregativeBootstrap(ACC(Cls()), n_test_samples=1000, random_state=0), multiclass_method #yield 'BootstrapEMQ', emq, on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: _AggregativeBootstrap(EMQ(Cls(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method #yield 'BootstrapHDy', hdy, hdy_hyper, lambda hyper: _AggregativeBootstrap(DMy(Cls(), **hyper), n_test_samples=1000, random_state=0), multiclass_method #yield 'BootstrapKDEy', kde_gau, kdey_hyper, lambda hyper: _AggregativeBootstrap(KDEyML(Cls(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method - # Bayesian approaches: + # Bayesian approaches: (* = temperature calibration with 'auto' amplitude threshold, choosing coverage closest to nominal; + = temperature calibration without amplitude threshold, using the winkler criterion; ! = same, but with alpha=0.005 for winkler) # -------------------------------------------------------------------------------------------------------- # yield 'BayesianACC', acc,
acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method + # yield 'BayesianACC*', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method + # yield 'BayesianACC+', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method + # yield 'BayesianACC!', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method #yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary # yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method #yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method #yield f'BaKDE-Gau-scale', kde_gau_scale, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method - yield f'BaKDE-Gau-pca5', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method - yield f'BaKDE-Gau-pca5*', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method - yield f'BaKDE-Gau-pca10', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method - yield f'BaKDE-Gau-pca10*', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method + #yield f'BaKDE-Gau-pca5', kde_gau_pca, band, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method + #yield f'BaKDE-Gau-pca5*', kde_gau_pca, band, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method + #yield f'BaKDE-Gau-pca10', kde_gau_pca10, band, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method + #yield f'BaKDE-Gau-pca10*', kde_gau_pca10, band, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method # yield f'BaKDE-Gau-H0', KDEyFresh(Cls(), bandwidth=0.4), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=0.4, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method # yield f'BaKDE-Gau-H1', KDEyFresh(Cls(), bandwidth=1.), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1., kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method # yield f'BaKDE-Gau-H2', KDEyFresh(Cls(), bandwidth=1.5), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1.5, @@ -89,19 +91,21 @@ def methods(data_handler: DatasetHandler): # engine='numpyro', # **hyper), multiclass_method # yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: 
BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method + # yield f'BaKDE-Ait-T!', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method #yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method + #yield f'BaKDE-Gau-T!', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method # yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method # yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method + # yield 'BayEMQ!', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method + # yield 'BaEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method + + # yield 'BaACC!', acc, acc_hyper, lambda hyper: BayesianCC(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), temperature=None, mcmc_seed=0), multiclass_method + # yield 'BaEMQ!', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), prior='uniform', temperature=None, exact_train_prev=False), multiclass_method def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict): with qp.util.temp_seed(0): - if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid: - def scale_bandwidth(bandwidth, n_classes, beta=0.5): - return bandwidth * np.power(n_classes, beta) - n = dataset.get_training().n_classes - grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']] - print('bandwidth scaled') + point_quantifier = cp(point_quantifier) print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}') # model selection if len(grid)>0: @@ -127,30 +131,26 @@ def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier): if uncertainty_quantifier.temperature is None: print('calibrating temperature') train, val_prot = dataset.get_train_valprot_for_modsel() - if dataset.name.startswith('LeQua'): - temp_grid=[100., 500, 1000, 5_000, 10_000, 50_000] - else: - temp_grid=[.5, 1., 1.5, 2., 5., 10., 100., 1000.] - temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=.999) + temp_grid=[1., .5, 1.5, 2., 5., 10., 100., 1000.] 
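The 'winkler' criterion passed to temp_calibration just below scores a confidence interval by its width plus a penalty, scaled by 2/alpha, for the amount by which the true value falls outside it (lower is better); the repository's own implementation is winkler_scores in quapy/method/confidence.py, further down in this patch. A minimal self-contained sketch, with illustrative names only:

import numpy as np

def interval_score(low, high, true_val, alpha=0.05):
    # interval (Winkler) score: width plus 2/alpha times the miss distance
    amplitude = high - low
    miss = max(0.0, low - true_val) + max(0.0, true_val - high)
    return amplitude + (2.0 / alpha) * miss

# a 95% interval [0.10, 0.30] that misses a true prevalence of 0.35 scores
# 0.20 + (2/0.05)*0.05 = 2.20; any interval covering the value scores its width only.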
+ temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=1., criterion='winkler') uncertainty_quantifier.temperature = temperature else: temperature = uncertainty_quantifier.temperature return temperature - -def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, uncertainty_quant_constructor, hyper_choice_path: Path): +def experiment(dataset: DatasetHandler, method: MethodDescriptor, hyper_choice_path: Path): with qp.util.temp_seed(0): # model selection best_hyperparams = qp.util.pickled_resource( - hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid + hyper_choice_path, model_selection, dataset, method.surrogate_quantifier(), method.hyper_parameters ) print(f'{best_hyperparams=}') t_init = time() - uncertainty_quantifier = uncertainty_quant_constructor(best_hyperparams) + uncertainty_quantifier = method.uncertainty_aware_quantifier(best_hyperparams) temperature = temperature_calibration(dataset, uncertainty_quantifier) training, test_generator = dataset.get_train_testprot_for_eval() uncertainty_quantifier.fit(*training.Xy) @@ -176,7 +176,13 @@ def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, results['test-time'].append(ttime) results['samples'].append(region.samples) - pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} W={np.mean(results["sre"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}') + pbar.set_description( + f'{method.name} ' + f'MAE={np.mean(results["ae"]):.5f} ' + f'W={np.mean(results["sre"]):.5f} ' + f'Cov={np.mean(results["coverage"]):.5f} ' + f'AMP={np.mean(results["amplitude"]):.5f}' + ) report = { 'optim_hyper': best_hyperparams, @@ -189,40 +195,31 @@ def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, return report -def check_skip_experiment(method_scope, dataset: DatasetHandler): - if method_scope == 'only_binary' and not dataset.is_binary(): - return True - if method_scope == 'only_multiclass' and dataset.is_binary(): - return True - return False - - if __name__ == '__main__': result_dir = RESULT_DIR - for data_handler in [CIFAR100Handler]:#, UCIMulticlassHandler,LeQuaHandler, VisualDataHandler, CIFAR100Handler]: + for data_handler in [LeQuaHandler]:#, UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]: for dataset in data_handler.iter(): qp.environ['SAMPLE_SIZE'] = dataset.sample_size print(f'dataset={dataset.name}') - #if dataset.name != 'abalone': - # continue problem_type = 'binary' if dataset.is_binary() else 'multiclass' - for method_name, surrogate_quant, hyper_params, withconf_constructor, method_scope in methods(dataset): - if check_skip_experiment(method_scope, dataset): + for method in get_experimental_methods(): + # skip combination? 
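Both the model-selection step and the final report in experiment() above are cached via quapy's qp.util.pickled_resource: on the first call it runs the generating function and pickles the result at the given path; subsequent calls load the pickle and skip the computation, which is what makes re-running these scripts cheap. A usage sketch (assuming the helper keeps its signature of path first, then the function and its positional arguments):

import quapy as qp

def expensive_step(n):
    # stands in for model_selection(...) or experiment(...) in this script
    return sum(i * i for i in range(n))

# first call computes and stores the pickle; later calls just load it
result = qp.util.pickled_resource('./cache/step.pkl', expensive_step, 10_000)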
+ if method.binary_only() and not dataset.is_binary(): continue - result_path = experiment_path(result_dir / problem_type, dataset.name, method_name) - hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, surrogate_quant.__class__.__name__) + result_path = experiment_path(result_dir / problem_type, dataset.name, method.name) + hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, method.surrogate_quantifier_name()) report = qp.util.pickled_resource( - result_path, experiment, dataset, surrogate_quant, method_name, hyper_params, withconf_constructor, hyper_path + result_path, experiment, dataset, method, hyper_path ) print(f'dataset={dataset.name}, ' - f'method={method_name}: ' + f'method={method.name}: ' f'mae={report["results"]["ae"].mean():.5f}, ' f'W={report["results"]["sre"].mean():.5f}, ' f'coverage={report["results"]["coverage"].mean():.5f}, ' diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py index cb1932f..5db5c37 100644 --- a/BayesianKDEy/generate_results.py +++ b/BayesianKDEy/generate_results.py @@ -1,6 +1,7 @@ import pickle from collections import defaultdict - +import matplotlib.pyplot as plt +import seaborn as sns from joblib import Parallel, delayed from tqdm import tqdm import pandas as pd @@ -9,10 +10,16 @@ from pathlib import Path import quapy as qp from BayesianKDEy.commons import RESULT_DIR from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler +from comparison_group import SelectGreaterThan, SelectByName, SelectSmallerThan +from format import FormatModifierSelectColor from quapy.error import dist_aitchison -from quapy.method.confidence import ConfidenceIntervals +from quapy.method.confidence import ConfidenceIntervals, ConfidenceIntervalsILR, ConfidenceIntervalsCLR from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC import quapy.functional as F +from result_path.src.table import LatexTable +import numpy as np +import pandas as pd +from itertools import chain pd.set_option('display.max_columns', None) pd.set_option('display.width', 2000) @@ -23,19 +30,16 @@ pd.set_option("display.float_format", "{:.4f}".format) # methods = None # show all methods -methods = ['BayesianACC', - 'BaKDE-Ait-numpyro', - 'BaKDE-Ait-T*', - 'BaKDE-Gau-numpyro', - 'BaKDE-Gau-T*', 'BaKDE-Gau-pca5', 'BaKDE-Gau-pca5*', 'BaKDE-Gau-pca10', 'BaKDE-Gau-pca10*', - # 'BayEMQ-U-Temp1-2', - # 'BayEMQ-T*', - 'BayEMQ', - 'BayEMQ*', - # 'BootstrapACC', - # 'BootstrapHDy', - # 'BootstrapKDEy', - # 'BootstrapEMQ' +methods = ['BoCC', + 'BaACC!', + 'BaEMQ!', + 'BaKDE-Gau-T!', + 'BaKDE-Ait-T!', + 'BaKDE-Ait-T!2' + #'BootstrapACC', + #'BootstrapHDy', + #'BootstrapKDEy', + #'BootstrapEMQ' ] def region_score(true_prev, region: ConfidenceRegionABC): @@ -55,11 +59,15 @@ def compute_coverage_amplitude(region_constructor, **kwargs): def process_one(samples, true_prevs): region = region_constructor(samples, **kwargs) - if isinstance(region, ConfidenceIntervals): + if isinstance(region, ConfidenceIntervals) or isinstance(region, ConfidenceIntervalsCLR) or isinstance(region, ConfidenceIntervalsILR): winkler = region.mean_winkler_score(true_prevs) + # winkler_e = region.mean_winkler_score(true_prevs, add_ae=True) + cov_soft = region.coverage_soft(true_prevs) else: winkler = None - return region.coverage(true_prevs), region.montecarlo_proportion(), winkler + # winkler_e = None + cov_soft = 
None + return region.coverage(true_prevs), region.montecarlo_proportion(), winkler, cov_soft out = Parallel(n_jobs=3)( delayed(process_one)(samples, true_prevs) @@ -71,8 +79,8 @@ def compute_coverage_amplitude(region_constructor, **kwargs): ) # unzip results - coverage, amplitude, winkler = zip(*out) - return list(coverage), list(amplitude), list(winkler) + coverage, amplitude, winkler, cov_soft = zip(*out) + return list(coverage), list(amplitude), list(winkler), list(cov_soft) def update_pickle(report, pickle_path, updated_dict:dict): @@ -83,35 +91,151 @@ def update_pickle(report, pickle_path, updated_dict:dict): def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs): if f'coverage-{conf_name}' not in report: - covs, amps, winkler = compute_coverage_amplitude(conf_region_class, **kwargs) - - # amperr (lower is better) counts the amplitude when the true vale was covered, or 1 (max amplitude) otherwise - amperrs = [amp if cov == 1.0 else 1. for amp, cov in zip(amps, covs)] + covs, amps, winkler, cov_soft = compute_coverage_amplitude(conf_region_class, **kwargs) update_fields = { f'coverage-{conf_name}': covs, f'amplitude-{conf_name}': amps, f'winkler-{conf_name}': winkler, - f'amperr-{conf_name}': amperrs, + f'coverage-soft-{conf_name}': cov_soft } update_pickle(report, file, update_fields) +def pareto_front(df, x_col, y_col, maximize_y=True, minimize_x=True): + """ + Returns a boolean mask indicating whether each row is Pareto-optimal. + """ + X = df[x_col].values + Y = df[y_col].values -def nicer(name:str): + is_pareto = np.ones(len(df), dtype=bool) + + for i in range(len(df)): + if not is_pareto[i]: + continue + for j in range(len(df)): + if i == j: + continue + + better_or_equal_x = X[j] <= X[i] if minimize_x else X[j] >= X[i] + better_or_equal_y = Y[j] >= Y[i] if maximize_y else Y[j] <= Y[i] + + strictly_better = ( + (X[j] < X[i] if minimize_x else X[j] > X[i]) or + (Y[j] > Y[i] if maximize_y else Y[j] < Y[i]) + ) + + if better_or_equal_x and better_or_equal_y and strictly_better: + is_pareto[i] = False + break + + return is_pareto + +def plot_coverage_vs_amplitude( + df, + coverage_col, + amplitude_col="a-CI", + method_col="method", + dataset_col=None, + error_col=None, + error_threshold=None, + nominal_coverage=0.95, + title=None, +): + df_plot = df.copy() + + # Optional error filtering + if error_col is not None and error_threshold is not None: + df_plot = df_plot[df_plot[error_col] <= error_threshold] + + # Compute Pareto front + pareto_mask = pareto_front( + df_plot, + x_col=amplitude_col, + y_col=coverage_col, + maximize_y=True, + minimize_x=True + ) + + plt.figure(figsize=(7, 6)) + + # Base scatter + sns.scatterplot( + data=df_plot, + x=amplitude_col, + y=coverage_col, + hue=method_col, + # style=dataset_col, + alpha=0.6, + s=60, + legend=True + ) + + # Highlight Pareto front + plt.scatter( + df_plot.loc[pareto_mask, amplitude_col], + df_plot.loc[pareto_mask, coverage_col], + facecolors='none', + edgecolors='black', + s=120, + linewidths=1.5, + label="Pareto front" + ) + + # Nominal coverage line + plt.axhline( + nominal_coverage, + linestyle="--", + color="gray", + linewidth=1, + label="Nominal coverage" + ) + + plt.xlabel("Amplitude (fraction of simplex)") + plt.ylabel("Coverage") + plt.ylim(0, 1.05) + + if title is not None: + plt.title(title) + + plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left") + plt.tight_layout() + plt.show() + + +def nicer_method(name:str): replacements = { - 'Bayesian': 'Ba', + # 'Bayesian': 'Ba', 'Bootstrap': 'Bo', 
'-numpyro': '', 'emcee': 'emc', - '-T*': '*' + '-T*': '*', + '-T!': '', + '!': '', + '-Ait': r'$^{(\mathrm{Ait})}$', + '-Gau': r'$^{(\mathrm{Gau})}$' } for k, v in replacements.items(): name = name.replace(k,v) return name +def nicer_data(name:str): + replacements = { + 'cifar': 'CIFAR', + '-l': '', + 'mnist': 'MNIST', + 'fashionmnist': 'fashionMNIST', + 'svhn': 'SVHN', + '100coarse': '100(20)', + } + for k, v in replacements.items(): + name = name.replace(k, v) + return name + + base_dir = RESULT_DIR table = defaultdict(list) @@ -119,62 +243,56 @@ n_classes = {} tr_size = {} tr_prev = {} -for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]: - problem_type = 'binary' if dataset_handler.is_binary() else 'multiclass' - path = f'./{base_dir}/{problem_type}/*.pkl' - - for file in tqdm(glob(path), desc='processing results', total=len(glob(path))): - file = Path(file) - dataset, method = file.name.replace('.pkl', '').split('__') - if (method not in methods) or (dataset not in dataset_handler.get_datasets()): - continue - - report = pickle.load(open(file, 'rb')) - results = report['results'] - n_samples = len(results['ae']) - table['method'].extend([nicer(method)] * n_samples) - table['dataset'].extend([dataset] * n_samples) - table['ae'].extend(results['ae']) - table['rae'].extend(results['rae']) - # table['c-CI'].extend(results['coverage']) - # table['a-CI'].extend(results['amplitude']) - - update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True) - # update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex) - # update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR) - # update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR) - - table['c-CI'].extend(report['coverage-CI']) - table['a-CI'].extend(report['amplitude-CI']) - table['w-CI'].extend(report['winkler-CI']) - table['amperr-CI'].extend(report['amperr-CI']) - - # table['c-CE'].extend(report['coverage-CE']) - # table['a-CE'].extend(report['amplitude-CE']) - # table['amperr-CE'].extend(report['amperr-CE']) - - # table['c-CLR'].extend(report['coverage-CLR']) - # table['a-CLR'].extend(report['amplitude-CLR']) - # table['amperr-CLR'].extend(report['amperr-CLR']) - - # table['c-ILR'].extend(report['coverage-ILR']) - # table['a-ILR'].extend(report['amplitude-ILR']) - # table['amperr-ILR'].extend(report['amperr-ILR']) - - table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim'])) - table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001)) - # table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']])) - # table['aitch'].extend() - # table['reg-score-ILR'].extend( - # [region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])] - # ) - - for dataset in dataset_handler.iter(): +dataset_class = [UCIMulticlassHandler, CIFAR100Handler, VisualDataHandler, LeQuaHandler] +dataset_order = [] +for handler in dataset_class: + for dataset in handler.iter(): + dataset_order.append(dataset.name) train = dataset.get_training() n_classes[dataset.name] = train.n_classes tr_size[dataset.name] = len(train) tr_prev[dataset.name] = F.strprev(train.prevalence()) + +problem_type = 'multiclass' +path 
= f'./{base_dir}/{problem_type}/*.pkl' + +for file in tqdm(glob(path), desc='processing results', total=len(glob(path))): + file = Path(file) + dataset, method = file.name.replace('.pkl', '').split('__') + if (method not in methods) or (dataset not in dataset_order): + continue + + report = pickle.load(open(file, 'rb')) + results = report['results'] + n_samples = len(results['ae']) + table['method'].extend([nicer_method(method)] * n_samples) + table['dataset'].extend([nicer_data(dataset)] * n_samples) + table['ae'].extend(results['ae']) + table['rae'].extend(results['rae']) + # table['c-CI'].extend(results['coverage']) + # table['a-CI'].extend(results['amplitude']) + + # update_pickle_with_region(report, file, conf_name='CI-ILR', conf_region_class=ConfidenceIntervalsILR, bonferroni_correction=True) + # update_pickle_with_region(report, file, conf_name='CI-CLR', conf_region_class=ConfidenceIntervalsCLR, bonferroni_correction=True) + update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True) + update_pickle_with_region(report, file, conf_name='CInb', conf_region_class=ConfidenceIntervals, bonferroni_correction=False) # no Bonferroni-correction + # update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex) + # update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR) + # update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR) + + conf_bonferroni = 'CI' + conf_name='CInb' + table['c-CI'].extend(report[f'coverage-{conf_bonferroni}']) # the true coverage is better measured with Bonferroni-correction + table['w-CI'].extend(report[f'winkler-{conf_name}']) + table['cs-CI'].extend(report[f'coverage-soft-{conf_name}']) + table['a-CI'].extend(report[f'amplitude-{conf_name}']) + + # table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim'])) # not in this paper... 
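Reading coverage from the Bonferroni-corrected intervals while taking width-based scores from the uncorrected ones is deliberate: K per-class intervals, each at level 1-alpha, do not jointly cover the true prevalence vector with probability 1-alpha, whereas replacing alpha by alpha/K restores a simultaneous guarantee at the cost of wider intervals. A sketch of a plausible percentile construction under this correction (illustrative names; the repository's version is ConfidenceIntervals in quapy/method/confidence.py):

import numpy as np

def percentile_intervals(samples, alpha=0.05, bonferroni=True):
    # samples: (n_bootstrap_samples, n_classes) array of prevalence vectors
    K = samples.shape[1]
    # with K=2 one class determines the other, so no correction is needed
    a = alpha / K if (bonferroni and K > 2) else alpha
    low = np.percentile(samples, 100 * (a / 2), axis=0)
    high = np.percentile(samples, 100 * (1 - a / 2), axis=0)
    return low, high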
+ table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001)) + + + # remove datasets with more than max_classes classes # max_classes = 25 # min_train = 500 @@ -190,19 +308,100 @@ for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, C # df = df[df["dataset"] != data_name] + + df = pd.DataFrame(table) +df['a-CI'] *= 100 +df['c-CI'] *= 100 +df['cs-CI'] *= 100 + for region in ['CI']: #, 'CLR', 'ILR', 'CI']: if problem_type == 'binary' and region=='ILR': continue - for column in [f'a-{region}', f'c-{region}', 'ae', 'SRE']: + for column in [f'a-{region}', 'ae', 'SRE', f'c-{region}', f'cs-{region}']: # f'w-{region}' pv = pd.pivot_table( df, index='dataset', columns='method', values=column, margins=True ) pv['n_classes'] = pv.index.map(n_classes).astype('Int64') pv['tr_size'] = pv.index.map(tr_size).astype('Int64') #pv['tr-prev'] = pv.index.map(tr_prev) - pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"]) + pv = pv.drop(columns=[col for col in pv.columns if col == "All" or col[-1]=='All']) print(f'{problem_type=} {column=}') print(pv) print('-'*80) + latex = LatexTable.from_dataframe(df, method='method', benchmark='dataset', value=column, name=column) + latex.format.configuration.show_std = False + #latex.reorder_methods([nicer_method(m) for m in methods]) + latex.reorder_benchmarks([nicer_data(d) for d in dataset_order]) + if column in ['ae', 'SRE']: + latex.format.configuration.lower_is_better = True + latex.format.configuration.stat_test = 'wilcoxon' + #latex.format.configuration.stat_test = None + # latex.format.configuration.show_std = True + + if column in [f'c-{region}', f'cs-{region}']: + latex.format.configuration.lower_is_better = False + latex.format.configuration.stat_test = None + latex.format.configuration.with_color = False + latex.format.configuration.best_in_bold = False + latex.format.configuration.with_rank = False + latex.format.configuration.mean_prec = 0 + latex.add_format_modifier( + format_modifier=FormatModifierSelectColor( + comparison=SelectGreaterThan(reference_selector=89, input_selector=SelectByName()) + ) + ) + if column in [f'a-{region}']: + latex.format.configuration.lower_is_better = True + latex.format.configuration.stat_test = None + latex.format.configuration.with_color = False + latex.format.configuration.best_in_bold = False + latex.format.configuration.mean_prec = 2 + latex.add_format_modifier( + format_modifier=FormatModifierSelectColor( + comparison=SelectSmallerThan(reference_selector=11, input_selector=SelectByName()) + ) + ) + # latex.add_format_modifier( + # format_modifier=FormatModifierSelectColor( + # comparison=SelectSmallerThan(reference_selector=0.01, input_selector=SelectByName()), + # intensity=50 + # ) + # ) + + latex.format.configuration.resizebox=.5 + latex.latexPDF(pdf_path=f'./tables/{latex.name}.pdf') + + +df = df[df['method']!='BaACC'] +df = df[df['method']!='BaACC*'] +df = df[df['method']!='BaACC+'] +df = df[df['method']!='BaKDE-Ait*'] +df = df[df['method']!='BaKDE-Gau*'] +df = df[df['method']!='BayEMQ*'] +grouped = df.groupby(["method", "dataset"]) +agg = grouped.agg( + ae_mean=("ae", "mean"), + ae_std=("ae", "std"), + sre_mean=("SRE", "mean"), + sre_std=("SRE", "std"), + coverage_mean=("c-CI", "mean"), + coverage_std=("c-CI", "std"), + coverage_soft_mean=("cs-CI", "mean"), + amplitude_mean=("a-CI", "mean"), + amplitude_std=("a-CI", "std"), +).reset_index() + +#plot_coverage_vs_amplitude( +# agg, +# coverage_col="coverage_soft_mean", +# 
amplitude_col="amplitude_mean", +# method_col="method", +# dataset_col="dataset", +# nominal_coverage=0.95, +# title="Marginal coverage vs amplitude" +#) + + +#print('RESTITUIR EL WILCOXON') diff --git a/BayesianKDEy/map_experiments.py b/BayesianKDEy/map_experiments.py new file mode 100644 index 0000000..65e34b8 --- /dev/null +++ b/BayesianKDEy/map_experiments.py @@ -0,0 +1,169 @@ +import os.path +from pathlib import Path +import pandas as pd +from sklearn.linear_model import LogisticRegression +from copy import deepcopy as cp +import quapy as qp +from BayesianKDEy.commons import KDEyReduce +from _bayeisan_kdey import BayesianKDEy +from _bayesian_mapls import BayesianMAPLS +from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh +# import datasets +from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, VisualDataHandler, CIFAR100Handler +from method.confidence import ConfidenceIntervals +from temperature_calibration import temp_calibration +from build.lib.quapy.data import LabelledCollection +from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC +from quapy.model_selection import GridSearchQ +from quapy.data import Dataset +from quapy.method.confidence import BayesianCC, AggregativeBootstrap +from quapy.method.aggregative import KDEyML, ACC +from quapy.protocol import UPP +import numpy as np +from tqdm import tqdm +from collections import defaultdict +from time import time + + +def methods(data_handler: DatasetHandler): + """ + Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where: + - name: is a str representing the name of the method (e.g., 'BayesianKDEy') + - quantifier: is the base model (e.g., KDEyML()) + - hyperparams: is a dictionary for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}) + - bayesian/bootstrap_constructor: is a function that instantiates the bayesian o bootstrap method with the + quantifier with optimized hyperparameters + """ + if isinstance(data_handler, VisualDataHandler): + Cls = LogisticRegression + cls_hyper = {} + else: + Cls = LogisticRegression + cls_hyper = {'classifier__C': np.logspace(-4, 4, 9), 'classifier__class_weight': ['balanced', None]} + kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper} + kdey_hyper_larger = {'bandwidth': np.logspace(-1, 0, 10), **cls_hyper} + kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper} + + # surrogate quantifiers + kde_gau_scale = KDEyScaledB(Cls()) + + yield 'KDEy-G-exp', kdey_hyper, KDEyML(Cls()) + # yield 'KDEy-G-exp2', kdey_hyper_larger, KDEyML(Cls()) + # yield 'KDEy-G-log', kdey_hyper, KDEyML(Cls(), logdensities=True) + yield 'KDEy-Ait', kdey_hyper_clr, KDEyCLR(Cls()) + + +def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict): + with qp.util.temp_seed(0): + if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid: + def scale_bandwidth(bandwidth, n_classes, beta=0.5): + return bandwidth * np.power(n_classes, beta) + + n = dataset.get_training().n_classes + grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']] + print('bandwidth scaled') + print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}') + # model selection + if len(grid) > 0: + train, val_prot = dataset.get_train_valprot_for_modsel() + mod_sel = GridSearchQ( + model=point_quantifier, + param_grid=grid, + protocol=val_prot, + refit=False, + 
n_jobs=-1, + verbose=True + ).fit(*train.Xy) + best_params = mod_sel.best_params_ + else: + best_params = {} + + return best_params + + +def experiment(dataset: DatasetHandler, + point_quantifier: AggregativeQuantifier, + method_name: str, + grid: dict, + hyper_choice_path: Path): + + with qp.util.temp_seed(0): + # model selection + best_hyperparams = qp.util.pickled_resource( + hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid + ) + print(f'{best_hyperparams=}') + + t_init = time() + training, test_generator = dataset.get_train_testprot_for_eval() + point_quantifier.fit(*training.Xy) + tr_time = time() - t_init + + # test + train_prevalence = training.prevalence() + results = defaultdict(list) + pbar = tqdm(enumerate(test_generator()), total=test_generator.total()) + for i, (sample_X, true_prevalence) in pbar: + t_init = time() + point_estimate = point_quantifier.predict(sample_X) + ttime = time() - t_init + + results['true-prevs'].append(true_prevalence) + results['point-estim'].append(point_estimate) + results['shift'].append(qp.error.ae(true_prevalence, train_prevalence)) + results['ae'].append(qp.error.ae(prevs_true=true_prevalence, prevs_hat=point_estimate)) + results['rae'].append(qp.error.rae(prevs_true=true_prevalence, prevs_hat=point_estimate)) + results['sre'].append(qp.error.sre(prevs_true=true_prevalence, prevs_hat=point_estimate, prevs_train=train_prevalence)) + results['test-time'].append(ttime) + + pbar.set_description( + f'{method_name} MAE={np.mean(results["ae"]):.5f} W={np.mean(results["sre"]):.5f}') + + report = { + 'optim_hyper': best_hyperparams, + 'train_time': tr_time, + 'train-prev': train_prevalence, + 'results': {k: np.asarray(v) for k, v in results.items()}, + } + + return report + + +if __name__ == '__main__': + + result_dir = Path('results_map') + + reports = defaultdict(list) + + for data_handler in [UCIMulticlassHandler]: # , UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]: + for dataset in data_handler.iter(): + qp.environ['SAMPLE_SIZE'] = dataset.sample_size + # print(f'dataset={dataset.name}') + + problem_type = 'binary' if dataset.is_binary() else 'multiclass' + for method_name, hyper_params, quantifier in methods(dataset): + + result_path = experiment_path(result_dir / problem_type, dataset.name, method_name) + hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, method_name) + + # if os.path.exists(result_path): + report = qp.util.pickled_resource( + result_path, experiment, dataset, quantifier, method_name, hyper_params, hyper_path + ) + reports['dataset'].append(dataset.name) + reports['method'].append(method_name) + reports['MAE'].append(report["results"]["ae"].mean()) + reports['SRE'].append(report["results"]["sre"].mean()) + reports['h'].append(report["optim_hyper"]["bandwidth"]) + + print(f'dataset={dataset.name}, ' + f'method={method_name}: ' + f'mae={reports["MAE"][-1]:.5f}, ' + f'W={reports["SRE"][-1]:.5f} ' + f'h={reports["h"][-1]}') + + pv = pd.DataFrame(reports).pivot_table(values=['MAE', 'SRE', 'h'], index='dataset', columns='method', margins=True) + print(pv) + + + diff --git a/BayesianKDEy/methods.py b/BayesianKDEy/methods.py new file mode 100644 index 0000000..55e6625 --- /dev/null +++ b/BayesianKDEy/methods.py @@ -0,0 +1,168 @@ +from abc import ABC, abstractmethod +import numpy as np +from sklearn.linear_model import LogisticRegression + +from BayesianKDEy._bayeisan_kdey import BayesianKDEy +from BayesianKDEy._bayesian_mapls import BayesianMAPLS +from 
BayesianKDEy.commons import KDEyCLR, KDEyCLR2 +from quapy.method.aggregative import CC, ACC, EMQ, DMy, KDEyML +from quapy.method.base import BaseQuantifier +from quapy.method.confidence import AggregativeBootstrap, BayesianCC + + +def get_experimental_methods(): + #yield BootsCC() + #yield BayACC() + #yield BayEMQ() + #yield BayKDEyGau() + #yield BayKDEyAit() + yield BayKDEyAit2() + + +# commons +# ------------------------------------------------------------ +Cls = LogisticRegression +cls_hyper = { + 'classifier__C': np.logspace(-4,4,9), + 'classifier__class_weight': ['balanced', None] +} +hdy_hyper = {'nbins': [3, 4, 5, 8, 9, 10, 12, 14, 16, 32], **cls_hyper} +kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper} +kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper} + +def hyper2cls(hyperparams): + return {k.replace('classifier__', ''): v for k, v in hyperparams.items()} + + +# method descriptor logic +# ------------------------------------------------------------ +class MethodDescriptor(ABC): + def __init__(self, + name: str, + surrogate_quantifier: BaseQuantifier, + hyper_parameters: dict, + ): + self.name = name + self.surrogate_quantifier_ = surrogate_quantifier + self.hyper_parameters = hyper_parameters + + @abstractmethod + def binary_only(self): ... + + def surrogate_quantifier(self): + return self.surrogate_quantifier_ + + def surrogate_quantifier_name(self): + return self.surrogate_quantifier_.__class__.__name__ + + @abstractmethod + def uncertainty_aware_quantifier(self, hyperparameters): ... + + +class MulticlassMethodDescriptor(MethodDescriptor): + + def binary_only(self): + return False + + +# specific methods definitions +# ------------------------------------------------------------ +# ------------------------------------------------------------ + +# Bootstrap approaches: +# ------------------------------------------------------------ +class BootsCC(MulticlassMethodDescriptor): + def __init__(self): + super().__init__(name='BoCC', surrogate_quantifier=CC(Cls()), hyper_parameters=cls_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + quantifier = CC(Cls()).set_params(**hyperparameters) + return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0) + + +# class BootsACC(MulticlassMethodDescriptor): +# def __init__(self): +# super().__init__(name='BoACC', surrogate_quantifier=ACC(Cls()), hyper_parameters=cls_hyper) +# +# def uncertainty_aware_quantifier(self, hyperparameters): +# quantifier = ACC(Cls()).set_params(**hyperparameters) +# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0) +# +# +# class BootsEMQ(MulticlassMethodDescriptor): +# def __init__(self): +# super().__init__(name='BoEMQ', surrogate_quantifier=EMQ(Cls(), exact_train_prev=False), hyper_parameters=cls_hyper) +# +# def uncertainty_aware_quantifier(self, hyperparameters): +# quantifier = EMQ(Cls(), exact_train_prev=False).set_params(**hyperparameters) +# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0) + + +# class BootsHDy(MethodDescriptor): +# def __init__(self): +# super().__init__(name='BoHDy', surrogate_quantifier=DMy(Cls()), hyper_parameters=hdy_hyper) +# +# def uncertainty_aware_quantifier(self, hyperparameters): +# quantifier = DMy(Cls()).set_params(**hyperparameters) +# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0) +# +# def binary_only(self): +# return True + +# class BootsKDEy(MulticlassMethodDescriptor): +# def __init__(self): +# 
super().__init__(name='BoKDEy', surrogate_quantifier=KDEyML(Cls()), hyper_parameters=kdey_hyper) +# +# def uncertainty_aware_quantifier(self, hyperparameters): +# quantifier = KDEyML(Cls()).set_params(**hyperparameters) +# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0) + + +# Bayesian approaches: +# ------------------------------------------------------------ +class BayACC(MulticlassMethodDescriptor): + def __init__(self): + super().__init__(name='BaACC!', surrogate_quantifier=ACC(Cls()), hyper_parameters=cls_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + classifier = Cls(**hyper2cls(hyperparameters)) + return BayesianCC(classifier, temperature=None, mcmc_seed=0) # is actually a Bayesian variant of ACC + + +class BayEMQ(MulticlassMethodDescriptor): + def __init__(self): + super().__init__(name='BaEMQ!', surrogate_quantifier=EMQ(Cls(), exact_train_prev=False), hyper_parameters=cls_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + classifier = Cls(**hyper2cls(hyperparameters)) + return BayesianMAPLS(classifier, prior='uniform', temperature=None, exact_train_prev=False) + + +class BayKDEyGau(MulticlassMethodDescriptor): + def __init__(self): + kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper} + super().__init__(name='BaKDE-Gau-T!', surrogate_quantifier=KDEyML(Cls()), hyper_parameters=kdey_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + return BayesianKDEy(Cls(), kernel='gaussian', temperature=None, mcmc_seed=0, **hyperparameters) + + +class BayKDEyAit(MulticlassMethodDescriptor): + def __init__(self): + kdey_hyper = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper} + super().__init__(name='BaKDE-Ait-T!', surrogate_quantifier=KDEyCLR(Cls()), hyper_parameters=kdey_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + return BayesianKDEy(Cls(), kernel='aitchison', temperature=None, mcmc_seed=0, **hyperparameters) + + +class BayKDEyAit2(MulticlassMethodDescriptor): + def __init__(self): + kdey_hyper = {'bandwidth': np.linspace(0.05, 2., 10), **cls_hyper} + super().__init__(name='BaKDE-Ait-T!2', surrogate_quantifier=KDEyCLR2(Cls()), hyper_parameters=kdey_hyper) + + def uncertainty_aware_quantifier(self, hyperparameters): + return BayesianKDEy(Cls(), kernel='aitchison', temperature=None, mcmc_seed=0, **hyperparameters) + + + diff --git a/BayesianKDEy/plot_simplex.py b/BayesianKDEy/plot_simplex.py index 2f1a79e..af44a6a 100644 --- a/BayesianKDEy/plot_simplex.py +++ b/BayesianKDEy/plot_simplex.py @@ -10,6 +10,8 @@ from sklearn.preprocessing import MinMaxScaler from BayesianKDEy.commons import antagonistic_prevalence, in_simplex from method.confidence import (ConfidenceIntervals as CI, + ConfidenceIntervalsCLR as CICLR, + ConfidenceIntervalsILR as CIILR, ConfidenceEllipseSimplex as CE, ConfidenceEllipseCLR as CLR, ConfidenceEllipseILR as ILR) @@ -260,6 +262,8 @@ def plot_regions(ax, region_layers, resolution, confine): ) + + def plot_points(ax, point_layers): for layer in point_layers: pts = layer["points"] @@ -433,25 +437,34 @@ def plot_kernels(): if __name__ == '__main__': np.random.seed(1) - # n = 1000 - # alpha = [1,1,1] - # prevs = np.random.dirichlet(alpha, size=n) + n = 1000 + alpha = [15,10,7] + prevs = np.random.dirichlet(alpha, size=n) - # def regions(): - # confs = [0.99, 0.95, 0.90] - # yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs] + def regions(): + confs = [0.9, 0.95, 0.99] + # yield 'CI', [(f'{int(c*100)}%', CI(prevs, 
confidence_level=c).coverage) for c in confs] # yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] + # yield 'CI-CLR', [(f'{int(c * 100)}%', CICLR(prevs, confidence_level=c).coverage) for c in confs] + # yield 'CI-CLR-b', [(f'{int(c * 100)}%', CICLR(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] + # yield 'CI-ILR', [(f'{int(c * 100)}%', CIILR(prevs, confidence_level=c).coverage) for c in confs] + yield 'CI-ILR-b', [(f'{int(c * 100)}%', CIILR(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs] # yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs] - # yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs] - # yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs] + # yield 'CE-CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs] + # yield 'CE-ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs] - # resolution = 1000 - # alpha_str = ','.join([f'{str(i)}' for i in alpha]) - # for crname, cr in regions(): - # plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution, - # color='blue', - # save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png', - # ) + resolution = 1000 + alpha_str = ','.join([f'{str(i)}' for i in alpha]) + + dot_style = {"color": "gray", "alpha": .5, "s": 15, 'linewidth': .25, 'edgecolors': "black"} + point_layer = [ + {"points": prevs, "label": "points", "style": dot_style}, + ] + + for crname, cr in regions(): + region = [{'fn': fn, 'alpha':.6, 'label':label} for label, fn in cr] + + plot_simplex(point_layers=point_layer, region_layers=region, show_legend=False, resolution=resolution, save_path=f'./plots/regions/{crname}.png') # def regions(): # confs = [0.99, 0.95, 0.90] @@ -544,4 +557,4 @@ if __name__ == '__main__': # save_path=f'./plots/prior_test/concentration_{c}.png' # ) - plot_kernels() \ No newline at end of file + # plot_kernels() \ No newline at end of file diff --git a/BayesianKDEy/temperature_calibration.py b/BayesianKDEy/temperature_calibration.py index f1b9326..5dc6540 100644 --- a/BayesianKDEy/temperature_calibration.py +++ b/BayesianKDEy/temperature_calibration.py @@ -13,12 +13,14 @@ def temp_calibration(method:WithConfidenceABC, val_prot:AbstractProtocol, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], nominal_coverage=0.95, - amplitude_threshold='auto', + amplitude_threshold=1., + criterion='winkler', n_jobs=1, verbose=True): - assert (amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float)) and amplitude_threshold < 1.), \ - f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float < 1.0.' + assert (amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float)) and amplitude_threshold <= 1.), \ + f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float <= 1.0.' 
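The amplitude this threshold bounds is the fraction of the probability simplex occupied by the confidence region, which montecarlo_proportion estimates by sampling prevalence vectors uniformly from the simplex (a symmetric Dirichlet with all concentrations equal to 1) and counting how many fall inside the region. A sketch under those assumptions, with illustrative names:

import numpy as np

def montecarlo_proportion(covers, n_classes, n_trials=50_000, seed=0):
    # covers: callable returning True if a prevalence vector lies in the region
    rng = np.random.default_rng(seed)
    points = rng.dirichlet(np.ones(n_classes), size=n_trials)  # uniform over the simplex
    return np.mean([covers(p) for p in points])

# e.g., for per-class intervals [low, high]:
# covers = lambda p: np.all((low <= p) & (p <= high))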
+ assert criterion in {'auto', 'winkler'}, f'unknown {criterion=}; valid ones are auto or winkler' if amplitude_threshold=='auto': n_classes = train.n_classes @@ -27,15 +29,16 @@ if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1: print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions') - def evaluate_temperature_job(job_id, temp): - if verbose: - print(f'\tstarting exploration with temperature={temp}...') + def _evaluate_temperature_job(job_id, temp): + # if verbose: + # print(f'\tstarting exploration with temperature={temp}...') local_method = copy.deepcopy(method) local_method.temperature = temp coverage = 0 amplitudes = [] + winklers = [] # errs = [] pbar = tqdm(enumerate(val_prot()), position=job_id, total=val_prot.total(), disable=not verbose) @@ -47,35 +50,62 @@ coverage += 1 amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000)) + winkler = None + if criterion=='winkler': + winkler = conf_region.mean_winkler_score(true_prev=prev, alpha=0.005) + winklers.append(winkler) + # errs.append(qp.error.mae(prev, point_estim)) - pbar.set_description(f'job={job_id} T={temp}: coverage={coverage/(i+1)*100:.2f}% amplitude={np.mean(amplitudes)*100:.2f}%') + pbar.set_description( + f'job={job_id} T={temp}: ' + f'coverage={coverage/(i+1)*100:.2f}% ' + f'amplitude={np.mean(amplitudes)*100:.4f}% ' + + (f'winkler={np.mean(winklers):.4f}' if criterion=='winkler' else '') + ) mean_coverage = coverage / val_prot.total() mean_amplitude = np.mean(amplitudes) + winkler_mean = np.mean(winklers) if criterion=='winkler' else None - if verbose: - print(f'Temperature={temp} got coverage={mean_coverage*100:.2f}% amplitude={mean_amplitude*100:.2f}%') + # if verbose: + # print( + # f'Temperature={temp} got ' + # f'coverage={mean_coverage*100:.2f}% ' + # f'amplitude={mean_amplitude*100:.2f}% ' + # + (f'winkler={winkler_mean:.4f}' if criterion == 'winkler' else '') + # ) - return temp, mean_coverage, mean_amplitude + return temp, mean_coverage, mean_amplitude, winkler_mean temp_grid = sorted(temp_grid) method.fit(*train.Xy) raw_results = Parallel(n_jobs=n_jobs, backend="loky")( - delayed(evaluate_temperature_job)(job_id, temp) + delayed(_evaluate_temperature_job)(job_id, temp) for job_id, temp in tqdm(enumerate(temp_grid), disable=not verbose) ) results = [ - (temp, cov, amp) - for temp, cov, amp in raw_results + (temp, cov, amp, wink) + for temp, cov, amp, wink in raw_results if amp < amplitude_threshold ] chosen_temperature = 1.
if len(results) > 0: - chosen_temperature = min(results, key=lambda x: abs(x[1]-nominal_coverage))[0] + if criterion=='winkler': + # choose min winkler + chosen_temperature, ccov, camp, cwink = min(results, key=lambda x: x[3]) + else: + # choose best coverage (regardless of amplitude), i.e., closest to nominal + chosen_temperature, ccov, camp, cwink = min(results, key=lambda x: abs(x[1]-nominal_coverage)) - print(f'chosen_temperature={chosen_temperature:.2f}') + if verbose: + print( + f'\nchosen temperature={chosen_temperature:.2f} got ' + f'coverage={ccov*100:.2f}% ' + f'amplitude={camp*100:.4f}% ' + + (f'winkler={cwink:.4f}' if criterion=='winkler' else '') + ) return chosen_temperature diff --git a/CHANGE_LOG.txt b/CHANGE_LOG.txt index 9c10d20..28d11a4 100644 --- a/CHANGE_LOG.txt +++ b/CHANGE_LOG.txt @@ -1,15 +1,21 @@ Change Log 0.2.1 ----------------- +- Added temperature-calibration mechanisms for coverage +- Added MAPLS and BayesianEMQ - Added DirichletProtocol, which allows generating samples according to a parameterized Dirichlet prior. -- Added squared ratio error. - Improved efficiency of confidence regions coverage functions - Added Precise Quantifier to WithConfidence methods (a Bayesian adaptation of HDy) +- Added a temperature parameter to BayesianCC - Improved documentation of confidence regions. - Added ReadMe method by Daniel Hopkins and Gary King - Internal index in LabelledCollection is now "lazy", and is only constructed if required. -- Added dist_aitchison and mean_dist_aitchison as a new error evaluation metric. -- Improved numerical stability of KDEyML through logsumexp; useful for cases with large number of classes, where densities for small bandwidths may become huge +- Added new error metrics: + - dist_aitchison and mean_dist_aitchison as a new error evaluation metric. + - squared ratio error. +- Improved numerical stability of KDEyML through logsumexp; useful for cases with a large number of classes, where + densities for small bandwidths may become huge. + Change Log 0.2.0 ----------------- diff --git a/TODO.txt b/TODO.txt index 2e1652b..0259e74 100644 --- a/TODO.txt +++ b/TODO.txt @@ -1,6 +1,11 @@ Adapt examples; remaining: example 4-onwards not working: 15 (qunfold) +Unify ConfidenceIntervalsTransformed with ConfidenceEllipseTransformed + +Unify the functionality of the with-confidence methods; predict_conf has a clearly similar structure across all + variants and should be unified in the superclass + Solve the warnings issue; right now there is a warning ignore in method/__init__.py: Add 'platt' to calib options in EMQ? @@ -46,7 +51,15 @@ To remove the labelledcollection from the methods: - proportion in [0,1] - fit_classifier=False: - +- [TODO] add RLSbench?: + - https://arxiv.org/pdf/2302.03020 + - https://github.com/acmi-lab/RLSbench +- [TODO] check Table shift + - https://proceedings.neurips.cc/paper_files/paper/2023/hash/a76a757ed479a1e6a5f8134bea492f83-Abstract-Datasets_and_Benchmarks.html +- [TODO] have a look at TorchDrift + - https://torchdrift.org/ +- [TODO] have a look at + - https://github.com/SeldonIO/alibi-detect/ - [TODO] check if the KDEyML variant with logsumexp is slower than the original one, or check whether we can explore an unconstrained space in which the parameter is already the log(prev); maybe also move to cvxq - [TODO] why not simplify the epsilon of RAE?
after all, it is meant to smooth the denominator to avoid division by zero diff --git a/quapy/method/_bayesian.py index cd0e8c2..b3b49d7 100644 --- a/quapy/method/_bayesian.py +++ b/quapy/method/_bayesian.py @@ -33,7 +33,7 @@ P_TEST_C: str = "P_test(C)" P_C_COND_Y: str = "P(C|Y)" -def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, alpha: np.ndarray) -> None: +def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, temperature:float, alpha: np.ndarray) -> None: """ Defines a probabilistic model in `NumPyro <https://num.pyro.ai/>`_. @@ -50,11 +50,40 @@ def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, a pi_ = numpyro.sample(P_TEST_Y, dist.Dirichlet(jnp.asarray(alpha, dtype=jnp.float32))) p_c_cond_y = numpyro.sample(P_C_COND_Y, dist.Dirichlet(jnp.ones(K).repeat(L).reshape(L, K))) - with numpyro.plate('plate', L): - numpyro.sample('F_yc', dist.Multinomial(n_y_labeled, p_c_cond_y), obs=n_y_and_c_labeled) + if temperature==1: + # original implementation + with numpyro.plate('plate', L): + numpyro.sample('F_yc', dist.Multinomial(n_y_labeled, p_c_cond_y), obs=n_y_and_c_labeled) - p_c = numpyro.deterministic(P_TEST_C, jnp.einsum("yc,y->c", p_c_cond_y, pi_)) - numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled) + p_c = numpyro.deterministic(P_TEST_C, jnp.einsum("yc,y->c", p_c_cond_y, pi_)) + numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled) + else: + # with temperature modification + with numpyro.plate('plate_y', L): + logp_F = dist.Multinomial( + n_y_labeled, + p_c_cond_y + ).log_prob(n_y_and_c_labeled) + + numpyro.factor( + 'F_yc_loglik', + jnp.sum(logp_F) / temperature + ) + p_c = numpyro.deterministic( + P_TEST_C, + jnp.einsum("yc,y->c", p_c_cond_y, pi_) + ) + + # likelihood of the unlabeled data + logp_N = dist.Multinomial( + jnp.sum(n_c_unlabeled), + p_c + ).log_prob(n_c_unlabeled) + + numpyro.factor( + 'N_c_loglik', + logp_N / temperature + ) def sample_posterior_bayesianCC( @@ -63,6 +92,7 @@ num_warmup: int, num_samples: int, alpha: np.ndarray, + temperature = 1., seed: int = 0, ) -> dict: """ @@ -88,7 +118,7 @@ progress_bar=False ) rng_key = jax.random.PRNGKey(seed) - mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, alpha=alpha) + mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, temperature=temperature, alpha=alpha) return mcmc.get_samples() diff --git a/quapy/method/_kdey.py b/quapy/method/_kdey.py index 09df327..95f23ae 100644 --- a/quapy/method/_kdey.py +++ b/quapy/method/_kdey.py @@ -178,17 +178,24 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase): with qp.util.temp_seed(self.random_state): epsilon = 1e-12 n_classes = len(self.mix_densities) - #test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities] - test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities] + if n_classes>=30: + # new version: improves numerical stability with logsumexp, at the cost of optimization efficiency.
diff --git a/quapy/method/_kdey.py b/quapy/method/_kdey.py
index 09df327..95f23ae 100644
--- a/quapy/method/_kdey.py
+++ b/quapy/method/_kdey.py
@@ -178,17 +178,24 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
         with qp.util.temp_seed(self.random_state):
             epsilon = 1e-12
             n_classes = len(self.mix_densities)
-            #test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
-            test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]
+            if n_classes>=30:
+                # log-space version: improves numerical stability via logsumexp, at the cost of optimization
+                # efficiency; needed when the number of classes is large (approx. >= 30), because the densities
+                # for small bandwidths may become huge and overflow
+                test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]

-            #def neg_loglikelihood(prev):
-            #    prev = np.clip(prev, epsilon, 1.0)
-            #    test_mixture_likelihood = prev @ test_densities
-            #    test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
-            #    return -np.sum(test_loglikelihood)
-            def neg_loglikelihood(prev):
-                test_loglikelihood = logsumexp(np.log(np.clip(prev, epsilon, 1.0))[:,None] + test_log_densities, axis=0)
-                return -np.sum(test_loglikelihood)
+                def neg_loglikelihood(prev):
+                    prev = np.clip(prev, epsilon, 1.0)
+                    test_loglikelihood = logsumexp(np.log(prev)[:,None] + test_log_densities, axis=0)
+                    return -np.sum(test_loglikelihood)
+            else:
+                # original implementation, operating on densities directly
+                test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
+
+                def neg_loglikelihood(prev):
+                    test_mixture_likelihood = prev @ test_densities
+                    test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
+                    return -np.sum(test_loglikelihood)

         return F.optim_minimize(neg_loglikelihood, n_classes)
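Why the log-space branch is needed: with many classes and small bandwidths, per-class KDE log-densities can exceed ~709, at which point `exp` overflows float64 and the naive mixture likelihood becomes infinite, while `logsumexp` stays finite. A self-contained toy demonstration (the magnitudes are synthetic, chosen to mimic that regime):

```python
import numpy as np
from scipy.special import logsumexp

rng = np.random.default_rng(0)
n_classes, n_instances = 50, 10
# synthetic per-class log-densities of each test instance; values above
# ~709 are exactly where exp() overflows in float64
test_log_densities = rng.uniform(700, 800, size=(n_classes, n_instances))
prev = np.full(n_classes, 1 / n_classes)

# naive version: exponentiate, mix, then take the log -> overflows to inf
with np.errstate(over='ignore'):
    naive = np.log(prev @ np.exp(test_log_densities))

# log-space version: mix directly in log space -> finite and exact
stable = logsumexp(np.log(prev)[:, None] + test_log_densities, axis=0)

print(np.isinf(naive).any(), np.isfinite(stable).all())  # True True
```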
diff --git a/quapy/method/aggregative.py b/quapy/method/aggregative.py
index d565140..8be9694 100644
--- a/quapy/method/aggregative.py
+++ b/quapy/method/aggregative.py
@@ -163,6 +163,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):

         :param X: array-like of shape `(n_samples, n_features)`, the training instances
         :param y: array-like of shape `(n_samples,)`, the labels
+        :return: a tuple (predictions, labels)
         """
         self._check_classifier(adapt_if_necessary=self.fit_classifier)
diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py
index 10b2245..d4416c0 100644
--- a/quapy/method/confidence.py
+++ b/quapy/method/confidence.py
@@ -341,6 +341,7 @@ class ConfidenceIntervals(ConfidenceRegionABC):
     """
     def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
         assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'

         samples = np.asarray(samples)
+        assert samples.ndim == 2, 'unexpected shape; must be (n_bootstrap_samples, n_classes)'
@@ -383,6 +384,10 @@ class ConfidenceIntervals(ConfidenceRegionABC):

         return proportion

+    def coverage_soft(self, true_value):
+        # fraction of classes whose true value falls within the corresponding per-class interval
+        within_intervals = np.logical_and(self.I_low <= true_value, true_value <= self.I_high)
+        return np.mean(within_intervals.astype(float))
+
     def __repr__(self):
         return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'

@@ -390,25 +395,30 @@ class ConfidenceIntervals(ConfidenceRegionABC):
     def n_dim(self):
         return len(self.I_low)

-    def winkler_scores(self, true_prev):
+    def winkler_scores(self, true_prev, alpha=None, add_ae=False):
         true_prev = np.asarray(true_prev)
         assert true_prev.ndim == 1, 'unexpected dimensionality for true_prev'
         assert len(true_prev)==self.n_dim, \
             f'unexpected number of dimensions; found {true_prev.ndim}, expected {self.n_dim}'

-        def winkler_score(low, high, true_val, alpha):
+        def winkler_score(low, high, true_val, alpha, center):
             amp = high-low
-            scale_cost = 1./alpha
+            # the standard interval (Winkler) score penalizes misses at rate 2/alpha
+            scale_cost = 2./alpha
             cost = np.max([0, low-true_val], axis=0) + np.max([0, true_val-high], axis=0)
-            return amp + scale_cost*cost
+            err = 0
+            if add_ae:
+                # optionally add the absolute error w.r.t. the point estimate (center)
+                err = abs(true_val - center)
+            return amp + scale_cost*cost + err

+        alpha = alpha or self.alpha
         return np.asarray(
-            [winkler_score(low_i, high_i, true_v, self.alpha)
-             for (low_i, high_i, true_v) in zip(self.I_low, self.I_high, true_prev)]
+            [winkler_score(low_i, high_i, true_v, alpha, center)
+             for (low_i, high_i, true_v, center) in zip(self.I_low, self.I_high, true_prev, self.point_estimate())]
         )

-    def mean_winkler_score(self, true_prev):
-        return np.mean(self.winkler_scores(true_prev))
+    def mean_winkler_score(self, true_prev, alpha=None, add_ae=False):
+        return np.mean(self.winkler_scores(true_prev, alpha=alpha, add_ae=add_ae))
+

 class ConfidenceEllipseSimplex(ConfidenceRegionABC):
@@ -486,8 +496,8 @@ class ConfidenceEllipseTransformed(ConfidenceRegionABC):
         samples = np.asarray(samples)
         self.transformation = transformation
         Z = self.transformation(samples)
-        # self.mean_ = np.mean(samples, axis=0)
-        self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
+        self.mean_ = np.mean(samples, axis=0)
+        # self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
         self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
         self._samples = samples
         self.alpha = 1.-confidence_level
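The change from `1./alpha` to `2./alpha` above aligns `winkler_scores` with the standard interval (Winkler) score, W = (u - l) + (2/alpha) * ((l - y)_+ + (y - u)_+): the width of the interval plus a penalty of 2/alpha per unit by which the true value falls outside it. A standalone sketch of that core formula (without the optional `add_ae` term), with a quick numeric check:

```python
def winkler_score(low, high, true_val, alpha):
    # interval score: width plus a 2/alpha penalty per unit of miss
    amp = high - low
    miss = max(0.0, low - true_val) + max(0.0, true_val - high)
    return amp + (2.0 / alpha) * miss

# a 95% interval [0.10, 0.30] for a class prevalence:
print(winkler_score(0.10, 0.30, 0.25, alpha=0.05))  # covered: just the width, 0.2
print(winkler_score(0.10, 0.30, 0.35, alpha=0.05))  # 0.05 above: 0.2 + 40*0.05 = 2.2
```

Lower is better: the score rewards narrow intervals but makes misses expensive, which is why it is used above as a temperature-selection criterion.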
@@ -549,7 +559,99 @@ class ConfidenceEllipseILR(ConfidenceEllipseTransformed):



+class ConfidenceIntervalsTransformed(ConfidenceRegionABC):
+    """
+    Instantiates a Confidence Intervals region in a transformed space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param transformation: a CompositionalTransformation mapping the simplex onto the transformed space
+    :param confidence_level: float, the confidence level (default 0.95)
+    :param bonferroni_correction: bool (default False), if True, a Bonferroni correction
+        is applied to the significance level (`alpha`) before computing confidence intervals.
+        The correction consists of replacing `alpha` with `alpha/n_classes`. When
+        `n_classes=2` the correction is not applied because there is only one verification test,
+        since the other class is constrained. This is not necessarily true for n_classes>2.
+    """
+
+    def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95, bonferroni_correction=False):
+        samples = np.asarray(samples)
+        self.transformation = transformation
+        Z = self.transformation(samples)
+        self.mean_ = np.mean(samples, axis=0)
+        # self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
+        self.conf_region_z = ConfidenceIntervals(Z, confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
+        self._samples = samples
+        self.alpha = 1.-confidence_level
+
+    @property
+    def samples(self):
+        return self._samples
+
+    def point_estimate(self):
+        """
+        Returns the point estimate, i.e., the mean of the samples in the original space.
+
+        :return: np.ndarray of shape (n_classes,)
+        """
+        # The inverse transformation of the mean in the transformed space does not coincide with the
+        # true mean, because the geometric mean requires smoothing the prevalence vectors, and this
+        # affects the inverse (softmax);
+        # return self.transformation.inverse(self.mean_)  # <- does not coincide
+        return self.mean_
+
+    def coverage(self, true_value):
+        """
+        Checks whether a value, or a set of values, is contained in the confidence region. If more than one
+        value is passed, the method computes the fraction of these that are contained in the region. If only
+        one value is passed, then it returns either 1.0 or 0.0, indicating whether or not the value is in the
+        region, respectively.
+
+        :param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
+        :return: float in [0,1]
+        """
+        transformed_values = self.transformation(true_value)
+        return self.conf_region_z.coverage(transformed_values)
+
+    def coverage_soft(self, true_value):
+        transformed_values = self.transformation(true_value)
+        return self.conf_region_z.coverage_soft(transformed_values)
+
+    def winkler_scores(self, true_prev, alpha=None, add_ae=False):
+        transformed_values = self.transformation(true_prev)
+        return self.conf_region_z.winkler_scores(transformed_values, alpha=alpha, add_ae=add_ae)
+
+    def mean_winkler_score(self, true_prev, alpha=None, add_ae=False):
+        transformed_values = self.transformation(true_prev)
+        return self.conf_region_z.mean_winkler_score(transformed_values, alpha=alpha, add_ae=add_ae)
+
+
+class ConfidenceIntervalsCLR(ConfidenceIntervalsTransformed):
+    """
+    Instantiates Confidence Intervals in the Centered Log-Ratio (CLR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    :param bonferroni_correction: bool (default False), if True, a Bonferroni correction
+        is applied to the significance level (`alpha`) before computing confidence intervals.
+        The correction consists of replacing `alpha` with `alpha/n_classes`. When
+        `n_classes=2` the correction is not applied because there is only one verification test,
+        since the other class is constrained. This is not necessarily true for n_classes>2.
+    """
+    def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
+        super().__init__(samples, CLRtransformation(), confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
+
+
+class ConfidenceIntervalsILR(ConfidenceIntervalsTransformed):
+    """
+    Instantiates Confidence Intervals in the Isometric Log-Ratio (ILR) space.
+
+    :param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
+    :param confidence_level: float, the confidence level (default 0.95)
+    :param bonferroni_correction: bool (default False), if True, a Bonferroni correction
+        is applied to the significance level (`alpha`) before computing confidence intervals.
+        The correction consists of replacing `alpha` with `alpha/n_classes`. When
+        `n_classes=2` the correction is not applied because there is only one verification test,
+        since the other class is constrained. This is not necessarily true for n_classes>2.
+    """
+    def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
+        super().__init__(samples, ILRtransformation(), confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
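The idea behind these classes: compute ordinary per-dimension intervals, but in a log-ratio space where compositional data behaves more like unconstrained Euclidean data. A standalone sketch of the CLR forward map and interval construction; the patch delegates the transform to its own `CLRtransformation` class, so the smoothing constant `eps` below is an assumption made for illustration:

```python
import numpy as np

def clr(prevs, eps=1e-6):
    # centered log-ratio: log of each component over the geometric mean;
    # zeros must be smoothed away first, since log(0) is undefined
    p = np.asarray(prevs, dtype=float) + eps
    p = p / p.sum(axis=-1, keepdims=True)
    logp = np.log(p)
    return logp - logp.mean(axis=-1, keepdims=True)

# bootstrap-like prevalence samples for 3 classes
rng = np.random.default_rng(0)
samples = rng.dirichlet([50, 30, 20], size=1000)
Z = clr(samples)

# per-dimension 95% intervals in CLR space (alpha = 0.05)
low, high = np.percentile(Z, [2.5, 97.5], axis=0)
print(np.round(low, 3), np.round(high, 3))
```

Coverage checks and Winkler scores are then computed by mapping the true prevalence vector through the same transform, exactly as `ConfidenceIntervalsTransformed.coverage` does above.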
@@ -611,23 +713,35 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         self.verbose = verbose

     def aggregation_fit(self, classif_predictions, labels):
-        data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
+
         self.quantifiers = []
         if self.n_train_samples==1:
             self.quantifier.aggregation_fit(classif_predictions, labels)
             self.quantifiers.append(self.quantifier)
         else:
-            # model-based bootstrap (only on the aggregative part)
-            n_examples = len(data)
-            full_index = np.arange(n_examples)
-            with qp.util.temp_seed(self.random_state):
-                for i in range(self.n_train_samples):
-                    quantifier = copy.deepcopy(self.quantifier)
-                    index = resample(full_index, n_samples=n_examples)
-                    classif_predictions_i = classif_predictions.sampling_from_index(index)
-                    data_i = data.sampling_from_index(index)
-                    quantifier.aggregation_fit(classif_predictions_i, data_i)
-                    self.quantifiers.append(quantifier)
+            if classif_predictions is None or labels is None:
+                # The entire dataset was consumed for classifier training, implying there is no need to train
+                # an aggregation function. If the bootstrap method was configured to train different aggregators
+                # (i.e., self.n_train_samples>1), then an error is raised. Otherwise, the method ends.
+                if self.n_train_samples > 1:
+                    raise ValueError(
+                        f'The underlying quantifier ({self.quantifier.__class__.__name__}) has consumed all the '
+                        f'training data, meaning the aggregation function needs none, but {self.n_train_samples=} '
+                        f'is > 1, which is inconsistent.'
+                    )
+            else:
+                # model-based bootstrap (only on the aggregative part)
+                data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
+                n_examples = len(data)
+                full_index = np.arange(n_examples)
+                with qp.util.temp_seed(self.random_state):
+                    for i in range(self.n_train_samples):
+                        quantifier = copy.deepcopy(self.quantifier)
+                        index = resample(full_index, n_samples=n_examples)
+                        classif_predictions_i = classif_predictions.sampling_from_index(index)
+                        data_i = data.sampling_from_index(index)
+                        quantifier.aggregation_fit(classif_predictions_i, data_i)
+                        self.quantifiers.append(quantifier)

         return self

     def aggregate(self, classif_predictions: np.ndarray):
@@ -653,8 +767,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
         return prev_estim, conf

     def aggregate_conf(self, classif_predictions: np.ndarray, confidence_level=None):
-        if confidence_level is None:
-            confidence_level = self.confidence_level
+        confidence_level = confidence_level or self.confidence_level
+
         n_samples = classif_predictions.shape[0]

         prevs = []
@@ -665,11 +779,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
                 for i in range(self.n_test_samples)
             )
             prevs.extend(results)
-            # for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
-            #     sample_i = resample(classif_predictions, n_samples=n_samples)
-            #     prev_i = quantifier.aggregate(sample_i)
-            #     prevs.append(prev_i)

+
         prevs = np.array(prevs)
         conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
         prev_estim = conf.point_estimate()
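The test-side bootstrap used by `aggregate_conf` boils down to: hold the classifier fixed, resample its predictions with replacement, re-run only the (cheap) aggregation step on each resample, and collect the resulting prevalence vectors as samples for a confidence region. A simplified sequential sketch of that loop (the patch parallelizes it with joblib; `aggregate_fn` and the fake posteriors below are illustrative stand-ins):

```python
import numpy as np
from sklearn.utils import resample

def bootstrap_aggregate(aggregate_fn, classif_predictions, n_test_samples=100, seed=0):
    # model-based bootstrap on the test side: resample the fixed classifier
    # predictions and re-aggregate each resample
    rng = np.random.RandomState(seed)
    n = classif_predictions.shape[0]
    prevs = []
    for _ in range(n_test_samples):
        sample_i = resample(classif_predictions, n_samples=n, random_state=rng)
        prevs.append(aggregate_fn(sample_i))
    # shape (n_test_samples, n_classes): ready to feed to a confidence region
    return np.asarray(prevs)

# usage sketch: aggregate_fn would be quantifier.aggregate for a fitted quantifier;
# here a PCC-style mean of (fake) posteriors stands in for it
fake_posteriors = np.random.default_rng(0).dirichlet([1, 1], size=500)
prevs = bootstrap_aggregate(lambda P: P.mean(axis=0), fake_posteriors)
print(prevs.mean(axis=0), prevs.std(axis=0))
```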
@@ -741,6 +852,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
                  mcmc_seed: int = 0,
                  confidence_level: float = 0.95,
                  region: str = 'intervals',
+                 temperature: float = 1.,
                  prior = 'uniform'):

         if num_warmup <= 0:
@@ -761,6 +873,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
         self.mcmc_seed = mcmc_seed
         self.confidence_level = confidence_level
         self.region = region
+        self.temperature = temperature
         self.prior = prior

     # Array of shape (n_classes, n_predicted_classes,) where entry (y, c) is the number of instances
@@ -804,6 +917,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
             num_warmup=self.num_warmup,
             num_samples=self.num_samples,
             alpha=alpha,
+            temperature=self.temperature,
             seed=self.mcmc_seed,
         )
         return self._samples
diff --git a/result_table b/result_table
new file mode 160000
index 0000000..9d433e3
--- /dev/null
+++ b/result_table
@@ -0,0 +1 @@
+Subproject commit 9d433e3e35b4d111a3914a1e7d3257a8fcf24a9b
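For orientation, a minimal usage sketch of the `temperature` parameter added to `BayesianCC` above. It assumes the API as it appears in this patch (`fit(X, y)`, `predict_conf` returning a point estimate and a confidence region, `region.coverage`); the dataset choice and the `classifier` keyword are illustrative assumptions, not verified against the final interface:

```python
from sklearn.linear_model import LogisticRegression
import quapy as qp
from quapy.method.confidence import BayesianCC

# illustrative dataset; any multiclass quantification dataset would do
data = qp.datasets.fetch_UCIMulticlassDataset('dry-bean')
train, test = data.train_test

# temperature > 1 tempers the multinomial likelihoods, flattening the
# posterior over prevalences and hence widening the credible region
quant = BayesianCC(classifier=LogisticRegression(), temperature=2., region='intervals')
quant.fit(*train.Xy)

prev_estim, region = quant.predict_conf(test.X)
print(prev_estim)
print(region.coverage(test.prevalence()))  # 1.0 if the true prevalence is covered
```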