From a6336218e26c0c61127702f71217018f5e90f2a1 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Tue, 20 Jan 2026 10:53:16 +0100 Subject: [PATCH] adding lequa datasets --- BayesianKDEy/TODO.txt | 11 +- BayesianKDEy/_bayeisan_kdey.py | 6 +- BayesianKDEy/_bayesian_mapls.py | 6 +- BayesianKDEy/commons.py | 153 ++++++++++++++++++++--- BayesianKDEy/full_experiments.py | 158 ++++++++++++------------ BayesianKDEy/generate_results.py | 130 +++++++++---------- BayesianKDEy/temperature_calibration.py | 54 ++------ 7 files changed, 313 insertions(+), 205 deletions(-) diff --git a/BayesianKDEy/TODO.txt b/BayesianKDEy/TODO.txt index 77accb9..d4d8d58 100644 --- a/BayesianKDEy/TODO.txt +++ b/BayesianKDEy/TODO.txt @@ -1,4 +1,13 @@ -- Add other methods that natively provide uncertainty quantification methods? (e.g., Ratio estimator, Card & Smith) +- Things to try: + - init chain helps? [seems irrelevant in MAPLS...] + - Aitchison kernel is better? + - other classifiers? + - optimize classifier? + - use all datasets? + - improve KDE on wine-quality? +- Add other methods that natively provide uncertainty quantification methods? + Ratio estimator + Card & Smith - MPIW (Mean Prediction Interval Width): is the average of the amplitudes (w/o aggregating coverage whatsoever) - Implement Interval Score or Winkler Score - analyze across shift diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py index 852ced8..aa6c8d1 100644 --- a/BayesianKDEy/_bayeisan_kdey.py +++ b/BayesianKDEy/_bayeisan_kdey.py @@ -59,7 +59,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC): temperature=1., engine='numpyro', prior='uniform', - verbose: bool = False): + verbose: bool = False, + **kwargs): if num_warmup <= 0: raise ValueError(f'parameter {num_warmup=} must be a positive integer') @@ -74,6 +75,9 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC): assert engine in ['rw-mh', 'emcee', 'numpyro'] super().__init__(classifier, fit_classifier, val_split) + assert all(k.startswith('classifier__') for k in kwargs.keys()), 'unexpected kwargs; must start with "classifier__"' + self.classifier.set_params(**{k.replace('classifier__', ''):v for k,v in kwargs.items()}) # <- improve + self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel) self.kernel = self._check_kernel(kernel) self.num_warmup = num_warmup diff --git a/BayesianKDEy/_bayesian_mapls.py b/BayesianKDEy/_bayesian_mapls.py index cde6509..35b55b3 100644 --- a/BayesianKDEy/_bayesian_mapls.py +++ b/BayesianKDEy/_bayesian_mapls.py @@ -39,6 +39,7 @@ class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC): region: str = 'intervals', temperature=1., prior='uniform', + mapls_chain_init=True, verbose=False ): @@ -53,6 +54,7 @@ class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC): self.region = region self.temperature = temperature self.prior = prior + self.mapls_chain_init = mapls_chain_init self.verbose = verbose def aggregation_fit(self, classif_predictions, labels): @@ -74,7 +76,7 @@ class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC): return_lambda=True ) - # pi_star: MAP in simplex (shape: [K]), convert to ILR space + # pi_star: MAP in simplex shape (n_classes,) and convert to ILR space z0 = self.ilr(pi_star) if self.prior == 'uniform': @@ -107,7 +109,7 @@ class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC): random.PRNGKey(self.mcmc_seed), test_posteriors=classif_predictions, alpha=alpha, - init_params={"z": z0} + init_params={"z": z0} if self.mapls_chain_init else None ) samples = mcmc.get_samples()["z"] diff --git a/BayesianKDEy/commons.py b/BayesianKDEy/commons.py index 01bbdc0..35819b2 100644 --- a/BayesianKDEy/commons.py +++ b/BayesianKDEy/commons.py @@ -14,11 +14,134 @@ import numpy as np from method.aggregative import KDEyML from quapy.functional import l1_norm, ILRtransformation from scipy.stats import entropy +from abc import ABC, abstractmethod +FINEGRAINED = True +RESULT_DIR = Path('results_finegrained') if FINEGRAINED else Path('results') -def fetch_UCI_multiclass(data_name): - return qp.datasets.fetch_UCIMulticlassDataset(data_name, min_class_support=0.01) + +class DatasetHandler(ABC): + + def __init__(self, name:str, sample_size:int): + self._name = name + self._sample_size = sample_size + + @abstractmethod + def get_training(self): ... + + @abstractmethod + def get_train_testprot_for_eval(self): ... + + @abstractmethod + def get_train_valprot_for_modsel(self): ... + + def sample_size(self): + return self._sample_size + + def name(self): + return self._name + + @classmethod + @abstractmethod + def iter(cls): ... + + def __repr__(self): + return self.__class__.__name__ + + @classmethod + @abstractmethod + def is_binary(self): + ... + + +class UCIMulticlassHandler(DatasetHandler): + + DATASETS = qp.datasets.UCI_MULTICLASS_DATASETS.copy() + + def __init__(self, name, n_val_samples=100, n_test_samples=100): + super().__init__(name, sample_size=1000) + self._dataset = None # lazy fetch + self.n_val_samples = n_val_samples + self.n_test_samples = n_test_samples + + def get_training(self): + return self.dataset().training + + def get_train_testprot_for_eval(self): + training, test = self.dataset().train_test + test_generator = qp.protocol.UPP(test, repeats=self.n_test_samples, random_state=0) + return training, test_generator + + def get_train_valprot_for_modsel(self): + training = self.dataset().training + training, val = training.split_stratified(train_prop=0.6, random_state=0) + val_generator = qp.protocol.UPP(val, repeats=self.n_val_samples, random_state=0) + return training, val_generator + + @lru_cache(maxsize=None) + def dataset(self): + if self._dataset is None: + self._dataset = qp.datasets.fetch_UCIMulticlassDataset(self.name(), min_class_support=0.01) + return self._dataset + + def __repr__(self): + return "" # self.dataset().__repr__() + + @classmethod + def iter(cls): + for name in cls.DATASETS: + yield cls(name) + + @classmethod + def is_binary(self): + return False + + +class LeQuaHandler(DatasetHandler): + + DATASETS = ['LeQua2022', 'LeQua2024'] + + def __init__(self, name): + super().__init__(name, sample_size=1000) + self._dataset = None # lazy fetch + + def get_training(self): + return self.dataset()[0] + + def get_train_testprot_for_eval(self): + training, _, test_generator = self.dataset() + return training, test_generator + + def get_train_valprot_for_modsel(self): + training, val_generator, _ = self.dataset() + return training, val_generator + + @lru_cache(maxsize=None) + def dataset(self): + if self._dataset is None: + if self.name()=='LeQua2022': + self._dataset = qp.datasets.fetch_lequa2022(task='T1B') + elif self.name()=='LeQua2024': + self._dataset = qp.datasets.fetch_lequa2024(task='T2') + else: + raise ValueError(f'unexpected dataset name {self.name()}; valid ones are {self.DATASETS}') + return self._dataset + + def __repr__(self): + return self.dataset().__repr__() + + @classmethod + def iter(cls): + for name in cls.DATASETS: + yield cls(name) + + @classmethod + def is_binary(self): + return False + +# def fetch_UCI_multiclass(data_name): +# return qp.datasets.fetch_UCIMulticlassDataset(data_name, min_class_support=0.01) def fetch_UCI_binary(data_name): @@ -32,18 +155,20 @@ binary = { 'sample_size': 500 } -multiclass = { - 'datasets': qp.datasets.UCI_MULTICLASS_DATASETS.copy(), - 'fetch_fn': fetch_UCI_multiclass, - 'sample_size': 1000 -} -try: - multiclass['datasets'].remove('poker_hand') # random performance - multiclass['datasets'].remove('hcv') # random performance - multiclass['datasets'].remove('letter') # many classes - multiclass['datasets'].remove('isolet') # many classes -except ValueError: - pass +# multiclass = { +# 'datasets': qp.datasets.UCI_MULTICLASS_DATASETS.copy(), +# 'fetch_fn': fetch_UCI_multiclass, +# 'sample_size': 1000 +# } +# try: +# multiclass['datasets'].remove('poker_hand') # random performance +# multiclass['datasets'].remove('hcv') # random performance +# multiclass['datasets'].remove('letter') # many classes +# multiclass['datasets'].remove('isolet') # many classes +# except ValueError: +# pass + + # utils diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py index 2d10610..ee64c15 100644 --- a/BayesianKDEy/full_experiments.py +++ b/BayesianKDEy/full_experiments.py @@ -5,13 +5,13 @@ from copy import deepcopy as cp import quapy as qp from BayesianKDEy._bayeisan_kdey import BayesianKDEy from BayesianKDEy._bayesian_mapls import BayesianMAPLS -from BayesianKDEy.commons import multiclass, experiment_path, KDEyCLR +from BayesianKDEy.commons import experiment_path, KDEyCLR, FINEGRAINED, RESULT_DIR, DatasetHandler, \ + UCIMulticlassHandler, LeQuaHandler from BayesianKDEy.temperature_calibration import temp_calibration from build.lib.quapy.data import LabelledCollection from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC from quapy.model_selection import GridSearchQ from quapy.data import Dataset -# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot from quapy.method.confidence import BayesianCC, AggregativeBootstrap from quapy.method.aggregative import KDEyML, ACC from quapy.protocol import UPP @@ -21,6 +21,7 @@ from collections import defaultdict from time import time + def methods(): """ Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where: @@ -30,68 +31,58 @@ def methods(): - bayesian/bootstrap_constructor: is a function that instantiates the bayesian o bootstrap method with the quantifier with optimized hyperparameters """ - acc_hyper = {} - emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs']} - hdy_hyper = {'nbins': [3,4,5,8,16,32]} - kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]} - kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]} + if FINEGRAINED: + lr_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]} + acc_hyper = lr_hyper + emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs'], **lr_hyper} + hdy_hyper = {'nbins': [3,4,5,8,16,32], **lr_hyper} + kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **lr_hyper} + kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **lr_hyper} + else: + acc_hyper = {} + emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs']} + hdy_hyper = {'nbins': [3,4,5,8,16,32]} + kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]} + kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]} + multiclass_method = 'multiclass' only_binary = 'only_binary' only_multiclass = 'only_multiclass' - yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method - yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method + # Bootstrap approaches: + # -------------------------------------------------------------------------------------------------------- + #yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method + #yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method + #yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method + #yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method - yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method - - yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method + # Bayesian approaches: + # -------------------------------------------------------------------------------------------------------- + # yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method # yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary # - yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method - # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method - # yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method - # yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method - # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method - # yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass - # yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass - # yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method - # yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method + #yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', **hyper), multiclass_method # yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method # yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method - # yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method - # yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method - yield f'BaKDE-Ait-numpyro-T*-U', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, prior='uniform', **hyper), multiclass_method - # yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method - # yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method - # yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method - # yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method - # yield 'BayEMQ', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', exact_train_prev=True), multiclass_method - # yield 'BayEMQ*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', exact_train_prev=True), multiclass_method - # yield 'BayEMQ*2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', exact_train_prev=True), multiclass_method - # yield 'BayEMQ*2T*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=None, exact_train_prev=True), multiclass_method - # yield 'BayEMQ*2T01', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=0.1, exact_train_prev=True), multiclass_method - # yield 'BayEMQ*2T10000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=10000, exact_train_prev=True), multiclass_method - # yield 'BayEMQ*2T100000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=100000, - # exact_train_prev=True), multiclass_method - # yield 'BayEMQ-U-Temp1-2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=1, exact_train_prev=True), multiclass_method - yield 'BayEMQ-U-Temp*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=None, exact_train_prev=True), multiclass_method - # yield 'BayEMQ*Temp1', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=1, exact_train_prev=True), multiclass_method - # yield 'BayEMQ*Temp10', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=10, exact_train_prev=True), multiclass_method - # yield 'BayEMQ*Temp100', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=100, exact_train_prev=True), multiclass_method - # yield 'BayEMQ*Temp1000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=1000, exact_train_prev=True), multiclass_method + #yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(LR(), kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method + # yield f'BaKDE-Gau-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(LR(), kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method + # yield f'BaKDE-Ait-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(LR(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method + # yield f'BaKDE-Gau-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(LR(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method + yield 'BayEMQ-U-Temp1-2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=1, exact_train_prev=True), multiclass_method + yield 'BayEMQ-T*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=None, exact_train_prev=True), multiclass_method -def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict): +def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict): with qp.util.temp_seed(0): print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}') # model selection if len(grid)>0: - train, val = train.split_stratified(train_prop=0.6, random_state=0) + train, val_prot = dataset.get_train_valprot_for_modsel() mod_sel = GridSearchQ( model=point_quantifier, param_grid=grid, - protocol=qp.protocol.UPP(val, repeats=250, random_state=0), + protocol=val_prot, refit=False, n_jobs=-1, verbose=True @@ -103,46 +94,51 @@ def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuan return best_params -def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, withconf_constructor, hyper_choice_path: Path): - with qp.util.temp_seed(0): +def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier): + if hasattr(uncertainty_quantifier, 'temperature') and uncertainty_quantifier.temperature is None: + print('calibrating temperature') + train, val_prot = dataset.get_train_valprot_for_modsel() + temperature = temp_calibration(uncertainty_quantifier, train, val_prot, n_jobs=-1) + uncertainty_quantifier.temperature = temperature - training, test = dataset.train_test + +def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, uncertainty_quant_constructor, hyper_choice_path: Path): + + with qp.util.temp_seed(0): # model selection best_hyperparams = qp.util.pickled_resource( - hyper_choice_path, model_selection, training, cp(point_quantifier), grid + hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid ) t_init = time() - withconf_quantifier = withconf_constructor(best_hyperparams) - if hasattr(withconf_quantifier, 'temperature') and withconf_quantifier.temperature is None: - print('calibrating temperature') - train, val = data.training.split_stratified(train_prop=0.6, random_state=0) - temperature = temp_calibration(withconf_quantifier, train, val, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1) - withconf_quantifier.temperature = temperature - withconf_quantifier.fit(*training.Xy) + uncertainty_quantifier = uncertainty_quant_constructor(best_hyperparams) + temperature_calibration(dataset, uncertainty_quantifier) + training, test_generator = dataset.get_train_testprot_for_eval() + uncertainty_quantifier.fit(*training.Xy) tr_time = time() - t_init # test train_prevalence = training.prevalence() results = defaultdict(list) - test_generator = UPP(test, repeats=100, random_state=0) pbar = tqdm(enumerate(test_generator()), total=test_generator.total()) for i, (sample_X, true_prevalence) in pbar: t_init = time() - point_estimate, region = withconf_quantifier.predict_conf(sample_X) + point_estimate, region = uncertainty_quantifier.predict_conf(sample_X) ttime = time()-t_init + results['true-prevs'].append(true_prevalence) results['point-estim'].append(point_estimate) results['shift'].append(qp.error.ae(true_prevalence, train_prevalence)) results['ae'].append(qp.error.ae(prevs_true=true_prevalence, prevs_hat=point_estimate)) results['rae'].append(qp.error.rae(prevs_true=true_prevalence, prevs_hat=point_estimate)) + results['sre'].append(qp.error.sre(prevs_true=true_prevalence, prevs_hat=point_estimate, prevs_train=train_prevalence)) results['coverage'].append(region.coverage(true_prevalence)) results['amplitude'].append(region.montecarlo_proportion(n_trials=50_000)) results['test-time'].append(ttime) results['samples'].append(region.samples) - pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}') + pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} W={np.mean(results["sre"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}') report = { 'optim_hyper': best_hyperparams, @@ -154,34 +150,40 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method return report +def check_skip_experiment(method_scope, dataset: DatasetHandler): + if method_scope == 'only_binary' and not dataset.is_binary(): + return True + if method_scope == 'only_multiclass' and dataset.is_binary(): + return True + return False + + if __name__ == '__main__': - result_dir = Path('./results') + result_dir = RESULT_DIR + + for data_handler in [LeQuaHandler]:#, UCIMulticlassHandler]: + for dataset in data_handler.iter(): + qp.environ['SAMPLE_SIZE'] = dataset.sample_size() + print(f'dataset={dataset}') + + problem_type = 'binary' if dataset.is_binary() else 'multiclass' - for setup in [multiclass]: # [binary, multiclass]: - qp.environ['SAMPLE_SIZE'] = setup['sample_size'] - for data_name in setup['datasets']: - print(f'dataset={data_name}') - # if data_name=='breast-cancer' or data_name.startswith("cmc") or data_name.startswith("ctg"): - # print(f'skipping dataset: {data_name}') - # continue - data = setup['fetch_fn'](data_name) - is_binary = data.n_classes==2 - result_subdir = result_dir / ('binary' if is_binary else 'multiclass') - hyper_subdir = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass') for method_name, surrogate_quant, hyper_params, withconf_constructor, method_scope in methods(): - if method_scope == 'only_binary' and not is_binary: + if check_skip_experiment(method_scope, dataset): continue - if method_scope == 'only_multiclass' and is_binary: - continue - result_path = experiment_path(result_subdir, data_name, method_name) - hyper_path = experiment_path(hyper_subdir, data_name, surrogate_quant.__class__.__name__) + + result_path = experiment_path(result_dir / problem_type, dataset.name(), method_name) + hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name(), surrogate_quant.__class__.__name__) + report = qp.util.pickled_resource( - result_path, experiment, data, surrogate_quant, method_name, hyper_params, withconf_constructor, hyper_path + result_path, experiment, dataset, surrogate_quant, method_name, hyper_params, withconf_constructor, hyper_path ) - print(f'dataset={data_name}, ' + + print(f'dataset={dataset}, ' f'method={method_name}: ' f'mae={report["results"]["ae"].mean():.5f}, ' + f'W={report["results"]["sre"].mean():.5f}, ' f'coverage={report["results"]["coverage"].mean():.5f}, ' f'amplitude={report["results"]["amplitude"].mean():.5f}, ') diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py index 4c1f425..ea5ccd2 100644 --- a/BayesianKDEy/generate_results.py +++ b/BayesianKDEy/generate_results.py @@ -7,10 +7,11 @@ import pandas as pd from glob import glob from pathlib import Path import quapy as qp -from BayesianKDEy.commons import fetch_UCI_binary, fetch_UCI_multiclass +from BayesianKDEy.commons import RESULT_DIR, UCIMulticlassHandler from error import dist_aitchison from quapy.method.confidence import ConfidenceIntervals from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC +import quapy.functional as F pd.set_option('display.max_columns', None) pd.set_option('display.width', 2000) @@ -20,6 +21,31 @@ pd.set_option("display.precision", 4) pd.set_option("display.float_format", "{:.4f}".format) +# methods = None # show all methods +methods = ['BayesianACC', + #'BayesianKDEy', + #'BaKDE-emcee', + # 'BaKDE-numpyro', + # 'BaKDE-numpyro-T2', + # 'BaKDE-numpyro-T10', + # 'BaKDE-numpyro-T*', + 'BaKDE-Ait-numpyro', + 'BaKDE-Ait-T*', + 'BaKDE-Gau-numpyro', + 'BaKDE-Gau-T*', + 'BayEMQ-U-Temp1-2', + 'BayEMQ-T*', + #'BayEMQ-NoInit', + #'BayEMQ-U-Temp*', + # 'BayEMQ*2Temp1', + # 'BayEMQ*2Temp*' + + # 'BootstrapACC', + # 'BootstrapHDy', + # 'BootstrapKDEy', + # 'BootstrapEMQ' + ] + def region_score(true_prev, region: ConfidenceRegionABC): amp = region.montecarlo_proportion(50_000) if true_prev in region: @@ -80,26 +106,6 @@ def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwar update_pickle(report, file, update_fields) -# methods = None # show all methods -methods = ['BayesianACC', #'BayesianKDEy', - #'BaKDE-emcee', - # 'BaKDE-numpyro', - # 'BaKDE-numpyro-T2', - # 'BaKDE-numpyro-T10', - # 'BaKDE-numpyro-T*', - # 'BaKDE-Ait-numpyro', - # 'BaKDE-Ait-numpyro-T*', - 'BaKDE-Ait-numpyro-T*-U', - 'BayEMQ-U-Temp1-2', - 'BayEMQ-U-Temp*', - # 'BayEMQ*2Temp1', - # 'BayEMQ*2Temp*' - - # 'BootstrapACC', - # 'BootstrapHDy', - # 'BootstrapKDEy', - # 'BootstrapEMQ' - ] def nicer(name:str): replacements = { @@ -112,14 +118,19 @@ def nicer(name:str): name = name.replace(k,v) return name -for setup in ['multiclass']: - path = f'./results/{setup}/*.pkl' + +base_dir = RESULT_DIR + +for dataset_handler in [UCIMulticlassHandler]: + problem_type = 'binary' if dataset_handler.is_binary() else 'multiclass' + path = f'./{base_dir}/{problem_type}/*.pkl' table = defaultdict(list) for file in tqdm(glob(path), desc='processing results', total=len(glob(path))): file = Path(file) dataset, method = file.name.replace('.pkl', '').split('__') - if methods is not None and method not in methods: + if method not in methods: continue + report = pickle.load(open(file, 'rb')) results = report['results'] n_samples = len(results['ae']) @@ -166,53 +177,42 @@ for setup in ['multiclass']: n_classes = {} tr_size = {} - for dataset in df['dataset'].unique(): - fetch_fn = { - 'binary': fetch_UCI_binary, - 'multiclass': fetch_UCI_multiclass - }[setup] - data = fetch_fn(dataset) - n_classes[dataset] = data.n_classes - tr_size[dataset] = len(data.training) + tr_prev = {} + for dataset in dataset_handler.iter(): + train = dataset.get_training() + n_classes[dataset] = train.n_classes + tr_size[dataset] = len(train) + tr_prev[dataset] = F.strprev(train.prevalence()) # remove datasets with more than max_classes classes - max_classes = 25 - min_train = 500 - ignore_datasets = ['poker_hand', 'hcv'] - for data_name, n in n_classes.items(): - if n > max_classes: - df = df[df["dataset"] != data_name] - for data_name, n in tr_size.items(): - if n < min_train: - df = df[df["dataset"] != data_name] - for data_name, n in tr_size.items(): - if data_name in ignore_datasets: - df = df[df["dataset"] != data_name] + # max_classes = 25 + # min_train = 500 + # ignore_datasets = ['poker_hand', 'hcv'] + # for data_name, n in n_classes.items(): + # if n > max_classes: + # df = df[df["dataset"] != data_name] + # for data_name, n in tr_size.items(): + # if n < min_train: + # df = df[df["dataset"] != data_name] + # for data_name, n in tr_size.items(): + # if data_name in ignore_datasets: + # df = df[df["dataset"] != data_name] for region in ['CI']: #, 'CLR', 'ILR', 'CI']: - if setup == 'binary' and region=='ILR': + if problem_type == 'binary' and region=='ILR': continue # pv = pd.pivot_table( # df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True # ) - pv = pd.pivot_table( - df, index='dataset', columns='method', values=[ - # f'amperr-{region}', - # f'a-{region}', - f'c-{region}', - # f'w-{region}', - 'ae', - 'SRE', - # 'rae', - # f'aitch', - # f'aitch-well' - # 'reg-score-ILR', - ], margins=True - ) - pv['n_classes'] = pv.index.map(n_classes).astype('Int64') - pv['tr_size'] = pv.index.map(tr_size).astype('Int64') - pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"]) - print(f'{setup=}') - print(pv) - print('-'*80) + for column in [f'a-{region}', f'c-{region}', 'ae', 'SRE']: + pv = pd.pivot_table( + df, index='dataset', columns='method', values=column, margins=True + ) + pv['n_classes'] = pv.index.map(n_classes).astype('Int64') + pv['tr_size'] = pv.index.map(tr_size).astype('Int64') + #pv['tr-prev'] = pv.index.map(tr_prev) + pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"]) + print(f'{problem_type=} {column=}') + print(pv) + print('-'*80) diff --git a/BayesianKDEy/temperature_calibration.py b/BayesianKDEy/temperature_calibration.py index 10ca3a1..574bdca 100644 --- a/BayesianKDEy/temperature_calibration.py +++ b/BayesianKDEy/temperature_calibration.py @@ -1,6 +1,6 @@ from build.lib.quapy.data import LabelledCollection from quapy.method.confidence import WithConfidenceABC -from quapy.protocol import UPP +from quapy.protocol import AbstractProtocol import numpy as np from tqdm import tqdm import quapy as qp @@ -8,16 +8,12 @@ from joblib import Parallel, delayed import copy - - def temp_calibration(method:WithConfidenceABC, train:LabelledCollection, - val:LabelledCollection, + val_prot:AbstractProtocol, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], - num_samples=100, nominal_coverage=0.95, amplitude_threshold='auto', - random_state=0, n_jobs=1, verbose=True): @@ -31,41 +27,7 @@ def temp_calibration(method:WithConfidenceABC, if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1: print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions') - - method.fit(*train.Xy) - label_shift_prot = UPP(val, repeats=num_samples, random_state=random_state) - - # results = [] - # temp_grid = sorted(temp_grid) - # for temp in temp_grid: - # method.temperature = temp - # coverage = 0 - # amplitudes = [] - # errs = [] - # pbar = tqdm(enumerate(label_shift_prot()), total=label_shift_prot.total(), disable=not verbose) - # for i, (sample, prev) in pbar: - # point_estim, conf_region = method.predict_conf(sample) - # if prev in conf_region: - # coverage += 1 - # amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000)) - # errs.append(qp.error.mae(prev, point_estim)) - # if verbose: - # pbar.set_description( - # f'temperature={temp:.2f}, ' - # f'coverage={coverage/(i+1):.4f}, ' - # f'amplitude={np.mean(amplitudes):.4f},' - # f'mae={np.mean(errs):.4f}' - # ) - # - # mean_coverage = coverage / label_shift_prot.total() - # mean_amplitude = np.mean(amplitudes) - # - # if mean_amplitude < amplitude_threshold: - # results.append((temp, mean_coverage, mean_amplitude)) - # else: - # break - - def evaluate_temperature(temp): + def evaluate_temperature_job(temp): local_method = copy.deepcopy(method) local_method.temperature = temp @@ -73,7 +35,7 @@ def temp_calibration(method:WithConfidenceABC, amplitudes = [] errs = [] - for i, (sample, prev) in enumerate(label_shift_prot()): + for i, (sample, prev) in enumerate(val_prot()): point_estim, conf_region = local_method.predict_conf(sample) if prev in conf_region: @@ -82,15 +44,19 @@ def temp_calibration(method:WithConfidenceABC, amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000)) errs.append(qp.error.mae(prev, point_estim)) - mean_coverage = coverage / label_shift_prot.total() + mean_coverage = coverage / val_prot.total() mean_amplitude = np.mean(amplitudes) + if verbose: + print(f'Temperature={temp} got coverage={mean_coverage*100:.2f}% amplitude={mean_amplitude*100:.2f}%') + return temp, mean_coverage, mean_amplitude temp_grid = sorted(temp_grid) + method.fit(*train.Xy) raw_results = Parallel(n_jobs=n_jobs, backend="loky")( - delayed(evaluate_temperature)(temp) + delayed(evaluate_temperature_job)(temp) for temp in tqdm(temp_grid, disable=not verbose) ) results = [