adding lequa datasets

This commit is contained in:
Alejandro Moreo Fernandez 2026-01-20 10:53:16 +01:00
parent 9ae65ab09a
commit a6336218e2
7 changed files with 313 additions and 205 deletions

View File

@ -1,4 +1,13 @@
- Add other methods that natively provide uncertainty quantification? (e.g., Ratio estimator, Card & Smith)
- Things to try:
- init chain helps? [seems irrelevant in MAPLS...]
- Aitchison kernel is better?
- other classifiers?
- optimize classifier?
- use all datasets?
- improve KDE on wine-quality?
- Add other methods that natively provide uncertainty quantification?
Ratio estimator
Card & Smith
- MPIW (Mean Prediction Interval Width): the average of the interval amplitudes, without aggregating coverage in any way
- Implement the Interval Score (a.k.a. Winkler Score)
- analyze results across amounts of prior shift
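As a reference for the two scoring TODOs above, a minimal sketch under the standard definitions (the names mpiw and interval_score are illustrative, not part of the codebase):

import numpy as np

def mpiw(lows, highs):
    # Mean Prediction Interval Width: average amplitude of the intervals,
    # irrespective of whether they actually cover the true value
    return float(np.mean(np.asarray(highs) - np.asarray(lows)))

def interval_score(low, high, true_value, alpha=0.05):
    # Winkler/interval score of a central (1-alpha) interval: its width
    # plus a 2/alpha penalty per unit the true value falls outside it
    penalty = (2 / alpha) * (max(0.0, low - true_value) + max(0.0, true_value - high))
    return (high - low) + penalty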

View File

@ -59,7 +59,8 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
temperature=1.,
engine='numpyro',
prior='uniform',
verbose: bool = False):
verbose: bool = False,
**kwargs):
if num_warmup <= 0:
raise ValueError(f'parameter {num_warmup=} must be a positive integer')
@ -74,6 +75,9 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
assert engine in ['rw-mh', 'emcee', 'numpyro']
super().__init__(classifier, fit_classifier, val_split)
assert all(k.startswith('classifier__') for k in kwargs.keys()), 'unexpected kwargs; keys must start with "classifier__"'
self.classifier.set_params(**{k.removeprefix('classifier__'): v for k, v in kwargs.items()})  # strip only the leading prefix (str.replace would drop every occurrence)
self.bandwidth = KDEBase._check_bandwidth(bandwidth, kernel)
self.kernel = self._check_kernel(kernel)
self.num_warmup = num_warmup
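A hypothetical usage sketch of the new **kwargs forwarding (the constructor takes further parameters not shown in this hunk):

from sklearn.linear_model import LogisticRegression

# keys prefixed with 'classifier__' are stripped of the prefix and
# forwarded to the wrapped classifier via set_params
bkde = BayesianKDEy(
    LogisticRegression(),
    classifier__C=10.0,
    classifier__class_weight='balanced',
)
# equivalent to configuring LogisticRegression(C=10.0, class_weight='balanced')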

View File

@ -39,6 +39,7 @@ class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC):
region: str = 'intervals',
temperature=1.,
prior='uniform',
mapls_chain_init=True,
verbose=False
):
@ -53,6 +54,7 @@ class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC):
self.region = region
self.temperature = temperature
self.prior = prior
self.mapls_chain_init = mapls_chain_init
self.verbose = verbose
def aggregation_fit(self, classif_predictions, labels):
@ -74,7 +76,7 @@ class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC):
return_lambda=True
)
# pi_star: MAP in simplex (shape: [K]), convert to ILR space
# pi_star: MAP estimate on the simplex, shape (n_classes,); convert it to ILR space
z0 = self.ilr(pi_star)
if self.prior == 'uniform':
@ -107,7 +109,7 @@ class BayesianMAPLS(AggregativeSoftQuantifier, WithConfidenceABC):
random.PRNGKey(self.mcmc_seed),
test_posteriors=classif_predictions,
alpha=alpha,
init_params={"z": z0}
init_params={"z": z0} if self.mapls_chain_init else None
)
samples = mcmc.get_samples()["z"]

View File

@ -14,11 +14,134 @@ import numpy as np
from method.aggregative import KDEyML
from quapy.functional import l1_norm, ILRtransformation
from scipy.stats import entropy
from abc import ABC, abstractmethod
FINEGRAINED = True
RESULT_DIR = Path('results_finegrained') if FINEGRAINED else Path('results')
def fetch_UCI_multiclass(data_name):
return qp.datasets.fetch_UCIMulticlassDataset(data_name, min_class_support=0.01)
class DatasetHandler(ABC):
def __init__(self, name:str, sample_size:int):
self._name = name
self._sample_size = sample_size
@abstractmethod
def get_training(self): ...
@abstractmethod
def get_train_testprot_for_eval(self): ...
@abstractmethod
def get_train_valprot_for_modsel(self): ...
def sample_size(self):
return self._sample_size
def name(self):
return self._name
@classmethod
@abstractmethod
def iter(cls): ...
def __repr__(self):
return self.__class__.__name__
@classmethod
@abstractmethod
def is_binary(cls):
...
class UCIMulticlassHandler(DatasetHandler):
DATASETS = qp.datasets.UCI_MULTICLASS_DATASETS.copy()
def __init__(self, name, n_val_samples=100, n_test_samples=100):
super().__init__(name, sample_size=1000)
self._dataset = None # lazy fetch
self.n_val_samples = n_val_samples
self.n_test_samples = n_test_samples
def get_training(self):
return self.dataset().training
def get_train_testprot_for_eval(self):
training, test = self.dataset().train_test
test_generator = qp.protocol.UPP(test, repeats=self.n_test_samples, random_state=0)
return training, test_generator
def get_train_valprot_for_modsel(self):
training = self.dataset().training
training, val = training.split_stratified(train_prop=0.6, random_state=0)
val_generator = qp.protocol.UPP(val, repeats=self.n_val_samples, random_state=0)
return training, val_generator
def dataset(self):  # cached manually via self._dataset
if self._dataset is None:
self._dataset = qp.datasets.fetch_UCIMulticlassDataset(self.name(), min_class_support=0.01)
return self._dataset
def __repr__(self):
return "" # self.dataset().__repr__()
@classmethod
def iter(cls):
for name in cls.DATASETS:
yield cls(name)
@classmethod
def is_binary(cls):
return False
class LeQuaHandler(DatasetHandler):
DATASETS = ['LeQua2022', 'LeQua2024']
def __init__(self, name):
super().__init__(name, sample_size=1000)
self._dataset = None # lazy fetch
def get_training(self):
return self.dataset()[0]
def get_train_testprot_for_eval(self):
training, _, test_generator = self.dataset()
return training, test_generator
def get_train_valprot_for_modsel(self):
training, val_generator, _ = self.dataset()
return training, val_generator
def dataset(self):  # cached manually via self._dataset
if self._dataset is None:
if self.name()=='LeQua2022':
self._dataset = qp.datasets.fetch_lequa2022(task='T1B')
elif self.name()=='LeQua2024':
self._dataset = qp.datasets.fetch_lequa2024(task='T2')
else:
raise ValueError(f'unexpected dataset name {self.name()}; valid ones are {self.DATASETS}')
return self._dataset
def __repr__(self):
return self.dataset().__repr__()
@classmethod
def iter(cls):
for name in cls.DATASETS:
yield cls(name)
@classmethod
def is_binary(cls):
return False
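For orientation, a sketch of how a handler is consumed (mirroring the driver loop later in this commit):

for handler in LeQuaHandler.iter():
    qp.environ['SAMPLE_SIZE'] = handler.sample_size()
    # evaluation: full training set plus a protocol generating test samples
    training, test_prot = handler.get_train_testprot_for_eval()
    # model selection: reduced training set plus a validation protocol
    tr_sel, val_prot = handler.get_train_valprot_for_modsel()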
# def fetch_UCI_multiclass(data_name):
# return qp.datasets.fetch_UCIMulticlassDataset(data_name, min_class_support=0.01)
def fetch_UCI_binary(data_name):
@ -32,18 +155,20 @@ binary = {
'sample_size': 500
}
multiclass = {
'datasets': qp.datasets.UCI_MULTICLASS_DATASETS.copy(),
'fetch_fn': fetch_UCI_multiclass,
'sample_size': 1000
}
try:
multiclass['datasets'].remove('poker_hand') # random performance
multiclass['datasets'].remove('hcv') # random performance
multiclass['datasets'].remove('letter') # many classes
multiclass['datasets'].remove('isolet') # many classes
except ValueError:
pass
# multiclass = {
# 'datasets': qp.datasets.UCI_MULTICLASS_DATASETS.copy(),
# 'fetch_fn': fetch_UCI_multiclass,
# 'sample_size': 1000
# }
# try:
# multiclass['datasets'].remove('poker_hand') # random performance
# multiclass['datasets'].remove('hcv') # random performance
# multiclass['datasets'].remove('letter') # many classes
# multiclass['datasets'].remove('isolet') # many classes
# except ValueError:
# pass
# utils

View File

@ -5,13 +5,13 @@ from copy import deepcopy as cp
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy._bayesian_mapls import BayesianMAPLS
from BayesianKDEy.commons import multiclass, experiment_path, KDEyCLR
from BayesianKDEy.commons import experiment_path, KDEyCLR, FINEGRAINED, RESULT_DIR, DatasetHandler, \
UCIMulticlassHandler, LeQuaHandler
from BayesianKDEy.temperature_calibration import temp_calibration
from quapy.data import LabelledCollection
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC
from quapy.model_selection import GridSearchQ
from quapy.data import Dataset
# from BayesianKDEy.plot_simplex import plot_prev_points, plot_prev_points_matplot
from quapy.method.confidence import BayesianCC, AggregativeBootstrap
from quapy.method.aggregative import KDEyML, ACC
from quapy.protocol import UPP
@ -21,6 +21,7 @@ from collections import defaultdict
from time import time
def methods():
"""
Yields tuples (name, quantifier, hyperparams, bayesian/bootstrap_constructor, method_scope), where:
@ -30,68 +31,58 @@ def methods():
- bayesian/bootstrap_constructor: a function that instantiates the bayesian or bootstrap method with the
quantifier's optimized hyperparameters
- method_scope: one of 'multiclass', 'only_binary', 'only_multiclass', indicating the datasets the method applies to
"""
acc_hyper = {}
emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs']}
hdy_hyper = {'nbins': [3,4,5,8,16,32]}
kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
if FINEGRAINED:
lr_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]}
acc_hyper = lr_hyper
emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs'], **lr_hyper}
hdy_hyper = {'nbins': [3,4,5,8,16,32], **lr_hyper}
kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **lr_hyper}
kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **lr_hyper}
else:
acc_hyper = {}
emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs']}
hdy_hyper = {'nbins': [3,4,5,8,16,32]}
kdey_hyper = {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]}
kdey_hyper_clr = {'bandwidth': [0.05, 0.1, 0.5, 1., 2., 5.]}
multiclass_method = 'multiclass'
only_binary = 'only_binary'
only_multiclass = 'only_multiclass'
yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
# Bootstrap approaches:
# --------------------------------------------------------------------------------------------------------
#yield 'BootstrapACC', ACC(LR()), acc_hyper, lambda hyper: AggregativeBootstrap(ACC(LR()), n_test_samples=1000, random_state=0), multiclass_method
#yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
#yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
#yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
yield 'BootstrapEMQ', EMQ(LR(), on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: AggregativeBootstrap(EMQ(LR(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
yield 'BootstrapHDy', DMy(LR()), hdy_hyper, lambda hyper: AggregativeBootstrap(DMy(LR(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
# Bayesian approaches:
# --------------------------------------------------------------------------------------------------------
# yield 'BayesianACC', ACC(LR()), acc_hyper, lambda hyper: BayesianCC(LR(), mcmc_seed=0), multiclass_method
# yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
#
yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
# yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
# yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
# yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
# yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
# yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
# yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method
# yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
#yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method
# yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
# yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
yield f'BaKDE-Ait-numpyro-T*-U', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, prior='uniform', **hyper), multiclass_method
# yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method
# yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
# yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield 'BayEMQ', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', exact_train_prev=True), multiclass_method
# yield 'BayEMQ*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', exact_train_prev=True), multiclass_method
# yield 'BayEMQ*2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', exact_train_prev=True), multiclass_method
# yield 'BayEMQ*2T*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=None, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*2T01', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=0.1, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*2T10000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=10000, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*2T100000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map2', temperature=100000,
# exact_train_prev=True), multiclass_method
# yield 'BayEMQ-U-Temp1-2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=1, exact_train_prev=True), multiclass_method
yield 'BayEMQ-U-Temp*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=None, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*Temp1', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=1, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*Temp10', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=10, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*Temp100', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=100, exact_train_prev=True), multiclass_method
# yield 'BayEMQ*Temp1000', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='map', temperature=1000, exact_train_prev=True), multiclass_method
#yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(LR(), kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-Gau-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(LR(), kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
# yield f'BaKDE-Ait-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(LR(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
# yield f'BaKDE-Gau-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(LR(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
yield 'BayEMQ-U-Temp1-2', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=1, exact_train_prev=True), multiclass_method
yield 'BayEMQ-T*', CC(LR()), acc_hyper, lambda hyper: BayesianMAPLS(LR(), prior='uniform', temperature=None, exact_train_prev=True), multiclass_method
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
with qp.util.temp_seed(0):
print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
# model selection
if len(grid)>0:
train, val = train.split_stratified(train_prop=0.6, random_state=0)
train, val_prot = dataset.get_train_valprot_for_modsel()
mod_sel = GridSearchQ(
model=point_quantifier,
param_grid=grid,
protocol=qp.protocol.UPP(val, repeats=250, random_state=0),
protocol=val_prot,
refit=False,
n_jobs=-1,
verbose=True
@ -103,46 +94,51 @@ def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuan
return best_params
def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, withconf_constructor, hyper_choice_path: Path):
with qp.util.temp_seed(0):
def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier):
if hasattr(uncertainty_quantifier, 'temperature') and uncertainty_quantifier.temperature is None:
print('calibrating temperature')
train, val_prot = dataset.get_train_valprot_for_modsel()
temperature = temp_calibration(uncertainty_quantifier, train, val_prot, n_jobs=-1)
uncertainty_quantifier.temperature = temperature
training, test = dataset.train_test
def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, uncertainty_quant_constructor, hyper_choice_path: Path):
with qp.util.temp_seed(0):
# model selection
best_hyperparams = qp.util.pickled_resource(
hyper_choice_path, model_selection, training, cp(point_quantifier), grid
hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid
)
t_init = time()
withconf_quantifier = withconf_constructor(best_hyperparams)
if hasattr(withconf_quantifier, 'temperature') and withconf_quantifier.temperature is None:
print('calibrating temperature')
train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
temperature = temp_calibration(withconf_quantifier, train, val, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1)
withconf_quantifier.temperature = temperature
withconf_quantifier.fit(*training.Xy)
uncertainty_quantifier = uncertainty_quant_constructor(best_hyperparams)
temperature_calibration(dataset, uncertainty_quantifier)
training, test_generator = dataset.get_train_testprot_for_eval()
uncertainty_quantifier.fit(*training.Xy)
tr_time = time() - t_init
# test
train_prevalence = training.prevalence()
results = defaultdict(list)
test_generator = UPP(test, repeats=100, random_state=0)
pbar = tqdm(enumerate(test_generator()), total=test_generator.total())
for i, (sample_X, true_prevalence) in pbar:
t_init = time()
point_estimate, region = withconf_quantifier.predict_conf(sample_X)
point_estimate, region = uncertainty_quantifier.predict_conf(sample_X)
ttime = time()-t_init
results['true-prevs'].append(true_prevalence)
results['point-estim'].append(point_estimate)
results['shift'].append(qp.error.ae(true_prevalence, train_prevalence))
results['ae'].append(qp.error.ae(prevs_true=true_prevalence, prevs_hat=point_estimate))
results['rae'].append(qp.error.rae(prevs_true=true_prevalence, prevs_hat=point_estimate))
results['sre'].append(qp.error.sre(prevs_true=true_prevalence, prevs_hat=point_estimate, prevs_train=train_prevalence))
results['coverage'].append(region.coverage(true_prevalence))
results['amplitude'].append(region.montecarlo_proportion(n_trials=50_000))
results['test-time'].append(ttime)
results['samples'].append(region.samples)
pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}')
pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} W={np.mean(results["sre"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}')
report = {
'optim_hyper': best_hyperparams,
@ -154,34 +150,40 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
return report
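For interpreting the metrics logged above: 'coverage' records whether the true prevalence falls inside the confidence region, and 'amplitude' estimates the share of the simplex the region occupies. A hedged sketch of such a Monte Carlo estimator (the actual ConfidenceRegionABC.montecarlo_proportion may sample differently):

import numpy as np

def montecarlo_proportion(region, n_classes, n_trials=50_000, seed=0):
    # estimate the region's share of the simplex by drawing prevalence
    # vectors uniformly (flat Dirichlet) and testing membership, which
    # mirrors the 'true_prev in region' checks used elsewhere in the code
    rng = np.random.default_rng(seed)
    points = rng.dirichlet(np.ones(n_classes), size=n_trials)
    return float(np.mean([p in region for p in points]))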
def check_skip_experiment(method_scope, dataset: DatasetHandler):
if method_scope == 'only_binary' and not dataset.is_binary():
return True
if method_scope == 'only_multiclass' and dataset.is_binary():
return True
return False
if __name__ == '__main__':
result_dir = Path('./results')
result_dir = RESULT_DIR
for data_handler in [LeQuaHandler]:#, UCIMulticlassHandler]:
for dataset in data_handler.iter():
qp.environ['SAMPLE_SIZE'] = dataset.sample_size()
print(f'dataset={dataset}')
problem_type = 'binary' if dataset.is_binary() else 'multiclass'
for setup in [multiclass]: # [binary, multiclass]:
qp.environ['SAMPLE_SIZE'] = setup['sample_size']
for data_name in setup['datasets']:
print(f'dataset={data_name}')
# if data_name=='breast-cancer' or data_name.startswith("cmc") or data_name.startswith("ctg"):
# print(f'skipping dataset: {data_name}')
# continue
data = setup['fetch_fn'](data_name)
is_binary = data.n_classes==2
result_subdir = result_dir / ('binary' if is_binary else 'multiclass')
hyper_subdir = result_dir / 'hyperparams' / ('binary' if is_binary else 'multiclass')
for method_name, surrogate_quant, hyper_params, withconf_constructor, method_scope in methods():
if method_scope == 'only_binary' and not is_binary:
if check_skip_experiment(method_scope, dataset):
continue
if method_scope == 'only_multiclass' and is_binary:
continue
result_path = experiment_path(result_subdir, data_name, method_name)
hyper_path = experiment_path(hyper_subdir, data_name, surrogate_quant.__class__.__name__)
result_path = experiment_path(result_dir / problem_type, dataset.name(), method_name)
hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name(), surrogate_quant.__class__.__name__)
report = qp.util.pickled_resource(
result_path, experiment, data, surrogate_quant, method_name, hyper_params, withconf_constructor, hyper_path
result_path, experiment, dataset, surrogate_quant, method_name, hyper_params, withconf_constructor, hyper_path
)
print(f'dataset={data_name}, '
print(f'dataset={dataset}, '
f'method={method_name}: '
f'mae={report["results"]["ae"].mean():.5f}, '
f'W={report["results"]["sre"].mean():.5f}, '
f'coverage={report["results"]["coverage"].mean():.5f}, '
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')

View File

@ -7,10 +7,11 @@ import pandas as pd
from glob import glob
from pathlib import Path
import quapy as qp
from BayesianKDEy.commons import fetch_UCI_binary, fetch_UCI_multiclass
from BayesianKDEy.commons import RESULT_DIR, UCIMulticlassHandler
from error import dist_aitchison
from quapy.method.confidence import ConfidenceIntervals
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
import quapy.functional as F
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
@ -20,6 +21,31 @@ pd.set_option("display.precision", 4)
pd.set_option("display.float_format", "{:.4f}".format)
# methods = None # show all methods
methods = ['BayesianACC',
#'BayesianKDEy',
#'BaKDE-emcee',
# 'BaKDE-numpyro',
# 'BaKDE-numpyro-T2',
# 'BaKDE-numpyro-T10',
# 'BaKDE-numpyro-T*',
'BaKDE-Ait-numpyro',
'BaKDE-Ait-T*',
'BaKDE-Gau-numpyro',
'BaKDE-Gau-T*',
'BayEMQ-U-Temp1-2',
'BayEMQ-T*',
#'BayEMQ-NoInit',
#'BayEMQ-U-Temp*',
# 'BayEMQ*2Temp1',
# 'BayEMQ*2Temp*'
# 'BootstrapACC',
# 'BootstrapHDy',
# 'BootstrapKDEy',
# 'BootstrapEMQ'
]
def region_score(true_prev, region: ConfidenceRegionABC):
amp = region.montecarlo_proportion(50_000)
if true_prev in region:
@ -80,26 +106,6 @@ def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwar
update_pickle(report, file, update_fields)
# methods = None # show all methods
methods = ['BayesianACC', #'BayesianKDEy',
#'BaKDE-emcee',
# 'BaKDE-numpyro',
# 'BaKDE-numpyro-T2',
# 'BaKDE-numpyro-T10',
# 'BaKDE-numpyro-T*',
# 'BaKDE-Ait-numpyro',
# 'BaKDE-Ait-numpyro-T*',
'BaKDE-Ait-numpyro-T*-U',
'BayEMQ-U-Temp1-2',
'BayEMQ-U-Temp*',
# 'BayEMQ*2Temp1',
# 'BayEMQ*2Temp*'
# 'BootstrapACC',
# 'BootstrapHDy',
# 'BootstrapKDEy',
# 'BootstrapEMQ'
]
def nicer(name:str):
replacements = {
@ -112,14 +118,19 @@ def nicer(name:str):
name = name.replace(k,v)
return name
for setup in ['multiclass']:
path = f'./results/{setup}/*.pkl'
base_dir = RESULT_DIR
for dataset_handler in [UCIMulticlassHandler]:
problem_type = 'binary' if dataset_handler.is_binary() else 'multiclass'
path = f'./{base_dir}/{problem_type}/*.pkl'
table = defaultdict(list)
for file in tqdm(glob(path), desc='processing results'):
file = Path(file)
dataset, method = file.name.replace('.pkl', '').split('__')
if methods is not None and method not in methods:
if method not in methods:
continue
report = pickle.load(open(file, 'rb'))
results = report['results']
n_samples = len(results['ae'])
@ -166,53 +177,42 @@ for setup in ['multiclass']:
n_classes = {}
tr_size = {}
for dataset in df['dataset'].unique():
fetch_fn = {
'binary': fetch_UCI_binary,
'multiclass': fetch_UCI_multiclass
}[setup]
data = fetch_fn(dataset)
n_classes[dataset] = data.n_classes
tr_size[dataset] = len(data.training)
tr_prev = {}
for dataset in dataset_handler.iter():
train = dataset.get_training()
n_classes[dataset.name()] = train.n_classes  # key by name: the df 'dataset' column holds name strings
tr_size[dataset.name()] = len(train)
tr_prev[dataset.name()] = F.strprev(train.prevalence())
# remove datasets with more than max_classes classes
max_classes = 25
min_train = 500
ignore_datasets = ['poker_hand', 'hcv']
for data_name, n in n_classes.items():
if n > max_classes:
df = df[df["dataset"] != data_name]
for data_name, n in tr_size.items():
if n < min_train:
df = df[df["dataset"] != data_name]
for data_name, n in tr_size.items():
if data_name in ignore_datasets:
df = df[df["dataset"] != data_name]
# max_classes = 25
# min_train = 500
# ignore_datasets = ['poker_hand', 'hcv']
# for data_name, n in n_classes.items():
# if n > max_classes:
# df = df[df["dataset"] != data_name]
# for data_name, n in tr_size.items():
# if n < min_train:
# df = df[df["dataset"] != data_name]
# for data_name, n in tr_size.items():
# if data_name in ignore_datasets:
# df = df[df["dataset"] != data_name]
for region in ['CI']: #, 'CLR', 'ILR', 'CI']:
if setup == 'binary' and region=='ILR':
if problem_type == 'binary' and region=='ILR':
continue
# pv = pd.pivot_table(
# df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
# )
pv = pd.pivot_table(
df, index='dataset', columns='method', values=[
# f'amperr-{region}',
# f'a-{region}',
f'c-{region}',
# f'w-{region}',
'ae',
'SRE',
# 'rae',
# f'aitch',
# f'aitch-well'
# 'reg-score-ILR',
], margins=True
)
pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
print(f'{setup=}')
print(pv)
print('-'*80)
for column in [f'a-{region}', f'c-{region}', 'ae', 'SRE']:
pv = pd.pivot_table(
df, index='dataset', columns='method', values=column, margins=True
)
pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
#pv['tr-prev'] = pv.index.map(tr_prev)
pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
print(f'{problem_type=} {column=}')
print(pv)
print('-'*80)

View File

@ -1,6 +1,6 @@
from quapy.data import LabelledCollection
from quapy.method.confidence import WithConfidenceABC
from quapy.protocol import UPP
from quapy.protocol import AbstractProtocol
import numpy as np
from tqdm import tqdm
import quapy as qp
@ -8,16 +8,12 @@ from joblib import Parallel, delayed
import copy
def temp_calibration(method:WithConfidenceABC,
train:LabelledCollection,
val:LabelledCollection,
val_prot:AbstractProtocol,
temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.],
num_samples=100,
nominal_coverage=0.95,
amplitude_threshold='auto',
random_state=0,
n_jobs=1,
verbose=True):
@ -31,41 +27,7 @@ def temp_calibration(method:WithConfidenceABC,
if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1:
print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions')
method.fit(*train.Xy)
label_shift_prot = UPP(val, repeats=num_samples, random_state=random_state)
# results = []
# temp_grid = sorted(temp_grid)
# for temp in temp_grid:
# method.temperature = temp
# coverage = 0
# amplitudes = []
# errs = []
# pbar = tqdm(enumerate(label_shift_prot()), total=label_shift_prot.total(), disable=not verbose)
# for i, (sample, prev) in pbar:
# point_estim, conf_region = method.predict_conf(sample)
# if prev in conf_region:
# coverage += 1
# amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
# errs.append(qp.error.mae(prev, point_estim))
# if verbose:
# pbar.set_description(
# f'temperature={temp:.2f}, '
# f'coverage={coverage/(i+1):.4f}, '
# f'amplitude={np.mean(amplitudes):.4f},'
# f'mae={np.mean(errs):.4f}'
# )
#
# mean_coverage = coverage / label_shift_prot.total()
# mean_amplitude = np.mean(amplitudes)
#
# if mean_amplitude < amplitude_threshold:
# results.append((temp, mean_coverage, mean_amplitude))
# else:
# break
def evaluate_temperature(temp):
def evaluate_temperature_job(temp):
local_method = copy.deepcopy(method)
local_method.temperature = temp
@ -73,7 +35,7 @@ def temp_calibration(method:WithConfidenceABC,
amplitudes = []
errs = []
for i, (sample, prev) in enumerate(label_shift_prot()):
for i, (sample, prev) in enumerate(val_prot()):
point_estim, conf_region = local_method.predict_conf(sample)
if prev in conf_region:
@ -82,15 +44,19 @@ def temp_calibration(method:WithConfidenceABC,
amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
errs.append(qp.error.mae(prev, point_estim))
mean_coverage = coverage / label_shift_prot.total()
mean_coverage = coverage / val_prot.total()
mean_amplitude = np.mean(amplitudes)
if verbose:
print(f'Temperature={temp} got coverage={mean_coverage*100:.2f}% amplitude={mean_amplitude*100:.2f}%')
return temp, mean_coverage, mean_amplitude
temp_grid = sorted(temp_grid)
method.fit(*train.Xy)
raw_results = Parallel(n_jobs=n_jobs, backend="loky")(
delayed(evaluate_temperature)(temp)
delayed(evaluate_temperature_job)(temp)
for temp in tqdm(temp_grid, disable=not verbose)
)
results = [