broken submodule fix
This commit is contained in:
parent
81472b9d25
commit
bb5763da76
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "result_table"]
|
||||
path = result_table
|
||||
url = gitea@gitea-s2i2s.isti.cnr.it:moreo/result_table.git
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
- Things to try:
|
||||
- Why not optmize the calibration of the classifier, instead of the classifier as a component of the quantifier?
|
||||
- init chain helps? [seems irrelevant in MAPLS...]
|
||||
- Aitchison kernel is better?
|
||||
- other classifiers?
|
||||
|
|
|
|||
|
|
@ -101,6 +101,13 @@ class KDEyCLR(KDEyML):
|
|||
random_state=random_state, kernel='aitchison'
|
||||
)
|
||||
|
||||
class KDEyCLR2(KDEyML):
|
||||
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
|
||||
super().__init__(
|
||||
classifier=classifier, fit_classifier=fit_classifier, val_split=val_split, bandwidth=bandwidth,
|
||||
random_state=random_state, kernel='aitchison'
|
||||
)
|
||||
|
||||
|
||||
class KDEyILR(KDEyML):
|
||||
def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
|
||||
|
|
|
|||
|
|
@ -180,7 +180,7 @@ class VisualDataHandler(DatasetHandler):
|
|||
|
||||
@classmethod
|
||||
def get_datasets(cls):
|
||||
datasets = ['cifar10', 'mnist', 'cifar100coarse', 'fashionmnist', 'svhn'] #+ ['cifar100']
|
||||
datasets = ['cifar100coarse', 'cifar10', 'mnist', 'fashionmnist', 'svhn'] #+ ['cifar100']
|
||||
# datasets_feat = [f'{d}-f' for d in datasets]
|
||||
datasets_feat = [f'{d}-l' for d in datasets]
|
||||
return datasets_feat # + datasets
|
||||
|
|
@ -295,7 +295,7 @@ class UCIDatasetHandler(DatasetHandler, ABC):
|
|||
|
||||
class UCIMulticlassHandler(UCIDatasetHandler):
|
||||
|
||||
DATASETS = [d for d in qp.datasets.UCI_MULTICLASS_DATASETS if d not in frozenset(['hcv', 'poker_hand'])]
|
||||
DATASETS = sorted([d for d in qp.datasets.UCI_MULTICLASS_DATASETS if d not in frozenset(['hcv', 'poker_hand'])])
|
||||
|
||||
def __init__(self, name, n_val_samples=100, n_test_samples=100, sample_size=1000, random_state=0):
|
||||
super().__init__(name, n_val_samples, n_test_samples, sample_size, random_state)
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from sklearn.linear_model import LogisticRegression
|
|||
from copy import deepcopy as cp
|
||||
import quapy as qp
|
||||
from BayesianKDEy.commons import KDEyReduce
|
||||
from BayesianKDEy.methods import get_experimental_methods, MethodDescriptor
|
||||
from _bayeisan_kdey import BayesianKDEy
|
||||
from _bayesian_mapls import BayesianMAPLS
|
||||
from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh
|
||||
|
|
@ -24,7 +25,7 @@ from time import time
|
|||
|
||||
|
||||
|
||||
def methods(data_handler: DatasetHandler):
|
||||
def methods___depr():
|
||||
"""
|
||||
Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
|
||||
- name: is a str representing the name of the method (e.g., 'BayesianKDEy')
|
||||
|
|
@ -33,26 +34,23 @@ def methods(data_handler: DatasetHandler):
|
|||
- bayesian/bootstrap_constructor: is a function that instantiates the bayesian o bootstrap method with the
|
||||
quantifier with optimized hyperparameters
|
||||
"""
|
||||
if False: # isinstance(data_handler, VisualDataHandler):
|
||||
Cls = MockClassifierFromPosteriors
|
||||
cls_hyper = {}
|
||||
val_split = data_handler.get_validation().Xy # use this specific collection
|
||||
pass
|
||||
else:
|
||||
Cls = LogisticRegression
|
||||
cls_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]}
|
||||
val_split = 5 # k-fold cross-validation
|
||||
|
||||
Cls = LogisticRegression
|
||||
cls_hyper = {'classifier__C': np.logspace(-4,4,9), 'classifier__class_weight': ['balanced', None]}
|
||||
val_split = 5 # k-fold cross-validation
|
||||
cc_hyper = cls_hyper
|
||||
acc_hyper = cls_hyper
|
||||
# emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs'], **cls_hyper}
|
||||
hdy_hyper = {'nbins': [3,4,5,8,16,32], **cls_hyper}
|
||||
kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper}
|
||||
kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper}
|
||||
|
||||
band ={'bandwidth':np.logspace(-3,-1,10)}
|
||||
multiclass_method = 'multiclass'
|
||||
only_binary = 'only_binary'
|
||||
only_multiclass = 'only_multiclass'
|
||||
|
||||
# surrogate quantifiers
|
||||
cc = CC(Cls())
|
||||
acc = ACC(Cls(), val_split=val_split)
|
||||
hdy = DMy(Cls(), val_split=val_split)
|
||||
kde_gau = KDEyML(Cls(), val_split=val_split)
|
||||
|
|
@ -65,22 +63,26 @@ def methods(data_handler: DatasetHandler):
|
|||
|
||||
# Bootstrap approaches:
|
||||
# --------------------------------------------------------------------------------------------------------
|
||||
# yield 'BootstrapCC', cc, cc_hyper, lambda hyper: AggregativeBootstrap(CC(Cls()), n_test_samples=1000, random_state=0), multiclass_method
|
||||
#yield 'BootstrapACC', acc, acc_hyper, lambda hyper: _AggregativeBootstrap(ACC(Cls()), n_test_samples=1000, random_state=0), multiclass_method
|
||||
#yield 'BootstrapEMQ', emq, on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: _AggregativeBootstrap(EMQ(Cls(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
|
||||
#yield 'BootstrapHDy', hdy, hdy_hyper, lambda hyper: _AggregativeBootstrap(DMy(Cls(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
|
||||
#yield 'BootstrapKDEy', kde_gau, kdey_hyper, lambda hyper: _AggregativeBootstrap(KDEyML(Cls(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
|
||||
|
||||
# Bayesian approaches:
|
||||
# Bayesian approaches: (*=temp calibration auto threshold and coverage sim to nominal; +=temp calibration w/o amplitude coverage, for winkler criterion, !=same but alpha=0.005 for winkler)
|
||||
# --------------------------------------------------------------------------------------------------------
|
||||
# yield 'BayesianACC', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method
|
||||
# yield 'BayesianACC*', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method
|
||||
# yield 'BayesianACC+', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method
|
||||
# yield 'BayesianACC!', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, temperature=None, mcmc_seed=0), multiclass_method
|
||||
#yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary
|
||||
# yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
#yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
#yield f'BaKDE-Gau-scale', kde_gau_scale, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
yield f'BaKDE-Gau-pca5', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
yield f'BaKDE-Gau-pca5*', kde_gau_pca, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
yield f'BaKDE-Gau-pca10', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
yield f'BaKDE-Gau-pca10*', kde_gau_pca10, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
#yield f'BaKDE-Gau-pca5', kde_gau_pca, band, lambda hyper: BayesianKDEy(Cls(), reduce=5, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
#yield f'BaKDE-Gau-pca5*', kde_gau_pca, band, lambda hyper: BayesianKDEy(Cls(), reduce=5, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
#yield f'BaKDE-Gau-pca10', kde_gau_pca10, band, lambda hyper: BayesianKDEy(Cls(), reduce=10, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
#yield f'BaKDE-Gau-pca10*', kde_gau_pca10, band, lambda hyper: BayesianKDEy(Cls(), reduce=10, temperature=None, kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split, **hyper), multiclass_method
|
||||
# yield f'BaKDE-Gau-H0', KDEyFresh(Cls(), bandwidth=0.4), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=0.4, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
# yield f'BaKDE-Gau-H1', KDEyFresh(Cls(), bandwidth=1.), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1., kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
# yield f'BaKDE-Gau-H2', KDEyFresh(Cls(), bandwidth=1.5), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1.5,
|
||||
|
|
@ -89,19 +91,21 @@ def methods(data_handler: DatasetHandler):
|
|||
# engine='numpyro',
|
||||
# **hyper), multiclass_method
|
||||
# yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
|
||||
# yield f'BaKDE-Ait-T!', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
|
||||
#yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
|
||||
#yield f'BaKDE-Gau-T!', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
|
||||
# yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
|
||||
# yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method
|
||||
# yield 'BayEMQ!', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method
|
||||
# yield 'BaEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
|
||||
|
||||
# yield 'BaACC!', acc, acc_hyper, lambda hyper: BayesianCC(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), temperature=None, mcmc_seed=0), multiclass_method
|
||||
# yield 'BaEMQ!', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(**{k.replace('classifier__', ''): v for k, v in hyper.items()}), prior='uniform', temperature=None, exact_train_prev=False), multiclass_method
|
||||
|
||||
|
||||
def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
|
||||
with qp.util.temp_seed(0):
|
||||
if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid:
|
||||
def scale_bandwidth(bandwidth, n_classes, beta=0.5):
|
||||
return bandwidth * np.power(n_classes, beta)
|
||||
n = dataset.get_training().n_classes
|
||||
grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']]
|
||||
print('bandwidth scaled')
|
||||
point_quantifier = cp(point_quantifier)
|
||||
print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
|
||||
# model selection
|
||||
if len(grid)>0:
|
||||
|
|
@ -127,30 +131,26 @@ def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier):
|
|||
if uncertainty_quantifier.temperature is None:
|
||||
print('calibrating temperature')
|
||||
train, val_prot = dataset.get_train_valprot_for_modsel()
|
||||
if dataset.name.startswith('LeQua'):
|
||||
temp_grid=[100., 500, 1000, 5_000, 10_000, 50_000]
|
||||
else:
|
||||
temp_grid=[.5, 1., 1.5, 2., 5., 10., 100., 1000.]
|
||||
temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=.999)
|
||||
temp_grid=[1., .5, 1.5, 2., 5., 10., 100., 1000.]
|
||||
temperature = temp_calibration(uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=1., criterion='winkler')
|
||||
uncertainty_quantifier.temperature = temperature
|
||||
else:
|
||||
temperature = uncertainty_quantifier.temperature
|
||||
return temperature
|
||||
|
||||
|
||||
|
||||
def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, uncertainty_quant_constructor, hyper_choice_path: Path):
|
||||
def experiment(dataset: DatasetHandler, method: MethodDescriptor, hyper_choice_path: Path):
|
||||
|
||||
with qp.util.temp_seed(0):
|
||||
|
||||
# model selection
|
||||
best_hyperparams = qp.util.pickled_resource(
|
||||
hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid
|
||||
hyper_choice_path, model_selection, dataset, method.surrogate_quantifier(), method.hyper_parameters
|
||||
)
|
||||
print(f'{best_hyperparams=}')
|
||||
|
||||
t_init = time()
|
||||
uncertainty_quantifier = uncertainty_quant_constructor(best_hyperparams)
|
||||
uncertainty_quantifier = method.uncertainty_aware_quantifier(best_hyperparams)
|
||||
temperature = temperature_calibration(dataset, uncertainty_quantifier)
|
||||
training, test_generator = dataset.get_train_testprot_for_eval()
|
||||
uncertainty_quantifier.fit(*training.Xy)
|
||||
|
|
@ -176,7 +176,13 @@ def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier,
|
|||
results['test-time'].append(ttime)
|
||||
results['samples'].append(region.samples)
|
||||
|
||||
pbar.set_description(f'{method_name} MAE={np.mean(results["ae"]):.5f} W={np.mean(results["sre"]):.5f} Cov={np.mean(results["coverage"]):.5f} AMP={np.mean(results["amplitude"]):.5f}')
|
||||
pbar.set_description(
|
||||
f'{method.name} '
|
||||
f'MAE={np.mean(results["ae"]):.5f} '
|
||||
f'W={np.mean(results["sre"]):.5f} '
|
||||
f'Cov={np.mean(results["coverage"]):.5f} '
|
||||
f'AMP={np.mean(results["amplitude"]):.5f}'
|
||||
)
|
||||
|
||||
report = {
|
||||
'optim_hyper': best_hyperparams,
|
||||
|
|
@ -189,40 +195,31 @@ def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier,
|
|||
return report
|
||||
|
||||
|
||||
def check_skip_experiment(method_scope, dataset: DatasetHandler):
|
||||
if method_scope == 'only_binary' and not dataset.is_binary():
|
||||
return True
|
||||
if method_scope == 'only_multiclass' and dataset.is_binary():
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
result_dir = RESULT_DIR
|
||||
|
||||
for data_handler in [CIFAR100Handler]:#, UCIMulticlassHandler,LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
|
||||
for data_handler in [LeQuaHandler]:#, UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
|
||||
for dataset in data_handler.iter():
|
||||
qp.environ['SAMPLE_SIZE'] = dataset.sample_size
|
||||
print(f'dataset={dataset.name}')
|
||||
#if dataset.name != 'abalone':
|
||||
# continue
|
||||
|
||||
problem_type = 'binary' if dataset.is_binary() else 'multiclass'
|
||||
|
||||
for method_name, surrogate_quant, hyper_params, withconf_constructor, method_scope in methods(dataset):
|
||||
if check_skip_experiment(method_scope, dataset):
|
||||
for method in get_experimental_methods():
|
||||
# skip combination?
|
||||
if method.binary_only() and not dataset.is_binary():
|
||||
continue
|
||||
|
||||
result_path = experiment_path(result_dir / problem_type, dataset.name, method_name)
|
||||
hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, surrogate_quant.__class__.__name__)
|
||||
result_path = experiment_path(result_dir / problem_type, dataset.name, method.name)
|
||||
hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, method.surrogate_quantifier_name())
|
||||
|
||||
report = qp.util.pickled_resource(
|
||||
result_path, experiment, dataset, surrogate_quant, method_name, hyper_params, withconf_constructor, hyper_path
|
||||
result_path, experiment, dataset, method, hyper_path
|
||||
)
|
||||
|
||||
print(f'dataset={dataset.name}, '
|
||||
f'method={method_name}: '
|
||||
f'method={method.name}: '
|
||||
f'mae={report["results"]["ae"].mean():.5f}, '
|
||||
f'W={report["results"]["sre"].mean():.5f}, '
|
||||
f'coverage={report["results"]["coverage"].mean():.5f}, '
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import pickle
|
||||
from collections import defaultdict
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
from joblib import Parallel, delayed
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
|
|
@ -9,10 +10,16 @@ from pathlib import Path
|
|||
import quapy as qp
|
||||
from BayesianKDEy.commons import RESULT_DIR
|
||||
from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler
|
||||
from comparison_group import SelectGreaterThan, SelectByName, SelectSmallerThan
|
||||
from format import FormatModifierSelectColor
|
||||
from quapy.error import dist_aitchison
|
||||
from quapy.method.confidence import ConfidenceIntervals
|
||||
from quapy.method.confidence import ConfidenceIntervals, ConfidenceIntervalsILR, ConfidenceIntervalsCLR
|
||||
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
|
||||
import quapy.functional as F
|
||||
from result_path.src.table import LatexTable
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from itertools import chain
|
||||
|
||||
pd.set_option('display.max_columns', None)
|
||||
pd.set_option('display.width', 2000)
|
||||
|
|
@ -23,19 +30,16 @@ pd.set_option("display.float_format", "{:.4f}".format)
|
|||
|
||||
|
||||
# methods = None # show all methods
|
||||
methods = ['BayesianACC',
|
||||
'BaKDE-Ait-numpyro',
|
||||
'BaKDE-Ait-T*',
|
||||
'BaKDE-Gau-numpyro',
|
||||
'BaKDE-Gau-T*', 'BaKDE-Gau-pca5', 'BaKDE-Gau-pca5*', 'BaKDE-Gau-pca10', 'BaKDE-Gau-pca10*',
|
||||
# 'BayEMQ-U-Temp1-2',
|
||||
# 'BayEMQ-T*',
|
||||
'BayEMQ',
|
||||
'BayEMQ*',
|
||||
# 'BootstrapACC',
|
||||
# 'BootstrapHDy',
|
||||
# 'BootstrapKDEy',
|
||||
# 'BootstrapEMQ'
|
||||
methods = ['BoCC',
|
||||
'BaACC!',
|
||||
'BaEMQ!',
|
||||
'BaKDE-Gau-T!',
|
||||
'BaKDE-Ait-T!',
|
||||
'BaKDE-Ait-T!2'
|
||||
#'BootstrapACC',
|
||||
#'BootstrapHDy',
|
||||
#'BootstrapKDEy',
|
||||
#'BootstrapEMQ'
|
||||
]
|
||||
|
||||
def region_score(true_prev, region: ConfidenceRegionABC):
|
||||
|
|
@ -55,11 +59,15 @@ def compute_coverage_amplitude(region_constructor, **kwargs):
|
|||
|
||||
def process_one(samples, true_prevs):
|
||||
region = region_constructor(samples, **kwargs)
|
||||
if isinstance(region, ConfidenceIntervals):
|
||||
if isinstance(region, ConfidenceIntervals) or isinstance(region, ConfidenceIntervalsCLR) or isinstance(region, ConfidenceIntervalsILR):
|
||||
winkler = region.mean_winkler_score(true_prevs)
|
||||
# winkler_e = region.mean_winkler_score(true_prevs, add_ae=True)
|
||||
cov_soft = region.coverage_soft(true_prevs)
|
||||
else:
|
||||
winkler = None
|
||||
return region.coverage(true_prevs), region.montecarlo_proportion(), winkler
|
||||
# winkler_e = None
|
||||
cov_soft = None
|
||||
return region.coverage(true_prevs), region.montecarlo_proportion(), winkler, cov_soft
|
||||
|
||||
out = Parallel(n_jobs=3)(
|
||||
delayed(process_one)(samples, true_prevs)
|
||||
|
|
@ -71,8 +79,8 @@ def compute_coverage_amplitude(region_constructor, **kwargs):
|
|||
)
|
||||
|
||||
# unzip results
|
||||
coverage, amplitude, winkler = zip(*out)
|
||||
return list(coverage), list(amplitude), list(winkler)
|
||||
coverage, amplitude, winkler, cov_soft = zip(*out)
|
||||
return list(coverage), list(amplitude), list(winkler), list(cov_soft)
|
||||
|
||||
|
||||
def update_pickle(report, pickle_path, updated_dict:dict):
|
||||
|
|
@ -83,35 +91,151 @@ def update_pickle(report, pickle_path, updated_dict:dict):
|
|||
|
||||
def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs):
|
||||
if f'coverage-{conf_name}' not in report:
|
||||
covs, amps, winkler = compute_coverage_amplitude(conf_region_class, **kwargs)
|
||||
|
||||
# amperr (lower is better) counts the amplitude when the true vale was covered, or 1 (max amplitude) otherwise
|
||||
amperrs = [amp if cov == 1.0 else 1. for amp, cov in zip(amps, covs)]
|
||||
covs, amps, winkler, cov_soft = compute_coverage_amplitude(conf_region_class, **kwargs)
|
||||
|
||||
update_fields = {
|
||||
f'coverage-{conf_name}': covs,
|
||||
f'amplitude-{conf_name}': amps,
|
||||
f'winkler-{conf_name}': winkler,
|
||||
f'amperr-{conf_name}': amperrs,
|
||||
f'coverage-soft-{conf_name}': cov_soft
|
||||
}
|
||||
|
||||
update_pickle(report, file, update_fields)
|
||||
|
||||
|
||||
def pareto_front(df, x_col, y_col, maximize_y=True, minimize_x=True):
|
||||
"""
|
||||
Returns a boolean mask indicating whether each row is Pareto-optimal.
|
||||
"""
|
||||
X = df[x_col].values
|
||||
Y = df[y_col].values
|
||||
|
||||
def nicer(name:str):
|
||||
is_pareto = np.ones(len(df), dtype=bool)
|
||||
|
||||
for i in range(len(df)):
|
||||
if not is_pareto[i]:
|
||||
continue
|
||||
for j in range(len(df)):
|
||||
if i == j:
|
||||
continue
|
||||
|
||||
better_or_equal_x = X[j] <= X[i] if minimize_x else X[j] >= X[i]
|
||||
better_or_equal_y = Y[j] >= Y[i] if maximize_y else Y[j] <= Y[i]
|
||||
|
||||
strictly_better = (
|
||||
(X[j] < X[i] if minimize_x else X[j] > X[i]) or
|
||||
(Y[j] > Y[i] if maximize_y else Y[j] < Y[i])
|
||||
)
|
||||
|
||||
if better_or_equal_x and better_or_equal_y and strictly_better:
|
||||
is_pareto[i] = False
|
||||
break
|
||||
|
||||
return is_pareto
|
||||
|
||||
def plot_coverage_vs_amplitude(
|
||||
df,
|
||||
coverage_col,
|
||||
amplitude_col="a-CI",
|
||||
method_col="method",
|
||||
dataset_col=None,
|
||||
error_col=None,
|
||||
error_threshold=None,
|
||||
nominal_coverage=0.95,
|
||||
title=None,
|
||||
):
|
||||
df_plot = df.copy()
|
||||
|
||||
# Optional error filtering
|
||||
if error_col is not None and error_threshold is not None:
|
||||
df_plot = df_plot[df_plot[error_col] <= error_threshold]
|
||||
|
||||
# Compute Pareto front
|
||||
pareto_mask = pareto_front(
|
||||
df_plot,
|
||||
x_col=amplitude_col,
|
||||
y_col=coverage_col,
|
||||
maximize_y=True,
|
||||
minimize_x=True
|
||||
)
|
||||
|
||||
plt.figure(figsize=(7, 6))
|
||||
|
||||
# Base scatter
|
||||
sns.scatterplot(
|
||||
data=df_plot,
|
||||
x=amplitude_col,
|
||||
y=coverage_col,
|
||||
hue=method_col,
|
||||
# style=dataset_col,
|
||||
alpha=0.6,
|
||||
s=60,
|
||||
legend=True
|
||||
)
|
||||
|
||||
# Highlight Pareto front
|
||||
plt.scatter(
|
||||
df_plot.loc[pareto_mask, amplitude_col],
|
||||
df_plot.loc[pareto_mask, coverage_col],
|
||||
facecolors='none',
|
||||
edgecolors='black',
|
||||
s=120,
|
||||
linewidths=1.5,
|
||||
label="Pareto front"
|
||||
)
|
||||
|
||||
# Nominal coverage line
|
||||
plt.axhline(
|
||||
nominal_coverage,
|
||||
linestyle="--",
|
||||
color="gray",
|
||||
linewidth=1,
|
||||
label="Nominal coverage"
|
||||
)
|
||||
|
||||
plt.xlabel("Amplitude (fraction of simplex)")
|
||||
plt.ylabel("Coverage")
|
||||
plt.ylim(0, 1.05)
|
||||
|
||||
if title is not None:
|
||||
plt.title(title)
|
||||
|
||||
plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
|
||||
plt.tight_layout()
|
||||
plt.show()
|
||||
|
||||
|
||||
def nicer_method(name:str):
|
||||
replacements = {
|
||||
'Bayesian': 'Ba',
|
||||
# 'Bayesian': 'Ba',
|
||||
'Bootstrap': 'Bo',
|
||||
'-numpyro': '',
|
||||
'emcee': 'emc',
|
||||
'-T*': '*'
|
||||
'-T*': '*',
|
||||
'-T!': '',
|
||||
'!': '',
|
||||
'-Ait': r'$^{(\mathrm{Ait})}$',
|
||||
'-Gau': r'$^{(\mathrm{Gau})}$'
|
||||
}
|
||||
for k, v in replacements.items():
|
||||
name = name.replace(k,v)
|
||||
return name
|
||||
|
||||
|
||||
def nicer_data(name:str):
|
||||
replacements = {
|
||||
'cifar': 'CIFAR',
|
||||
'-l': '',
|
||||
'mnist': 'MNIST',
|
||||
'fashionmnist': 'fashionMNIST',
|
||||
'svhn': 'SVHN',
|
||||
'100coarse': '100(20)',
|
||||
}
|
||||
for k, v in replacements.items():
|
||||
name = name.replace(k, v)
|
||||
return name
|
||||
|
||||
|
||||
base_dir = RESULT_DIR
|
||||
|
||||
table = defaultdict(list)
|
||||
|
|
@ -119,62 +243,56 @@ n_classes = {}
|
|||
tr_size = {}
|
||||
tr_prev = {}
|
||||
|
||||
for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
|
||||
problem_type = 'binary' if dataset_handler.is_binary() else 'multiclass'
|
||||
path = f'./{base_dir}/{problem_type}/*.pkl'
|
||||
|
||||
for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
|
||||
file = Path(file)
|
||||
dataset, method = file.name.replace('.pkl', '').split('__')
|
||||
if (method not in methods) or (dataset not in dataset_handler.get_datasets()):
|
||||
continue
|
||||
|
||||
report = pickle.load(open(file, 'rb'))
|
||||
results = report['results']
|
||||
n_samples = len(results['ae'])
|
||||
table['method'].extend([nicer(method)] * n_samples)
|
||||
table['dataset'].extend([dataset] * n_samples)
|
||||
table['ae'].extend(results['ae'])
|
||||
table['rae'].extend(results['rae'])
|
||||
# table['c-CI'].extend(results['coverage'])
|
||||
# table['a-CI'].extend(results['amplitude'])
|
||||
|
||||
update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
|
||||
# update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
|
||||
# update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
|
||||
# update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
|
||||
|
||||
table['c-CI'].extend(report['coverage-CI'])
|
||||
table['a-CI'].extend(report['amplitude-CI'])
|
||||
table['w-CI'].extend(report['winkler-CI'])
|
||||
table['amperr-CI'].extend(report['amperr-CI'])
|
||||
|
||||
# table['c-CE'].extend(report['coverage-CE'])
|
||||
# table['a-CE'].extend(report['amplitude-CE'])
|
||||
# table['amperr-CE'].extend(report['amperr-CE'])
|
||||
|
||||
# table['c-CLR'].extend(report['coverage-CLR'])
|
||||
# table['a-CLR'].extend(report['amplitude-CLR'])
|
||||
# table['amperr-CLR'].extend(report['amperr-CLR'])
|
||||
|
||||
# table['c-ILR'].extend(report['coverage-ILR'])
|
||||
# table['a-ILR'].extend(report['amplitude-ILR'])
|
||||
# table['amperr-ILR'].extend(report['amperr-ILR'])
|
||||
|
||||
table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
|
||||
table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
|
||||
# table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
|
||||
# table['aitch'].extend()
|
||||
# table['reg-score-ILR'].extend(
|
||||
# [region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])]
|
||||
# )
|
||||
|
||||
for dataset in dataset_handler.iter():
|
||||
dataset_class = [UCIMulticlassHandler, CIFAR100Handler, VisualDataHandler, LeQuaHandler]
|
||||
dataset_order = []
|
||||
for handler in dataset_class:
|
||||
for dataset in handler.iter():
|
||||
dataset_order.append(dataset.name)
|
||||
train = dataset.get_training()
|
||||
n_classes[dataset.name] = train.n_classes
|
||||
tr_size[dataset.name] = len(train)
|
||||
tr_prev[dataset.name] = F.strprev(train.prevalence())
|
||||
|
||||
|
||||
problem_type = 'multiclass'
|
||||
path = f'./{base_dir}/{problem_type}/*.pkl'
|
||||
|
||||
for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
|
||||
file = Path(file)
|
||||
dataset, method = file.name.replace('.pkl', '').split('__')
|
||||
if (method not in methods) or (dataset not in dataset_order):
|
||||
continue
|
||||
|
||||
report = pickle.load(open(file, 'rb'))
|
||||
results = report['results']
|
||||
n_samples = len(results['ae'])
|
||||
table['method'].extend([nicer_method(method)] * n_samples)
|
||||
table['dataset'].extend([nicer_data(dataset)] * n_samples)
|
||||
table['ae'].extend(results['ae'])
|
||||
table['rae'].extend(results['rae'])
|
||||
# table['c-CI'].extend(results['coverage'])
|
||||
# table['a-CI'].extend(results['amplitude'])
|
||||
|
||||
# update_pickle_with_region(report, file, conf_name='CI-ILR', conf_region_class=ConfidenceIntervalsILR, bonferroni_correction=True)
|
||||
# update_pickle_with_region(report, file, conf_name='CI-CLR', conf_region_class=ConfidenceIntervalsCLR, bonferroni_correction=True)
|
||||
update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
|
||||
update_pickle_with_region(report, file, conf_name='CInb', conf_region_class=ConfidenceIntervals, bonferroni_correction=False) # no Bonferroni-correction
|
||||
# update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
|
||||
# update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
|
||||
# update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
|
||||
|
||||
conf_bonferroni = 'CI'
|
||||
conf_name='CInb'
|
||||
table['c-CI'].extend(report[f'coverage-{conf_bonferroni}']) # the true coverage is better measured with Bonferroni-correction
|
||||
table['w-CI'].extend(report[f'winkler-{conf_name}'])
|
||||
table['cs-CI'].extend(report[f'coverage-soft-{conf_name}'])
|
||||
table['a-CI'].extend(report[f'amplitude-{conf_name}'])
|
||||
|
||||
# table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim'])) # not in this paper...
|
||||
table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
|
||||
|
||||
|
||||
|
||||
# remove datasets with more than max_classes classes
|
||||
# max_classes = 25
|
||||
# min_train = 500
|
||||
|
|
@ -190,19 +308,100 @@ for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, C
|
|||
# df = df[df["dataset"] != data_name]
|
||||
|
||||
|
||||
|
||||
|
||||
df = pd.DataFrame(table)
|
||||
df['a-CI'] *= 100
|
||||
df['c-CI'] *= 100
|
||||
df['cs-CI'] *= 100
|
||||
|
||||
for region in ['CI']: #, 'CLR', 'ILR', 'CI']:
|
||||
if problem_type == 'binary' and region=='ILR':
|
||||
continue
|
||||
for column in [f'a-{region}', f'c-{region}', 'ae', 'SRE']:
|
||||
for column in [f'a-{region}', 'ae', 'SRE', f'c-{region}', f'cs-{region}']: # f'w-{region}'
|
||||
pv = pd.pivot_table(
|
||||
df, index='dataset', columns='method', values=column, margins=True
|
||||
)
|
||||
pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
|
||||
pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
|
||||
#pv['tr-prev'] = pv.index.map(tr_prev)
|
||||
pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
|
||||
pv = pv.drop(columns=[col for col in pv.columns if col == "All" or col[-1]=='All'])
|
||||
print(f'{problem_type=} {column=}')
|
||||
print(pv)
|
||||
print('-'*80)
|
||||
|
||||
latex = LatexTable.from_dataframe(df, method='method', benchmark='dataset', value=column, name=column)
|
||||
latex.format.configuration.show_std = False
|
||||
#latex.reorder_methods([nicer_method(m) for m in methods])
|
||||
latex.reorder_benchmarks([nicer_data(d) for d in dataset_order])
|
||||
if column in ['ae', 'SRE']:
|
||||
latex.format.configuration.lower_is_better = True
|
||||
latex.format.configuration.stat_test = 'wilcoxon'
|
||||
#latex.format.configuration.stat_test = None
|
||||
# latex.format.configuration.show_std = True
|
||||
|
||||
if column in [f'c-{region}', f'cs-{region}']:
|
||||
latex.format.configuration.lower_is_better = False
|
||||
latex.format.configuration.stat_test = None
|
||||
latex.format.configuration.with_color = False
|
||||
latex.format.configuration.best_in_bold = False
|
||||
latex.format.configuration.with_rank = False
|
||||
latex.format.configuration.mean_prec = 0
|
||||
latex.add_format_modifier(
|
||||
format_modifier=FormatModifierSelectColor(
|
||||
comparison=SelectGreaterThan(reference_selector=89, input_selector=SelectByName())
|
||||
)
|
||||
)
|
||||
if column in [f'a-{region}']:
|
||||
latex.format.configuration.lower_is_better = True
|
||||
latex.format.configuration.stat_test = None
|
||||
latex.format.configuration.with_color = False
|
||||
latex.format.configuration.best_in_bold = False
|
||||
latex.format.configuration.mean_prec = 2
|
||||
latex.add_format_modifier(
|
||||
format_modifier=FormatModifierSelectColor(
|
||||
comparison=SelectSmallerThan(reference_selector=11, input_selector=SelectByName())
|
||||
)
|
||||
)
|
||||
# latex.add_format_modifier(
|
||||
# format_modifier=FormatModifierSelectColor(
|
||||
# comparison=SelectSmallerThan(reference_selector=0.01, input_selector=SelectByName()),
|
||||
# intensity=50
|
||||
# )
|
||||
# )
|
||||
|
||||
latex.format.configuration.resizebox=.5
|
||||
latex.latexPDF(pdf_path=f'./tables/{latex.name}.pdf')
|
||||
|
||||
|
||||
df = df[df['method']!='BaACC']
|
||||
df = df[df['method']!='BaACC*']
|
||||
df = df[df['method']!='BaACC+']
|
||||
df = df[df['method']!='BaKDE-Ait*']
|
||||
df = df[df['method']!='BaKDE-Gau*']
|
||||
df = df[df['method']!='BayEMQ*']
|
||||
grouped = df.groupby(["method", "dataset"])
|
||||
agg = grouped.agg(
|
||||
ae_mean=("ae", "mean"),
|
||||
ae_std=("ae", "std"),
|
||||
sre_mean=("SRE", "mean"),
|
||||
sre_std=("SRE", "std"),
|
||||
coverage_mean=("c-CI", "mean"),
|
||||
coverage_std=("c-CI", "std"),
|
||||
coverage_soft_mean=("cs-CI", "mean"),
|
||||
amplitude_mean=("a-CI", "mean"),
|
||||
amplitude_std=("a-CI", "std"),
|
||||
).reset_index()
|
||||
|
||||
#plot_coverage_vs_amplitude(
|
||||
# agg,
|
||||
# coverage_col="coverage_soft_mean",
|
||||
# amplitude_col="amplitude_mean",
|
||||
# method_col="method",
|
||||
# dataset_col="dataset",
|
||||
# nominal_coverage=0.95,
|
||||
# title="Marginal coverage vs amplitude"
|
||||
#)
|
||||
|
||||
|
||||
#print('RESTITUIR EL WILCOXON')
|
||||
|
|
|
|||
|
|
@ -0,0 +1,169 @@
|
|||
import os.path
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
from copy import deepcopy as cp
|
||||
import quapy as qp
|
||||
from BayesianKDEy.commons import KDEyReduce
|
||||
from _bayeisan_kdey import BayesianKDEy
|
||||
from _bayesian_mapls import BayesianMAPLS
|
||||
from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh
|
||||
# import datasets
|
||||
from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, VisualDataHandler, CIFAR100Handler
|
||||
from method.confidence import ConfidenceIntervals
|
||||
from temperature_calibration import temp_calibration
|
||||
from build.lib.quapy.data import LabelledCollection
|
||||
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC
|
||||
from quapy.model_selection import GridSearchQ
|
||||
from quapy.data import Dataset
|
||||
from quapy.method.confidence import BayesianCC, AggregativeBootstrap
|
||||
from quapy.method.aggregative import KDEyML, ACC
|
||||
from quapy.protocol import UPP
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from collections import defaultdict
|
||||
from time import time
|
||||
|
||||
|
||||
def methods(data_handler: DatasetHandler):
|
||||
"""
|
||||
Returns a tuple (name, quantifier, hyperparams, bayesian/bootstrap_constructor), where:
|
||||
- name: is a str representing the name of the method (e.g., 'BayesianKDEy')
|
||||
- quantifier: is the base model (e.g., KDEyML())
|
||||
- hyperparams: is a dictionary for the quantifier (e.g., {'bandwidth': [0.001, 0.005, 0.01, 0.05, 0.1, 0.2]})
|
||||
- bayesian/bootstrap_constructor: is a function that instantiates the bayesian o bootstrap method with the
|
||||
quantifier with optimized hyperparameters
|
||||
"""
|
||||
if isinstance(data_handler, VisualDataHandler):
|
||||
Cls = LogisticRegression
|
||||
cls_hyper = {}
|
||||
else:
|
||||
Cls = LogisticRegression
|
||||
cls_hyper = {'classifier__C': np.logspace(-4, 4, 9), 'classifier__class_weight': ['balanced', None]}
|
||||
kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper}
|
||||
kdey_hyper_larger = {'bandwidth': np.logspace(-1, 0, 10), **cls_hyper}
|
||||
kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper}
|
||||
|
||||
# surrogate quantifiers
|
||||
kde_gau_scale = KDEyScaledB(Cls())
|
||||
|
||||
yield 'KDEy-G-exp', kdey_hyper, KDEyML(Cls())
|
||||
# yield 'KDEy-G-exp2', kdey_hyper_larger, KDEyML(Cls())
|
||||
# yield 'KDEy-G-log', kdey_hyper, KDEyML(Cls(), logdensities=True)
|
||||
yield 'KDEy-Ait', kdey_hyper_clr, KDEyCLR(Cls())
|
||||
|
||||
|
||||
def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
|
||||
with qp.util.temp_seed(0):
|
||||
if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid:
|
||||
def scale_bandwidth(bandwidth, n_classes, beta=0.5):
|
||||
return bandwidth * np.power(n_classes, beta)
|
||||
|
||||
n = dataset.get_training().n_classes
|
||||
grid['bandwidth'] = [scale_bandwidth(b, n) for b in grid['bandwidth']]
|
||||
print('bandwidth scaled')
|
||||
print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')
|
||||
# model selection
|
||||
if len(grid) > 0:
|
||||
train, val_prot = dataset.get_train_valprot_for_modsel()
|
||||
mod_sel = GridSearchQ(
|
||||
model=point_quantifier,
|
||||
param_grid=grid,
|
||||
protocol=val_prot,
|
||||
refit=False,
|
||||
n_jobs=-1,
|
||||
verbose=True
|
||||
).fit(*train.Xy)
|
||||
best_params = mod_sel.best_params_
|
||||
else:
|
||||
best_params = {}
|
||||
|
||||
return best_params
|
||||
|
||||
|
||||
def experiment(dataset: DatasetHandler,
|
||||
point_quantifier: AggregativeQuantifier,
|
||||
method_name: str,
|
||||
grid: dict,
|
||||
hyper_choice_path: Path):
|
||||
|
||||
with qp.util.temp_seed(0):
|
||||
# model selection
|
||||
best_hyperparams = qp.util.pickled_resource(
|
||||
hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid
|
||||
)
|
||||
print(f'{best_hyperparams=}')
|
||||
|
||||
t_init = time()
|
||||
training, test_generator = dataset.get_train_testprot_for_eval()
|
||||
point_quantifier.fit(*training.Xy)
|
||||
tr_time = time() - t_init
|
||||
|
||||
# test
|
||||
train_prevalence = training.prevalence()
|
||||
results = defaultdict(list)
|
||||
pbar = tqdm(enumerate(test_generator()), total=test_generator.total())
|
||||
for i, (sample_X, true_prevalence) in pbar:
|
||||
t_init = time()
|
||||
point_estimate = point_quantifier.predict(sample_X)
|
||||
ttime = time() - t_init
|
||||
|
||||
results['true-prevs'].append(true_prevalence)
|
||||
results['point-estim'].append(point_estimate)
|
||||
results['shift'].append(qp.error.ae(true_prevalence, train_prevalence))
|
||||
results['ae'].append(qp.error.ae(prevs_true=true_prevalence, prevs_hat=point_estimate))
|
||||
results['rae'].append(qp.error.rae(prevs_true=true_prevalence, prevs_hat=point_estimate))
|
||||
results['sre'].append(qp.error.sre(prevs_true=true_prevalence, prevs_hat=point_estimate, prevs_train=train_prevalence))
|
||||
results['test-time'].append(ttime)
|
||||
|
||||
pbar.set_description(
|
||||
f'{method_name} MAE={np.mean(results["ae"]):.5f} W={np.mean(results["sre"]):.5f}')
|
||||
|
||||
report = {
|
||||
'optim_hyper': best_hyperparams,
|
||||
'train_time': tr_time,
|
||||
'train-prev': train_prevalence,
|
||||
'results': {k: np.asarray(v) for k, v in results.items()},
|
||||
}
|
||||
|
||||
return report
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
result_dir = Path('results_map')
|
||||
|
||||
reports = defaultdict(list)
|
||||
|
||||
for data_handler in [UCIMulticlassHandler]: # , UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
|
||||
for dataset in data_handler.iter():
|
||||
qp.environ['SAMPLE_SIZE'] = dataset.sample_size
|
||||
# print(f'dataset={dataset.name}')
|
||||
|
||||
problem_type = 'binary' if dataset.is_binary() else 'multiclass'
|
||||
for method_name, hyper_params, quantifier in methods(dataset):
|
||||
|
||||
result_path = experiment_path(result_dir / problem_type, dataset.name, method_name)
|
||||
hyper_path = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, method_name)
|
||||
|
||||
# if os.path.exists(result_path):
|
||||
report = qp.util.pickled_resource(
|
||||
result_path, experiment, dataset, quantifier, method_name, hyper_params, hyper_path
|
||||
)
|
||||
reports['dataset'].append(dataset.name)
|
||||
reports['method'].append(method_name)
|
||||
reports['MAE'].append(report["results"]["ae"].mean())
|
||||
reports['SRE'].append(report["results"]["sre"].mean())
|
||||
reports['h'].append(report["optim_hyper"]["bandwidth"])
|
||||
|
||||
print(f'dataset={dataset.name}, '
|
||||
f'method={method_name}: '
|
||||
f'mae={reports["MAE"][-1]:.5f}, '
|
||||
f'W={reports["SRE"][-1]:.5f} '
|
||||
f'h={reports["h"][-1]}')
|
||||
|
||||
pv = pd.DataFrame(reports).pivot_table(values=['MAE', 'SRE', 'h'], index='dataset', columns='method', margins=True)
|
||||
print(pv)
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,168 @@
|
|||
from abc import ABC, abstractmethod
|
||||
import numpy as np
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
||||
from BayesianKDEy._bayesian_mapls import BayesianMAPLS
|
||||
from BayesianKDEy.commons import KDEyCLR, KDEyCLR2
|
||||
from quapy.method.aggregative import CC, ACC, EMQ, DMy, KDEyML
|
||||
from quapy.method.base import BaseQuantifier
|
||||
from quapy.method.confidence import AggregativeBootstrap, BayesianCC
|
||||
|
||||
|
||||
def get_experimental_methods():
|
||||
#yield BootsCC()
|
||||
#yield BayACC()
|
||||
#yield BayEMQ()
|
||||
#yield BayKDEyGau()
|
||||
#yield BayKDEyAit()
|
||||
yield BayKDEyAit2()
|
||||
|
||||
|
||||
# commons
|
||||
# ------------------------------------------------------------
|
||||
Cls = LogisticRegression
|
||||
cls_hyper = {
|
||||
'classifier__C': np.logspace(-4,4,9),
|
||||
'classifier__class_weight': ['balanced', None]
|
||||
}
|
||||
hdy_hyper = {'nbins': [3, 4, 5, 8, 9, 10, 12, 14, 16, 32], **cls_hyper}
|
||||
kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper}
|
||||
kdey_hyper_clr = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper}
|
||||
|
||||
def hyper2cls(hyperparams):
|
||||
return {k.replace('classifier__', ''): v for k, v in hyperparams.items()}
|
||||
|
||||
|
||||
# method descriptor logic
|
||||
# ------------------------------------------------------------
|
||||
class MethodDescriptor(ABC):
|
||||
def __init__(self,
|
||||
name: str,
|
||||
surrogate_quantifier: BaseQuantifier,
|
||||
hyper_parameters: dict,
|
||||
):
|
||||
self.name = name
|
||||
self.surrogate_quantifier_ = surrogate_quantifier
|
||||
self.hyper_parameters = hyper_parameters
|
||||
|
||||
@abstractmethod
|
||||
def binary_only(self): ...
|
||||
|
||||
def surrogate_quantifier(self):
|
||||
return self.surrogate_quantifier_
|
||||
|
||||
def surrogate_quantifier_name(self):
|
||||
return self.surrogate_quantifier_.__class__.__name__
|
||||
|
||||
@abstractmethod
|
||||
def uncertainty_aware_quantifier(self, hyperparameters): ...
|
||||
|
||||
|
||||
class MulticlassMethodDescriptor(MethodDescriptor):
|
||||
|
||||
def binary_only(self):
|
||||
return False
|
||||
|
||||
|
||||
# specific methods definitions
|
||||
# ------------------------------------------------------------
|
||||
# ------------------------------------------------------------
|
||||
|
||||
# Bootstrap approaches:
|
||||
# ------------------------------------------------------------
|
||||
class BootsCC(MulticlassMethodDescriptor):
|
||||
def __init__(self):
|
||||
super().__init__(name='BoCC', surrogate_quantifier=CC(Cls()), hyper_parameters=cls_hyper)
|
||||
|
||||
def uncertainty_aware_quantifier(self, hyperparameters):
|
||||
quantifier = CC(Cls()).set_params(**hyperparameters)
|
||||
return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0)
|
||||
|
||||
|
||||
# class BootsACC(MulticlassMethodDescriptor):
|
||||
# def __init__(self):
|
||||
# super().__init__(name='BoACC', surrogate_quantifier=ACC(Cls()), hyper_parameters=cls_hyper)
|
||||
#
|
||||
# def uncertainty_aware_quantifier(self, hyperparameters):
|
||||
# quantifier = ACC(Cls()).set_params(**hyperparameters)
|
||||
# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0)
|
||||
#
|
||||
#
|
||||
# class BootsEMQ(MulticlassMethodDescriptor):
|
||||
# def __init__(self):
|
||||
# super().__init__(name='BoEMQ', surrogate_quantifier=EMQ(Cls(), exact_train_prev=False), hyper_parameters=cls_hyper)
|
||||
#
|
||||
# def uncertainty_aware_quantifier(self, hyperparameters):
|
||||
# quantifier = EMQ(Cls(), exact_train_prev=False).set_params(**hyperparameters)
|
||||
# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0)
|
||||
|
||||
|
||||
# class BootsHDy(MethodDescriptor):
|
||||
# def __init__(self):
|
||||
# super().__init__(name='BoHDy', surrogate_quantifier=DMy(Cls()), hyper_parameters=hdy_hyper)
|
||||
#
|
||||
# def uncertainty_aware_quantifier(self, hyperparameters):
|
||||
# quantifier = DMy(Cls()).set_params(**hyperparameters)
|
||||
# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0)
|
||||
#
|
||||
# def binary_only(self):
|
||||
# return True
|
||||
|
||||
# class BootsKDEy(MulticlassMethodDescriptor):
|
||||
# def __init__(self):
|
||||
# super().__init__(name='BoKDEy', surrogate_quantifier=KDEyML(Cls()), hyper_parameters=kdey_hyper)
|
||||
#
|
||||
# def uncertainty_aware_quantifier(self, hyperparameters):
|
||||
# quantifier = KDEyML(Cls()).set_params(**hyperparameters)
|
||||
# return AggregativeBootstrap(quantifier, n_test_samples=1000, random_state=0)
|
||||
|
||||
|
||||
# Bayesian approaches:
|
||||
# ------------------------------------------------------------
|
||||
class BayACC(MulticlassMethodDescriptor):
|
||||
def __init__(self):
|
||||
super().__init__(name='BaACC!', surrogate_quantifier=ACC(Cls()), hyper_parameters=cls_hyper)
|
||||
|
||||
def uncertainty_aware_quantifier(self, hyperparameters):
|
||||
classifier = Cls(**hyper2cls(hyperparameters))
|
||||
return BayesianCC(classifier, temperature=None, mcmc_seed=0) # is actually a Bayesian variant of ACC
|
||||
|
||||
|
||||
class BayEMQ(MulticlassMethodDescriptor):
|
||||
def __init__(self):
|
||||
super().__init__(name='BaEMQ!', surrogate_quantifier=EMQ(Cls(), exact_train_prev=False), hyper_parameters=cls_hyper)
|
||||
|
||||
def uncertainty_aware_quantifier(self, hyperparameters):
|
||||
classifier = Cls(**hyper2cls(hyperparameters))
|
||||
return BayesianMAPLS(classifier, prior='uniform', temperature=None, exact_train_prev=False)
|
||||
|
||||
|
||||
class BayKDEyGau(MulticlassMethodDescriptor):
|
||||
def __init__(self):
|
||||
kdey_hyper = {'bandwidth': np.logspace(-3, -1, 10), **cls_hyper}
|
||||
super().__init__(name='BaKDE-Gau-T!', surrogate_quantifier=KDEyML(Cls()), hyper_parameters=kdey_hyper)
|
||||
|
||||
def uncertainty_aware_quantifier(self, hyperparameters):
|
||||
return BayesianKDEy(Cls(), kernel='gaussian', temperature=None, mcmc_seed=0, **hyperparameters)
|
||||
|
||||
|
||||
class BayKDEyAit(MulticlassMethodDescriptor):
|
||||
def __init__(self):
|
||||
kdey_hyper = {'bandwidth': np.logspace(-2, 2, 10), **cls_hyper}
|
||||
super().__init__(name='BaKDE-Ait-T!', surrogate_quantifier=KDEyCLR(Cls()), hyper_parameters=kdey_hyper)
|
||||
|
||||
def uncertainty_aware_quantifier(self, hyperparameters):
|
||||
return BayesianKDEy(Cls(), kernel='aitchison', temperature=None, mcmc_seed=0, **hyperparameters)
|
||||
|
||||
|
||||
class BayKDEyAit2(MulticlassMethodDescriptor):
|
||||
def __init__(self):
|
||||
kdey_hyper = {'bandwidth': np.linspace(0.05, 2., 10), **cls_hyper}
|
||||
super().__init__(name='BaKDE-Ait-T!2', surrogate_quantifier=KDEyCLR2(Cls()), hyper_parameters=kdey_hyper)
|
||||
|
||||
def uncertainty_aware_quantifier(self, hyperparameters):
|
||||
return BayesianKDEy(Cls(), kernel='aitchison', temperature=None, mcmc_seed=0, **hyperparameters)
|
||||
|
||||
|
||||
|
||||
|
|
@ -10,6 +10,8 @@ from sklearn.preprocessing import MinMaxScaler
|
|||
|
||||
from BayesianKDEy.commons import antagonistic_prevalence, in_simplex
|
||||
from method.confidence import (ConfidenceIntervals as CI,
|
||||
ConfidenceIntervalsCLR as CICLR,
|
||||
ConfidenceIntervalsILR as CIILR,
|
||||
ConfidenceEllipseSimplex as CE,
|
||||
ConfidenceEllipseCLR as CLR,
|
||||
ConfidenceEllipseILR as ILR)
|
||||
|
|
@ -260,6 +262,8 @@ def plot_regions(ax, region_layers, resolution, confine):
|
|||
)
|
||||
|
||||
|
||||
|
||||
|
||||
def plot_points(ax, point_layers):
|
||||
for layer in point_layers:
|
||||
pts = layer["points"]
|
||||
|
|
@ -433,25 +437,34 @@ def plot_kernels():
|
|||
if __name__ == '__main__':
|
||||
np.random.seed(1)
|
||||
|
||||
# n = 1000
|
||||
# alpha = [1,1,1]
|
||||
# prevs = np.random.dirichlet(alpha, size=n)
|
||||
n = 1000
|
||||
alpha = [15,10,7]
|
||||
prevs = np.random.dirichlet(alpha, size=n)
|
||||
|
||||
# def regions():
|
||||
# confs = [0.99, 0.95, 0.90]
|
||||
# yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
|
||||
def regions():
|
||||
confs = [0.9, 0.95, 0.99]
|
||||
# yield 'CI', [(f'{int(c*100)}%', CI(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'CI-b', [(f'{int(c * 100)}%', CI(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
|
||||
# yield 'CI-CLR', [(f'{int(c * 100)}%', CICLR(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'CI-CLR-b', [(f'{int(c * 100)}%', CICLR(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
|
||||
# yield 'CI-ILR', [(f'{int(c * 100)}%', CIILR(prevs, confidence_level=c).coverage) for c in confs]
|
||||
yield 'CI-ILR-b', [(f'{int(c * 100)}%', CIILR(prevs, confidence_level=c, bonferroni_correction=True).coverage) for c in confs]
|
||||
# yield 'CE', [(f'{int(c*100)}%', CE(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'CE-CLR', [(f'{int(c*100)}%', CLR(prevs, confidence_level=c).coverage) for c in confs]
|
||||
# yield 'CE-ILR', [(f'{int(c*100)}%', ILR(prevs, confidence_level=c).coverage) for c in confs]
|
||||
|
||||
# resolution = 1000
|
||||
# alpha_str = ','.join([f'{str(i)}' for i in alpha])
|
||||
# for crname, cr in regions():
|
||||
# plot_prev_points(prevs, show_mean=True, show_legend=False, region=cr, region_resolution=resolution,
|
||||
# color='blue',
|
||||
# save_path=f'./plots/simplex_{crname}_alpha{alpha_str}_res{resolution}.png',
|
||||
# )
|
||||
resolution = 1000
|
||||
alpha_str = ','.join([f'{str(i)}' for i in alpha])
|
||||
|
||||
dot_style = {"color": "gray", "alpha": .5, "s": 15, 'linewidth': .25, 'edgecolors': "black"}
|
||||
point_layer = [
|
||||
{"points": prevs, "label": "points", "style": dot_style},
|
||||
]
|
||||
|
||||
for crname, cr in regions():
|
||||
region = [{'fn': fn, 'alpha':.6, 'label':label} for label, fn in cr]
|
||||
|
||||
plot_simplex(point_layers=point_layer, region_layers=region, show_legend=False, resolution=resolution, save_path=f'./plots/regions/{crname}.png')
|
||||
|
||||
# def regions():
|
||||
# confs = [0.99, 0.95, 0.90]
|
||||
|
|
@ -544,4 +557,4 @@ if __name__ == '__main__':
|
|||
# save_path=f'./plots/prior_test/concentration_{c}.png'
|
||||
# )
|
||||
|
||||
plot_kernels()
|
||||
# plot_kernels()
|
||||
|
|
@ -13,12 +13,14 @@ def temp_calibration(method:WithConfidenceABC,
|
|||
val_prot:AbstractProtocol,
|
||||
temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.],
|
||||
nominal_coverage=0.95,
|
||||
amplitude_threshold='auto',
|
||||
amplitude_threshold=1.,
|
||||
criterion='winkler',
|
||||
n_jobs=1,
|
||||
verbose=True):
|
||||
|
||||
assert (amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float)) and amplitude_threshold < 1.), \
|
||||
f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float < 1.0.'
|
||||
assert (amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float)) and amplitude_threshold <= 1.), \
|
||||
f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float <= 1.0.'
|
||||
assert criterion in {'auto', 'winkler'}, f'unknown {criterion=}; valid ones are auto or winkler'
|
||||
|
||||
if amplitude_threshold=='auto':
|
||||
n_classes = train.n_classes
|
||||
|
|
@ -27,15 +29,16 @@ def temp_calibration(method:WithConfidenceABC,
|
|||
if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1:
|
||||
print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions')
|
||||
|
||||
def evaluate_temperature_job(job_id, temp):
|
||||
if verbose:
|
||||
print(f'\tstarting exploration with temperature={temp}...')
|
||||
def _evaluate_temperature_job(job_id, temp):
|
||||
# if verbose:
|
||||
# print(f'\tstarting exploration with temperature={temp}...')
|
||||
|
||||
local_method = copy.deepcopy(method)
|
||||
local_method.temperature = temp
|
||||
|
||||
coverage = 0
|
||||
amplitudes = []
|
||||
winklers = []
|
||||
# errs = []
|
||||
|
||||
pbar = tqdm(enumerate(val_prot()), position=job_id, total=val_prot.total(), disable=not verbose)
|
||||
|
|
@ -47,35 +50,62 @@ def temp_calibration(method:WithConfidenceABC,
|
|||
coverage += 1
|
||||
|
||||
amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
|
||||
winkler = None
|
||||
if criterion=='winkler':
|
||||
winkler = conf_region.mean_winkler_score(true_prev=prev, alpha=0.005)
|
||||
winklers.append(winkler)
|
||||
|
||||
# errs.append(qp.error.mae(prev, point_estim))
|
||||
pbar.set_description(f'job={job_id} T={temp}: coverage={coverage/(i+1)*100:.2f}% amplitude={np.mean(amplitudes)*100:.2f}%')
|
||||
pbar.set_description(
|
||||
f'job={job_id} T={temp}: '
|
||||
f'coverage={coverage/(i+1)*100:.2f}% '
|
||||
f'amplitude={np.mean(amplitudes)*100:.4f}% '
|
||||
+ f'winkler={np.mean(winklers):.4f}%' if criterion=='winkler' else ''
|
||||
)
|
||||
|
||||
mean_coverage = coverage / val_prot.total()
|
||||
mean_amplitude = np.mean(amplitudes)
|
||||
winkler_mean = np.mean(winklers) if criterion=='winkler' else None
|
||||
|
||||
if verbose:
|
||||
print(f'Temperature={temp} got coverage={mean_coverage*100:.2f}% amplitude={mean_amplitude*100:.2f}%')
|
||||
# if verbose:
|
||||
# print(
|
||||
# f'Temperature={temp} got '
|
||||
# f'coverage={mean_coverage*100:.2f}% '
|
||||
# f'amplitude={mean_amplitude*100:.2f}% '
|
||||
# + f'winkler={winkler_mean:.4f}' if criterion == 'winkler' else ''
|
||||
# )
|
||||
|
||||
return temp, mean_coverage, mean_amplitude
|
||||
return temp, mean_coverage, mean_amplitude, winkler_mean
|
||||
|
||||
temp_grid = sorted(temp_grid)
|
||||
method.fit(*train.Xy)
|
||||
|
||||
raw_results = Parallel(n_jobs=n_jobs, backend="loky")(
|
||||
delayed(evaluate_temperature_job)(job_id, temp)
|
||||
delayed(_evaluate_temperature_job)(job_id, temp)
|
||||
for job_id, temp in tqdm(enumerate(temp_grid), disable=not verbose)
|
||||
)
|
||||
results = [
|
||||
(temp, cov, amp)
|
||||
for temp, cov, amp in raw_results
|
||||
(temp, cov, amp, wink)
|
||||
for temp, cov, amp, wink in raw_results
|
||||
if amp < amplitude_threshold
|
||||
]
|
||||
|
||||
chosen_temperature = 1.
|
||||
if len(results) > 0:
|
||||
chosen_temperature = min(results, key=lambda x: abs(x[1]-nominal_coverage))[0]
|
||||
if criterion=='winkler':
|
||||
# choose min winkler
|
||||
chosen_temperature, ccov, camp, cwink = min(results, key=lambda x: x[3])
|
||||
else:
|
||||
# choose best coverage (regardless of amplitude), i.e., closest to nominal
|
||||
chosen_temperature, ccov, camp, cwink = min(results, key=lambda x: abs(x[1]-nominal_coverage))
|
||||
|
||||
print(f'chosen_temperature={chosen_temperature:.2f}')
|
||||
if verbose:
|
||||
print(
|
||||
f'\nChosen_temperature={chosen_temperature:.2f} got '
|
||||
f'coverage={ccov*100:.2f}% '
|
||||
f'amplitude={camp*100:.4f}% '
|
||||
+ f'winkler={cwink:.4f}' if criterion=='winkler' else ''
|
||||
)
|
||||
|
||||
return chosen_temperature
|
||||
|
||||
|
|
|
|||
|
|
@ -1,15 +1,21 @@
|
|||
Change Log 0.2.1
|
||||
-----------------
|
||||
|
||||
- Added mechanisms for Temperature Calibration for coverage
|
||||
- Added MAPLS and BayesianEMQ
|
||||
- Added DirichletProtocol, which allows to generate samples according to a parameterized Dirichlet prior.
|
||||
- Added squared ratio error.
|
||||
- Improved efficiency of confidence regions coverage functions
|
||||
- Added Precise Quantifier to WithConfidence methods (a Bayesian adaptation of HDy)
|
||||
- Added Temperature parameter to BayesianCC
|
||||
- Improved documentation of confidence regions.
|
||||
- Added ReadMe method by Daniel Hopkins and Gary King
|
||||
- Internal index in LabelledCollection is now "lazy", and is only constructed if required.
|
||||
- Added dist_aitchison and mean_dist_aitchison as a new error evaluation metric.
|
||||
- Improved numerical stability of KDEyML through logsumexp; useful for cases with large number of classes, where densities for small bandwidths may become huge
|
||||
- Added new error metrics:
|
||||
- dist_aitchison and mean_dist_aitchison as a new error evaluation metric.
|
||||
- squared ratio error.
|
||||
- Improved numerical stability of KDEyML through logsumexp; useful for cases with large number of classes, where
|
||||
densities for small bandwidths may become huge.
|
||||
|
||||
|
||||
Change Log 0.2.0
|
||||
-----------------
|
||||
|
|
|
|||
15
TODO.txt
15
TODO.txt
|
|
@ -1,6 +1,11 @@
|
|||
Adapt examples; remaining: example 4-onwards
|
||||
not working: 15 (qunfold)
|
||||
|
||||
Unify ConfidenceIntervalsTransformation with ConfidenceEllipseTransformation
|
||||
|
||||
Unify functionality of withconfidence methods; the predict_conf has a clear similar structure across all
|
||||
variants, and should be unified in the super class
|
||||
|
||||
Solve the warnings issue; right now there is a warning ignore in method/__init__.py:
|
||||
|
||||
Add 'platt' to calib options in EMQ?
|
||||
|
|
@ -46,7 +51,15 @@ Para quitar el labelledcollection de los métodos:
|
|||
- proporción en [0,1]
|
||||
- fit_classifier=False:
|
||||
|
||||
|
||||
- [TODO] add RLSbench?:
|
||||
- https://arxiv.org/pdf/2302.03020
|
||||
- https://github.com/acmi-lab/RLSbench
|
||||
- [TODO] check Table shift
|
||||
- https://proceedings.neurips.cc/paper_files/paper/2023/hash/a76a757ed479a1e6a5f8134bea492f83-Abstract-Datasets_and_Benchmarks.html
|
||||
- [TODO] have a look at TorchDrift
|
||||
- https://torchdrift.org/
|
||||
- [TODO] have a look at
|
||||
- https://github.com/SeldonIO/alibi-detect/
|
||||
- [TODO] check if the KDEyML variant with sumlogexp is slower than the original one, or check whether we can explore
|
||||
an unconstrained space in which the parameter is already the log(prev); maybe also move to cvxq
|
||||
- [TODO] why not simplifying the epsilon of RAE? at the end, it is meant to smooth the denominator for avoiding div 0
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ P_TEST_C: str = "P_test(C)"
|
|||
P_C_COND_Y: str = "P(C|Y)"
|
||||
|
||||
|
||||
def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, alpha: np.ndarray) -> None:
|
||||
def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, temperature:float, alpha: np.ndarray) -> None:
|
||||
"""
|
||||
Defines a probabilistic model in `NumPyro <https://num.pyro.ai/>`_.
|
||||
|
||||
|
|
@ -50,11 +50,40 @@ def model_bayesianCC(n_c_unlabeled: np.ndarray, n_y_and_c_labeled: np.ndarray, a
|
|||
pi_ = numpyro.sample(P_TEST_Y, dist.Dirichlet(jnp.asarray(alpha, dtype=jnp.float32)))
|
||||
p_c_cond_y = numpyro.sample(P_C_COND_Y, dist.Dirichlet(jnp.ones(K).repeat(L).reshape(L, K)))
|
||||
|
||||
with numpyro.plate('plate', L):
|
||||
numpyro.sample('F_yc', dist.Multinomial(n_y_labeled, p_c_cond_y), obs=n_y_and_c_labeled)
|
||||
if temperature==1:
|
||||
# original implementation
|
||||
with numpyro.plate('plate', L):
|
||||
numpyro.sample('F_yc', dist.Multinomial(n_y_labeled, p_c_cond_y), obs=n_y_and_c_labeled)
|
||||
|
||||
p_c = numpyro.deterministic(P_TEST_C, jnp.einsum("yc,y->c", p_c_cond_y, pi_))
|
||||
numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled)
|
||||
p_c = numpyro.deterministic(P_TEST_C, jnp.einsum("yc,y->c", p_c_cond_y, pi_))
|
||||
numpyro.sample('N_c', dist.Multinomial(jnp.sum(n_c_unlabeled), p_c), obs=n_c_unlabeled)
|
||||
else:
|
||||
# with temperature modification
|
||||
with numpyro.plate('plate_y', L):
|
||||
logp_F = dist.Multinomial(
|
||||
n_y_labeled,
|
||||
p_c_cond_y
|
||||
).log_prob(n_y_and_c_labeled)
|
||||
|
||||
numpyro.factor(
|
||||
'F_yc_loglik',
|
||||
jnp.sum(logp_F) / temperature
|
||||
)
|
||||
p_c = numpyro.deterministic(
|
||||
P_TEST_C,
|
||||
jnp.einsum("yc,y->c", p_c_cond_y, pi_)
|
||||
)
|
||||
|
||||
# Likelihood datos no etiquetados
|
||||
logp_N = dist.Multinomial(
|
||||
jnp.sum(n_c_unlabeled),
|
||||
p_c
|
||||
).log_prob(n_c_unlabeled)
|
||||
|
||||
numpyro.factor(
|
||||
'N_c_loglik',
|
||||
logp_N / temperature
|
||||
)
|
||||
|
||||
|
||||
def sample_posterior_bayesianCC(
|
||||
|
|
@ -63,6 +92,7 @@ def sample_posterior_bayesianCC(
|
|||
num_warmup: int,
|
||||
num_samples: int,
|
||||
alpha: np.ndarray,
|
||||
temperature = 1.,
|
||||
seed: int = 0,
|
||||
) -> dict:
|
||||
"""
|
||||
|
|
@ -88,7 +118,7 @@ def sample_posterior_bayesianCC(
|
|||
progress_bar=False
|
||||
)
|
||||
rng_key = jax.random.PRNGKey(seed)
|
||||
mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, alpha=alpha)
|
||||
mcmc.run(rng_key, n_c_unlabeled=n_c_unlabeled, n_y_and_c_labeled=n_y_and_c_labeled, temperature=temperature, alpha=alpha)
|
||||
return mcmc.get_samples()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -178,17 +178,24 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
|
|||
with qp.util.temp_seed(self.random_state):
|
||||
epsilon = 1e-12
|
||||
n_classes = len(self.mix_densities)
|
||||
#test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
|
||||
test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]
|
||||
if n_classes>=30:
|
||||
# new version: improves numerical stability with logsumexp, at the cost of optimization efficiency.
|
||||
# needed if the number of classes is large (approx >= 30) because densities tend to grow exponentially
|
||||
test_log_densities = [self.pdf(kde_i, posteriors, self.kernel, log_densities=True) for kde_i in self.mix_densities]
|
||||
|
||||
#def neg_loglikelihood(prev):
|
||||
# prev = np.clip(prev, epsilon, 1.0)
|
||||
# test_mixture_likelihood = prev @ test_densities
|
||||
# test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
|
||||
# return -np.sum(test_loglikelihood)
|
||||
def neg_loglikelihood(prev):
|
||||
test_loglikelihood = logsumexp(np.log(np.clip(prev, epsilon, 1.0))[:,None] + test_log_densities, axis=0)
|
||||
return -np.sum(test_loglikelihood)
|
||||
def neg_loglikelihood(prev):
|
||||
prev = np.clip(prev, epsilon, 1.0)
|
||||
test_loglikelihood = logsumexp(np.log(prev)[:,None] + test_log_densities, axis=0)
|
||||
return -np.sum(test_loglikelihood)
|
||||
else:
|
||||
# original implementation
|
||||
test_densities = [self.pdf(kde_i, posteriors, self.kernel) for kde_i in self.mix_densities]
|
||||
|
||||
def neg_loglikelihood(prev):
|
||||
# prev = np.clip(prev, epsilon, 1.0)
|
||||
test_mixture_likelihood = prev @ test_densities
|
||||
test_loglikelihood = np.log(test_mixture_likelihood + epsilon)
|
||||
return -np.sum(test_loglikelihood)
|
||||
|
||||
return F.optim_minimize(neg_loglikelihood, n_classes)
|
||||
|
||||
|
|
|
|||
|
|
@ -163,6 +163,7 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
|||
|
||||
:param X: array-like of shape `(n_samples, n_features)`, the training instances
|
||||
:param y: array-like of shape `(n_samples,)`, the labels
|
||||
:return: a tuple (predictions, labels)
|
||||
"""
|
||||
self._check_classifier(adapt_if_necessary=self.fit_classifier)
|
||||
|
||||
|
|
|
|||
|
|
@ -341,6 +341,7 @@ class ConfidenceIntervals(ConfidenceRegionABC):
|
|||
"""
|
||||
def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
|
||||
assert 0 < confidence_level < 1, f'{confidence_level=} must be in range(0,1)'
|
||||
assert samples.ndim == 2, 'unexpected shape; must be (n_bootstrap_samples, n_classes)'
|
||||
|
||||
samples = np.asarray(samples)
|
||||
|
||||
|
|
@ -383,6 +384,10 @@ class ConfidenceIntervals(ConfidenceRegionABC):
|
|||
|
||||
return proportion
|
||||
|
||||
def coverage_soft(self, true_value):
|
||||
within_intervals = np.logical_and(self.I_low <= true_value, true_value <= self.I_high)
|
||||
return np.mean(within_intervals.astype(float))
|
||||
|
||||
def __repr__(self):
|
||||
return '['+', '.join(f'({low:.4f}, {high:.4f})' for (low,high) in zip(self.I_low, self.I_high))+']'
|
||||
|
||||
|
|
@ -390,25 +395,30 @@ class ConfidenceIntervals(ConfidenceRegionABC):
|
|||
def n_dim(self):
|
||||
return len(self.I_low)
|
||||
|
||||
def winkler_scores(self, true_prev):
|
||||
def winkler_scores(self, true_prev, alpha=None, add_ae=False):
|
||||
true_prev = np.asarray(true_prev)
|
||||
assert true_prev.ndim == 1, 'unexpected dimensionality for true_prev'
|
||||
assert len(true_prev)==self.n_dim, \
|
||||
f'unexpected number of dimensions; found {true_prev.ndim}, expected {self.n_dim}'
|
||||
|
||||
def winkler_score(low, high, true_val, alpha):
|
||||
def winkler_score(low, high, true_val, alpha, center):
|
||||
amp = high-low
|
||||
scale_cost = 1./alpha
|
||||
scale_cost = 2./alpha
|
||||
cost = np.max([0, low-true_val], axis=0) + np.max([0, true_val-high], axis=0)
|
||||
return amp + scale_cost*cost
|
||||
err = 0
|
||||
if add_ae:
|
||||
err = abs(true_val - center)
|
||||
return amp + scale_cost*cost + err
|
||||
|
||||
alpha = alpha or self.alpha
|
||||
return np.asarray(
|
||||
[winkler_score(low_i, high_i, true_v, self.alpha)
|
||||
for (low_i, high_i, true_v) in zip(self.I_low, self.I_high, true_prev)]
|
||||
[winkler_score(low_i, high_i, true_v, alpha, center)
|
||||
for (low_i, high_i, true_v, center) in zip(self.I_low, self.I_high, true_prev, self.point_estimate())]
|
||||
)
|
||||
|
||||
def mean_winkler_score(self, true_prev):
|
||||
return np.mean(self.winkler_scores(true_prev))
|
||||
def mean_winkler_score(self, true_prev, alpha=None, add_ae=False):
|
||||
return np.mean(self.winkler_scores(true_prev, alpha=alpha, add_ae=add_ae))
|
||||
|
||||
|
||||
|
||||
class ConfidenceEllipseSimplex(ConfidenceRegionABC):
|
||||
|
|
@ -486,8 +496,8 @@ class ConfidenceEllipseTransformed(ConfidenceRegionABC):
|
|||
samples = np.asarray(samples)
|
||||
self.transformation = transformation
|
||||
Z = self.transformation(samples)
|
||||
# self.mean_ = np.mean(samples, axis=0)
|
||||
self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
|
||||
self.mean_ = np.mean(samples, axis=0)
|
||||
# self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
|
||||
self.conf_region_z = ConfidenceEllipseSimplex(Z, confidence_level=confidence_level)
|
||||
self._samples = samples
|
||||
self.alpha = 1.-confidence_level
|
||||
|
|
@ -549,7 +559,99 @@ class ConfidenceEllipseILR(ConfidenceEllipseTransformed):
|
|||
|
||||
|
||||
|
||||
class ConfidenceIntervalsTransformed(ConfidenceRegionABC):
|
||||
"""
|
||||
Instantiates a Confidence Interval region in a transformed space.
|
||||
|
||||
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||
:param confidence_level: float, the confidence level (default 0.95)
|
||||
:param bonferroni_correction: bool (default False), if True, a Bonferroni correction
|
||||
is applied to the significance level (`alpha`) before computing confidence intervals.
|
||||
The correction consists of replacing `alpha` with `alpha/n_classes`. When
|
||||
`n_classes=2` the correction is not applied because there is only one verification test
|
||||
since the other class is constrained. This is not necessarily true for n_classes>2.
|
||||
"""
|
||||
|
||||
def __init__(self, samples, transformation: CompositionalTransformation, confidence_level=0.95, bonferroni_correction=False):
|
||||
samples = np.asarray(samples)
|
||||
self.transformation = transformation
|
||||
Z = self.transformation(samples)
|
||||
self.mean_ = np.mean(samples, axis=0)
|
||||
# self.mean_ = self.transformation.inverse(np.mean(Z, axis=0))
|
||||
self.conf_region_z = ConfidenceIntervals(Z, confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
|
||||
self._samples = samples
|
||||
self.alpha = 1.-confidence_level
|
||||
|
||||
@property
|
||||
def samples(self):
|
||||
return self._samples
|
||||
|
||||
def point_estimate(self):
|
||||
"""
|
||||
Returns the point estimate, the center of the ellipse.
|
||||
|
||||
:return: np.ndarray of shape (n_classes,)
|
||||
"""
|
||||
# The inverse of the CLR does not coincide with the true mean, because the geometric mean
|
||||
# requires smoothing the prevalence vectors and this affects the softmax (inverse);
|
||||
# return self.clr.inverse(self.mean_) # <- does not coincide
|
||||
return self.mean_
|
||||
|
||||
def coverage(self, true_value):
|
||||
"""
|
||||
Checks whether a value, or a sets of values, are contained in the confidence region. The method computes the
|
||||
fraction of these that are contained in the region, if more than one value is passed. If only one value is
|
||||
passed, then it either returns 1.0 or 0.0, for indicating the value is in the region or not, respectively.
|
||||
|
||||
:param true_value: a np.ndarray of shape (n_classes,) or shape (n_values, n_classes,)
|
||||
:return: float in [0,1]
|
||||
"""
|
||||
transformed_values = self.transformation(true_value)
|
||||
return self.conf_region_z.coverage(transformed_values)
|
||||
|
||||
def coverage_soft(self, true_value):
|
||||
transformed_values = self.transformation(true_value)
|
||||
return self.conf_region_z.coverage_soft(transformed_values)
|
||||
|
||||
def winkler_scores(self, true_prev, alpha=None, add_ae=False):
|
||||
transformed_values = self.transformation(true_prev)
|
||||
return self.conf_region_z.winkler_scores(transformed_values, alpha=alpha, add_ae=add_ae)
|
||||
|
||||
def mean_winkler_score(self, true_prev, alpha=None, add_ae=False):
|
||||
transformed_values = self.transformation(true_prev)
|
||||
return self.conf_region_z.mean_winkler_score(transformed_values, alpha=alpha, add_ae=add_ae)
|
||||
|
||||
|
||||
class ConfidenceIntervalsCLR(ConfidenceIntervalsTransformed):
|
||||
"""
|
||||
Instantiates a Confidence Intervals in the Centered-Log Ratio (CLR) space.
|
||||
|
||||
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||
:param confidence_level: float, the confidence level (default 0.95)
|
||||
:param bonferroni_correction: bool (default False), if True, a Bonferroni correction
|
||||
is applied to the significance level (`alpha`) before computing confidence intervals.
|
||||
The correction consists of replacing `alpha` with `alpha/n_classes`. When
|
||||
`n_classes=2` the correction is not applied because there is only one verification test
|
||||
since the other class is constrained. This is not necessarily true for n_classes>2.
|
||||
"""
|
||||
def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
|
||||
super().__init__(samples, CLRtransformation(), confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
|
||||
|
||||
|
||||
class ConfidenceIntervalsILR(ConfidenceIntervalsTransformed):
|
||||
"""
|
||||
Instantiates a Confidence Intervals in the Isometric-Log Ratio (CLR) space.
|
||||
|
||||
:param samples: np.ndarray of shape (n_bootstrap_samples, n_classes)
|
||||
:param confidence_level: float, the confidence level (default 0.95)
|
||||
:param bonferroni_correction: bool (default False), if True, a Bonferroni correction
|
||||
is applied to the significance level (`alpha`) before computing confidence intervals.
|
||||
The correction consists of replacing `alpha` with `alpha/n_classes`. When
|
||||
`n_classes=2` the correction is not applied because there is only one verification test
|
||||
since the other class is constrained. This is not necessarily true for n_classes>2.
|
||||
"""
|
||||
def __init__(self, samples, confidence_level=0.95, bonferroni_correction=False):
|
||||
super().__init__(samples, ILRtransformation(), confidence_level=confidence_level, bonferroni_correction=bonferroni_correction)
|
||||
|
||||
|
||||
|
||||
|
|
@ -611,23 +713,35 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
|
|||
self.verbose = verbose
|
||||
|
||||
def aggregation_fit(self, classif_predictions, labels):
|
||||
data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
|
||||
|
||||
self.quantifiers = []
|
||||
if self.n_train_samples==1:
|
||||
self.quantifier.aggregation_fit(classif_predictions, labels)
|
||||
self.quantifiers.append(self.quantifier)
|
||||
else:
|
||||
# model-based bootstrap (only on the aggregative part)
|
||||
n_examples = len(data)
|
||||
full_index = np.arange(n_examples)
|
||||
with qp.util.temp_seed(self.random_state):
|
||||
for i in range(self.n_train_samples):
|
||||
quantifier = copy.deepcopy(self.quantifier)
|
||||
index = resample(full_index, n_samples=n_examples)
|
||||
classif_predictions_i = classif_predictions.sampling_from_index(index)
|
||||
data_i = data.sampling_from_index(index)
|
||||
quantifier.aggregation_fit(classif_predictions_i, data_i)
|
||||
self.quantifiers.append(quantifier)
|
||||
if classif_predictions is None or labels is None:
|
||||
# The entire dataset was consumed for classifier training, implying there is no need for training
|
||||
# an aggregation function. If the bootstrap method was configured to train different aggregators
|
||||
# (i.e., self.n_train_samples>1), then an error is raise. Otherwise, the method ends.
|
||||
if self.n_train_samples > 1:
|
||||
raise ValueError(
|
||||
f'The underlying quantifier ({self.quantifier.__class__.__name__}) has consumed, all training '
|
||||
f'data, meaning the aggregation function needs none, but {self.n_train_samples=} is > 1, which '
|
||||
f'is inconsistent.'
|
||||
)
|
||||
else:
|
||||
# model-based bootstrap (only on the aggregative part)
|
||||
data = LabelledCollection(classif_predictions, labels, classes=self.classes_)
|
||||
n_examples = len(data)
|
||||
full_index = np.arange(n_examples)
|
||||
with qp.util.temp_seed(self.random_state):
|
||||
for i in range(self.n_train_samples):
|
||||
quantifier = copy.deepcopy(self.quantifier)
|
||||
index = resample(full_index, n_samples=n_examples)
|
||||
classif_predictions_i = classif_predictions.sampling_from_index(index)
|
||||
data_i = data.sampling_from_index(index)
|
||||
quantifier.aggregation_fit(classif_predictions_i, data_i)
|
||||
self.quantifiers.append(quantifier)
|
||||
return self
|
||||
|
||||
def aggregate(self, classif_predictions: np.ndarray):
|
||||
|
|
@ -653,8 +767,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
|
|||
return prev_estim, conf
|
||||
|
||||
def aggregate_conf(self, classif_predictions: np.ndarray, confidence_level=None):
|
||||
if confidence_level is None:
|
||||
confidence_level = self.confidence_level
|
||||
confidence_level = confidence_level or self.confidence_level
|
||||
|
||||
|
||||
n_samples = classif_predictions.shape[0]
|
||||
prevs = []
|
||||
|
|
@ -665,11 +779,8 @@ class AggregativeBootstrap(WithConfidenceABC, AggregativeQuantifier):
|
|||
for i in range(self.n_test_samples)
|
||||
)
|
||||
prevs.extend(results)
|
||||
# for i in tqdm(range(self.n_test_samples), desc='resampling', total=self.n_test_samples, disable=not self.verbose):
|
||||
# sample_i = resample(classif_predictions, n_samples=n_samples)
|
||||
# prev_i = quantifier.aggregate(sample_i)
|
||||
# prevs.append(prev_i)
|
||||
|
||||
prevs = np.array(prevs)
|
||||
conf = WithConfidenceABC.construct_region(prevs, confidence_level, method=self.region)
|
||||
prev_estim = conf.point_estimate()
|
||||
|
||||
|
|
@ -741,6 +852,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
|
|||
mcmc_seed: int = 0,
|
||||
confidence_level: float = 0.95,
|
||||
region: str = 'intervals',
|
||||
temperature = 1.,
|
||||
prior = 'uniform'):
|
||||
|
||||
if num_warmup <= 0:
|
||||
|
|
@ -761,6 +873,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
|
|||
self.mcmc_seed = mcmc_seed
|
||||
self.confidence_level = confidence_level
|
||||
self.region = region
|
||||
self.temperature = temperature
|
||||
self.prior = prior
|
||||
|
||||
# Array of shape (n_classes, n_predicted_classes,) where entry (y, c) is the number of instances
|
||||
|
|
@ -804,6 +917,7 @@ class BayesianCC(AggregativeCrispQuantifier, WithConfidenceABC):
|
|||
num_warmup=self.num_warmup,
|
||||
num_samples=self.num_samples,
|
||||
alpha=alpha,
|
||||
temperature=self.temperature,
|
||||
seed=self.mcmc_seed,
|
||||
)
|
||||
return self._samples
|
||||
|
|
|
|||
|
|
@ -0,0 +1 @@
|
|||
Subproject commit 9d433e3e35b4d111a3914a1e7d3257a8fcf24a9b
|
||||
Loading…
Reference in New Issue