calibrating temperature and coming back to Aitchison kernel
This commit is contained in:
parent
e33b291357
commit
ae9503a43b
|
|
@ -1,7 +1,23 @@
|
||||||
- Add other methods that natively provide uncertainty quantification methods?
|
- Add other methods that natively provide uncertainty quantification methods? (e.g., Ratio estimator, Card & Smith)
|
||||||
- MPIW (Mean Prediction Interval Width): is the average of the amplitudes (w/o aggregating coverage whatsoever)
|
- MPIW (Mean Prediction Interval Width): is the average of the amplitudes (w/o aggregating coverage whatsoever)
|
||||||
- Implement Interval Score or Winkler Score
|
- Implement Interval Score or Winkler Score
|
||||||
- analyze across shift
|
- analyze across shift
|
||||||
- add Bayesian EM
|
- add Bayesian EM:
|
||||||
- optimize also C and class_weight?
|
- https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/mapls.py
|
||||||
|
- take this opportunity to add RLLS: https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/rlls.py
|
||||||
|
- add CIFAR10 and MNIST? Maybe consider also previously tested types of shift (tweak-one-out, etc.)? from RLLS paper
|
||||||
|
- https://github.com/Angie-Liu/labelshift/tree/master
|
||||||
|
- https://github.com/Angie-Liu/labelshift/blob/master/cifar10_for_labelshift.py
|
||||||
|
- Note: MNIST is downloadable from https://archive.ics.uci.edu/dataset/683/mnist+database+of+handwritten+digits
|
||||||
|
- consider prior knowledge in experiments:
|
||||||
|
- One scenario in which our prior is uninformative (i.e., uniform)
|
||||||
|
- One scenario in which our prior is wrong (e.g., alpha-prior = (3,2,1), protocol-prior=(1,1,5))
|
||||||
|
- One scenario in which our prior is very good (e.g., alpha-prior = (3,2,1), protocol-prior=(3,2,1))
|
||||||
|
- Do all my baseline methods come with the option to inform a prior?
|
||||||
|
- consider different bandwidths within the bayesian approach?
|
||||||
|
- how to improve the coverage (or how to increase the amplitude)?
|
||||||
|
- Added temperature-calibration, improve things.
|
||||||
|
- Is temperature-calibration actually not equivalent to using a larger bandwidth in the kernels?
|
||||||
|
- consider W as a measure of quantification error (the current e.g., w-CI is the winkler...)
|
||||||
|
- optimize also C and class_weight? [I don't think so, but could be done easily now]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
|
||||||
raise ValueError(f'parameter {num_samples=} must be a positive integer')
|
raise ValueError(f'parameter {num_samples=} must be a positive integer')
|
||||||
assert explore in ['simplex', 'clr', 'ilr'], \
|
assert explore in ['simplex', 'clr', 'ilr'], \
|
||||||
f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
|
f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
|
||||||
assert temperature>0., f'temperature must be >0'
|
# assert temperature>0., f'temperature must be >0'
|
||||||
assert engine in ['rw-mh', 'emcee', 'numpyro']
|
assert engine in ['rw-mh', 'emcee', 'numpyro']
|
||||||
|
|
||||||
super().__init__(classifier, fit_classifier, val_split)
|
super().__init__(classifier, fit_classifier, val_split)
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ from sklearn.model_selection import GridSearchCV, StratifiedKFold
|
||||||
from copy import deepcopy as cp
|
from copy import deepcopy as cp
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
||||||
|
from BayesianKDEy.temperature_calibration import temp_calibration
|
||||||
from build.lib.quapy.data import LabelledCollection
|
from build.lib.quapy.data import LabelledCollection
|
||||||
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
|
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
|
||||||
from quapy.method.base import BinaryQuantifier, BaseQuantifier
|
from quapy.method.base import BinaryQuantifier, BaseQuantifier
|
||||||
|
|
@ -72,14 +73,22 @@ def methods():
|
||||||
# yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
|
# yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
|
||||||
#
|
#
|
||||||
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
|
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
|
||||||
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
|
yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
|
||||||
# yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
|
# yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
|
||||||
# yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
|
# yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
|
||||||
# yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
|
# yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
|
||||||
# yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
|
# yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
|
||||||
# yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
|
# yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
|
||||||
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method
|
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method
|
||||||
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, step_size=.1, engine='numpyro', **hyper), multiclass_method
|
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||||
|
yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method
|
||||||
|
yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
|
||||||
|
yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||||
|
yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
|
||||||
|
yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method
|
||||||
|
yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
|
||||||
|
yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||||
|
yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||||
|
|
||||||
|
|
||||||
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
|
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
|
||||||
|
|
@ -114,7 +123,12 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
|
||||||
)
|
)
|
||||||
|
|
||||||
t_init = time()
|
t_init = time()
|
||||||
withconf_quantifier = withconf_constructor(best_hyperparams).fit(*training.Xy)
|
withconf_quantifier = withconf_constructor(best_hyperparams)
|
||||||
|
if hasattr(withconf_quantifier, 'temperature') and withconf_quantifier.temperature is None:
|
||||||
|
train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
|
||||||
|
temperature = temp_calibration(withconf_quantifier, train, val, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1)
|
||||||
|
withconf_quantifier.temperature = temperature
|
||||||
|
withconf_quantifier.fit(*training.Xy)
|
||||||
tr_time = time() - t_init
|
tr_time = time() - t_init
|
||||||
|
|
||||||
# test
|
# test
|
||||||
|
|
@ -155,13 +169,13 @@ if __name__ == '__main__':
|
||||||
binary = {
|
binary = {
|
||||||
'datasets': qp.datasets.UCI_BINARY_DATASETS,
|
'datasets': qp.datasets.UCI_BINARY_DATASETS,
|
||||||
'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
|
'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
|
||||||
'sample_size': 100 # previous: 500
|
'sample_size': 500 # previous: small 100, big 500
|
||||||
}
|
}
|
||||||
|
|
||||||
multiclass = {
|
multiclass = {
|
||||||
'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
|
'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
|
||||||
'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
|
'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
|
||||||
'sample_size': 200 # previous: 1000
|
'sample_size': 1000 # previous: small 200, big 1000
|
||||||
}
|
}
|
||||||
|
|
||||||
result_dir = Path('./results')
|
result_dir = Path('./results')
|
||||||
|
|
|
||||||
|
|
@ -80,7 +80,30 @@ def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwar
|
||||||
|
|
||||||
|
|
||||||
# methods = None # show all methods
|
# methods = None # show all methods
|
||||||
methods = ['BayesianACC', 'BayesianKDEy', 'BaKDE-emcee', 'BaKDE-numpyro']
|
methods = ['BayesianACC', #'BayesianKDEy',
|
||||||
|
#'BaKDE-emcee',
|
||||||
|
# 'BaKDE-numpyro',
|
||||||
|
# 'BaKDE-numpyro-T2',
|
||||||
|
# 'BaKDE-numpyro-T10',
|
||||||
|
# 'BaKDE-numpyro-T*',
|
||||||
|
# 'BaKDE-Ait-numpyro',
|
||||||
|
'BaKDE-Ait-numpyro-T*',
|
||||||
|
# 'BaKDE-Ait-numpyro-T*ILR',
|
||||||
|
'BootstrapACC',
|
||||||
|
'BootstrapHDy',
|
||||||
|
'BootstrapKDEy'
|
||||||
|
]
|
||||||
|
|
||||||
|
def nicer(name:str):
|
||||||
|
replacements = {
|
||||||
|
'Bayesian': 'Ba',
|
||||||
|
'Bootstrap': 'Bo',
|
||||||
|
'numpyro': 'ro',
|
||||||
|
'emcee': 'emc',
|
||||||
|
}
|
||||||
|
for k, v in replacements.items():
|
||||||
|
name = name.replace(k,v)
|
||||||
|
return name
|
||||||
|
|
||||||
for setup in ['multiclass']:
|
for setup in ['multiclass']:
|
||||||
path = f'./results/{setup}/*.pkl'
|
path = f'./results/{setup}/*.pkl'
|
||||||
|
|
@ -93,7 +116,7 @@ for setup in ['multiclass']:
|
||||||
report = pickle.load(open(file, 'rb'))
|
report = pickle.load(open(file, 'rb'))
|
||||||
results = report['results']
|
results = report['results']
|
||||||
n_samples = len(results['ae'])
|
n_samples = len(results['ae'])
|
||||||
table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
|
table['method'].extend([nicer(method)] * n_samples)
|
||||||
table['dataset'].extend([dataset] * n_samples)
|
table['dataset'].extend([dataset] * n_samples)
|
||||||
table['ae'].extend(results['ae'])
|
table['ae'].extend(results['ae'])
|
||||||
table['rae'].extend(results['rae'])
|
table['rae'].extend(results['rae'])
|
||||||
|
|
@ -101,28 +124,29 @@ for setup in ['multiclass']:
|
||||||
# table['a-CI'].extend(results['amplitude'])
|
# table['a-CI'].extend(results['amplitude'])
|
||||||
|
|
||||||
update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
|
update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
|
||||||
update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
|
# update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
|
||||||
update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
|
# update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
|
||||||
update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
|
# update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
|
||||||
|
|
||||||
table['c-CI'].extend(report['coverage-CI'])
|
table['c-CI'].extend(report['coverage-CI'])
|
||||||
table['a-CI'].extend(report['amplitude-CI'])
|
table['a-CI'].extend(report['amplitude-CI'])
|
||||||
table['w-CI'].extend(report['winkler-CI'])
|
table['w-CI'].extend(report['winkler-CI'])
|
||||||
table['amperr-CI'].extend(report['amperr-CI'])
|
table['amperr-CI'].extend(report['amperr-CI'])
|
||||||
|
|
||||||
table['c-CE'].extend(report['coverage-CE'])
|
# table['c-CE'].extend(report['coverage-CE'])
|
||||||
table['a-CE'].extend(report['amplitude-CE'])
|
# table['a-CE'].extend(report['amplitude-CE'])
|
||||||
table['amperr-CE'].extend(report['amperr-CE'])
|
# table['amperr-CE'].extend(report['amperr-CE'])
|
||||||
|
|
||||||
table['c-CLR'].extend(report['coverage-CLR'])
|
# table['c-CLR'].extend(report['coverage-CLR'])
|
||||||
table['a-CLR'].extend(report['amplitude-CLR'])
|
# table['a-CLR'].extend(report['amplitude-CLR'])
|
||||||
table['amperr-CLR'].extend(report['amperr-CLR'])
|
# table['amperr-CLR'].extend(report['amperr-CLR'])
|
||||||
|
|
||||||
table['c-ILR'].extend(report['coverage-ILR'])
|
# table['c-ILR'].extend(report['coverage-ILR'])
|
||||||
table['a-ILR'].extend(report['amplitude-ILR'])
|
# table['a-ILR'].extend(report['amplitude-ILR'])
|
||||||
table['amperr-ILR'].extend(report['amperr-ILR'])
|
# table['amperr-ILR'].extend(report['amperr-ILR'])
|
||||||
|
|
||||||
table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
|
table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
|
||||||
|
table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
|
||||||
# table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
|
# table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
|
||||||
# table['aitch'].extend()
|
# table['aitch'].extend()
|
||||||
# table['reg-score-ILR'].extend(
|
# table['reg-score-ILR'].extend(
|
||||||
|
|
@ -145,16 +169,20 @@ for setup in ['multiclass']:
|
||||||
tr_size[dataset] = len(data.training)
|
tr_size[dataset] = len(data.training)
|
||||||
|
|
||||||
# remove datasets with more than max_classes classes
|
# remove datasets with more than max_classes classes
|
||||||
# max_classes = 30
|
max_classes = 25
|
||||||
# min_train = 1000
|
min_train = 500
|
||||||
# for data_name, n in n_classes.items():
|
ignore_datasets = ['poker_hand', 'hcv']
|
||||||
# if n > max_classes:
|
for data_name, n in n_classes.items():
|
||||||
# df = df[df["dataset"] != data_name]
|
if n > max_classes:
|
||||||
# for data_name, n in tr_size.items():
|
df = df[df["dataset"] != data_name]
|
||||||
# if n < min_train:
|
for data_name, n in tr_size.items():
|
||||||
# df = df[df["dataset"] != data_name]
|
if n < min_train:
|
||||||
|
df = df[df["dataset"] != data_name]
|
||||||
|
for data_name, n in tr_size.items():
|
||||||
|
if data_name in ignore_datasets:
|
||||||
|
df = df[df["dataset"] != data_name]
|
||||||
|
|
||||||
for region in ['CI', 'CE', 'CLR', 'ILR']:
|
for region in ['CI']: #, 'CLR', 'ILR', 'CI']:
|
||||||
if setup == 'binary' and region=='ILR':
|
if setup == 'binary' and region=='ILR':
|
||||||
continue
|
continue
|
||||||
# pv = pd.pivot_table(
|
# pv = pd.pivot_table(
|
||||||
|
|
@ -162,11 +190,12 @@ for setup in ['multiclass']:
|
||||||
# )
|
# )
|
||||||
pv = pd.pivot_table(
|
pv = pd.pivot_table(
|
||||||
df, index='dataset', columns='method', values=[
|
df, index='dataset', columns='method', values=[
|
||||||
f'amperr-{region}',
|
# f'amperr-{region}',
|
||||||
f'a-{region}',
|
f'a-{region}',
|
||||||
f'c-{region}',
|
f'c-{region}',
|
||||||
#f'w-{region}',
|
# f'w-{region}',
|
||||||
'ae',
|
'ae',
|
||||||
|
'SRE',
|
||||||
# 'rae',
|
# 'rae',
|
||||||
# f'aitch',
|
# f'aitch',
|
||||||
# f'aitch-well'
|
# f'aitch-well'
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import os
|
import os
|
||||||
|
import pickle
|
||||||
import warnings
|
import warnings
|
||||||
from os.path import join
|
from os.path import join
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
@ -27,7 +28,7 @@ from scipy.stats import dirichlet
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from time import time
|
from time import time
|
||||||
from sklearn.base import clone, BaseEstimator
|
from sklearn.base import clone, BaseEstimator
|
||||||
|
from temperature_calibration import temp_calibration
|
||||||
|
|
||||||
def methods():
|
def methods():
|
||||||
"""
|
"""
|
||||||
|
|
@ -50,7 +51,8 @@ def methods():
|
||||||
# for T in [10, 20, 50, 100., 500]:
|
# for T in [10, 20, 50, 100., 500]:
|
||||||
# yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, num_warmup=3000, num_samples=1000, step_size=.1, **hyper),
|
# yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, num_warmup=3000, num_samples=1000, step_size=.1, **hyper),
|
||||||
|
|
||||||
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper),
|
# yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper),
|
||||||
|
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', **hyper)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -68,8 +70,8 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
setup = multiclass
|
setup = multiclass
|
||||||
# data_name = 'isolet'
|
# data_name = 'isolet'
|
||||||
data_name = 'academic-success'
|
# data_name = 'academic-success'
|
||||||
# data_name = 'abalone'
|
data_name = 'poker_hand'
|
||||||
|
|
||||||
qp.environ['SAMPLE_SIZE'] = setup['sample_size']
|
qp.environ['SAMPLE_SIZE'] = setup['sample_size']
|
||||||
print(f'dataset={data_name}')
|
print(f'dataset={data_name}')
|
||||||
|
|
@ -86,6 +88,11 @@ if __name__ == '__main__':
|
||||||
f'coverage={report["results"]["coverage"].mean():.5f}, '
|
f'coverage={report["results"]["coverage"].mean():.5f}, '
|
||||||
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
|
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
|
||||||
|
|
||||||
|
# best_hyperparams = pickle.load(open(hyper_path, 'rb'))
|
||||||
|
# method = withconf_constructor(best_hyperparams)
|
||||||
|
#
|
||||||
|
# train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
|
||||||
|
# temp_calibration(method, train, val, amplitude_threshold=0.1475, temp_grid=[2., 10., 100., 1000.])#temp_grid=[1., 1.25, 1.5, 1.75, 2.])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,129 @@
|
||||||
|
import os.path
|
||||||
|
import pickle
|
||||||
|
from collections import defaultdict
|
||||||
|
from pathlib import Path
|
||||||
|
import pandas as pd
|
||||||
|
import quapy as qp
|
||||||
|
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
||||||
|
from BayesianKDEy.full_experiments import experiment_path
|
||||||
|
from quapy.protocol import UPP
|
||||||
|
import numpy as np
|
||||||
|
from tqdm import tqdm
|
||||||
|
from joblib import Parallel, delayed
|
||||||
|
from itertools import product
|
||||||
|
|
||||||
|
# is there a correspondence between smaller bandwidths and overconfident likelihoods? if so, a high temperature
|
||||||
|
# after calibration might be correlated; this script aims at analyzing this trend
|
||||||
|
|
||||||
|
datasets = qp.datasets.UCI_MULTICLASS_DATASETS
|
||||||
|
|
||||||
|
def show(results, values):
|
||||||
|
df_res = pd.DataFrame(results)
|
||||||
|
df_mean = (
|
||||||
|
df_res
|
||||||
|
.groupby(['bandwidth', 'temperature'], as_index=False)
|
||||||
|
.mean(numeric_only=True)
|
||||||
|
)
|
||||||
|
pivot = df_mean.pivot(
|
||||||
|
index='bandwidth',
|
||||||
|
columns='temperature',
|
||||||
|
values=values
|
||||||
|
)
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
|
||||||
|
|
||||||
|
plt.imshow(pivot, origin='lower', aspect='auto')
|
||||||
|
plt.colorbar(label=values)
|
||||||
|
|
||||||
|
plt.xticks(range(len(pivot.columns)), pivot.columns)
|
||||||
|
plt.yticks(range(len(pivot.index)), pivot.index)
|
||||||
|
|
||||||
|
plt.xlabel('Temperature')
|
||||||
|
plt.ylabel('Bandwidth')
|
||||||
|
plt.title(f'{values} heatmap')
|
||||||
|
|
||||||
|
plt.savefig(f'./plotcorr/{values}.png')
|
||||||
|
plt.cla()
|
||||||
|
plt.clf()
|
||||||
|
|
||||||
|
def run_experiment(
|
||||||
|
bandwidth,
|
||||||
|
temperature,
|
||||||
|
train,
|
||||||
|
test,
|
||||||
|
dataset,
|
||||||
|
):
|
||||||
|
qp.environ['SAMPLE_SIZE'] = 1000
|
||||||
|
bakde = BayesianKDEy(
|
||||||
|
engine='numpyro',
|
||||||
|
bandwidth=bandwidth,
|
||||||
|
temperature=temperature,
|
||||||
|
)
|
||||||
|
bakde.fit(*train.Xy)
|
||||||
|
|
||||||
|
test_generator = UPP(test, repeats=20, random_state=0)
|
||||||
|
|
||||||
|
rows = []
|
||||||
|
|
||||||
|
for i, (sample, prev) in enumerate(
|
||||||
|
tqdm(
|
||||||
|
test_generator(),
|
||||||
|
desc=f"bw={bandwidth}, T={temperature}",
|
||||||
|
total=test_generator.total(),
|
||||||
|
leave=False,
|
||||||
|
)
|
||||||
|
):
|
||||||
|
point_estimate, region = bakde.predict_conf(sample)
|
||||||
|
|
||||||
|
rows.append({
|
||||||
|
"bandwidth": bandwidth,
|
||||||
|
"temperature": temperature,
|
||||||
|
"dataset": dataset,
|
||||||
|
"sample": i,
|
||||||
|
"mae": qp.error.mae(prev, point_estimate),
|
||||||
|
"cov": region.coverage(prev),
|
||||||
|
"amp": region.montecarlo_proportion(n_trials=50_000),
|
||||||
|
})
|
||||||
|
|
||||||
|
return rows
|
||||||
|
|
||||||
|
|
||||||
|
bandwidths = [0.001, 0.005, 0.01, 0.05, 0.1]
|
||||||
|
temperatures = [0.5, 0.75, 1., 2., 5.]
|
||||||
|
|
||||||
|
res_dir = './plotcorr/results'
|
||||||
|
os.makedirs(res_dir, exist_ok=True)
|
||||||
|
|
||||||
|
all_rows = []
|
||||||
|
for i, dataset in enumerate(datasets):
|
||||||
|
if dataset in ['letter', 'isolet']: continue
|
||||||
|
res_path = f'{res_dir}/{dataset}.pkl'
|
||||||
|
if os.path.exists(res_path):
|
||||||
|
print(f'loading results from pickle {res_path}')
|
||||||
|
results_data_rows = pickle.load(open(res_path, 'rb'))
|
||||||
|
else:
|
||||||
|
print(f'{dataset=} [complete={i}/{len(datasets)}]')
|
||||||
|
data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
|
||||||
|
train, test = data.train_test
|
||||||
|
jobs = list(product(bandwidths, temperatures))
|
||||||
|
results_data_rows = Parallel(n_jobs=-1,backend="loky")(
|
||||||
|
delayed(run_experiment)(bw, T, train, test, dataset) for bw, T in jobs
|
||||||
|
)
|
||||||
|
pickle.dump(results_data_rows, open(res_path, 'wb'), pickle.HIGHEST_PROTOCOL)
|
||||||
|
all_rows.extend(results_data_rows)
|
||||||
|
|
||||||
|
results = defaultdict(list)
|
||||||
|
for rows in all_rows:
|
||||||
|
for row in rows:
|
||||||
|
for k, v in row.items():
|
||||||
|
results[k].append(v)
|
||||||
|
|
||||||
|
show(results, 'mae')
|
||||||
|
show(results, 'cov')
|
||||||
|
show(results, 'amp')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,114 @@
|
||||||
|
from build.lib.quapy.data import LabelledCollection
|
||||||
|
from quapy.method.confidence import WithConfidenceABC
|
||||||
|
from quapy.protocol import UPP
|
||||||
|
import numpy as np
|
||||||
|
from tqdm import tqdm
|
||||||
|
import quapy as qp
|
||||||
|
from joblib import Parallel, delayed
|
||||||
|
import copy
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def temp_calibration(method:WithConfidenceABC,
|
||||||
|
train:LabelledCollection,
|
||||||
|
val:LabelledCollection,
|
||||||
|
temp_grid=[1, 1.5, 2],
|
||||||
|
num_samples=100,
|
||||||
|
nominal_coverage=0.95,
|
||||||
|
amplitude_threshold='auto',
|
||||||
|
random_state=0,
|
||||||
|
n_jobs=1,
|
||||||
|
verbose=True):
|
||||||
|
|
||||||
|
assert (amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float)) and amplitude_threshold < 1.), \
|
||||||
|
f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float < 1.0.'
|
||||||
|
|
||||||
|
if amplitude_threshold=='auto':
|
||||||
|
n_classes = train.n_classes
|
||||||
|
amplitude_threshold = .1/np.log(n_classes+1)
|
||||||
|
|
||||||
|
if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1:
|
||||||
|
print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions')
|
||||||
|
|
||||||
|
|
||||||
|
method.fit(*train.Xy)
|
||||||
|
label_shift_prot = UPP(val, repeats=num_samples, random_state=random_state)
|
||||||
|
|
||||||
|
# results = []
|
||||||
|
# temp_grid = sorted(temp_grid)
|
||||||
|
# for temp in temp_grid:
|
||||||
|
# method.temperature = temp
|
||||||
|
# coverage = 0
|
||||||
|
# amplitudes = []
|
||||||
|
# errs = []
|
||||||
|
# pbar = tqdm(enumerate(label_shift_prot()), total=label_shift_prot.total(), disable=not verbose)
|
||||||
|
# for i, (sample, prev) in pbar:
|
||||||
|
# point_estim, conf_region = method.predict_conf(sample)
|
||||||
|
# if prev in conf_region:
|
||||||
|
# coverage += 1
|
||||||
|
# amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
|
||||||
|
# errs.append(qp.error.mae(prev, point_estim))
|
||||||
|
# if verbose:
|
||||||
|
# pbar.set_description(
|
||||||
|
# f'temperature={temp:.2f}, '
|
||||||
|
# f'coverage={coverage/(i+1):.4f}, '
|
||||||
|
# f'amplitude={np.mean(amplitudes):.4f},'
|
||||||
|
# f'mae={np.mean(errs):.4f}'
|
||||||
|
# )
|
||||||
|
#
|
||||||
|
# mean_coverage = coverage / label_shift_prot.total()
|
||||||
|
# mean_amplitude = np.mean(amplitudes)
|
||||||
|
#
|
||||||
|
# if mean_amplitude < amplitude_threshold:
|
||||||
|
# results.append((temp, mean_coverage, mean_amplitude))
|
||||||
|
# else:
|
||||||
|
# break
|
||||||
|
|
||||||
|
def evaluate_temperature(temp):
|
||||||
|
local_method = copy.deepcopy(method)
|
||||||
|
local_method.temperature = temp
|
||||||
|
|
||||||
|
coverage = 0
|
||||||
|
amplitudes = []
|
||||||
|
errs = []
|
||||||
|
|
||||||
|
for i, (sample, prev) in enumerate(label_shift_prot()):
|
||||||
|
point_estim, conf_region = local_method.predict_conf(sample)
|
||||||
|
|
||||||
|
if prev in conf_region:
|
||||||
|
coverage += 1
|
||||||
|
|
||||||
|
amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
|
||||||
|
errs.append(qp.error.mae(prev, point_estim))
|
||||||
|
|
||||||
|
mean_coverage = coverage / label_shift_prot.total()
|
||||||
|
mean_amplitude = np.mean(amplitudes)
|
||||||
|
|
||||||
|
return temp, mean_coverage, mean_amplitude
|
||||||
|
|
||||||
|
temp_grid = sorted(temp_grid)
|
||||||
|
|
||||||
|
raw_results = Parallel(n_jobs=n_jobs, backend="loky")(
|
||||||
|
delayed(evaluate_temperature)(temp)
|
||||||
|
for temp in tqdm(temp_grid, disable=not verbose)
|
||||||
|
)
|
||||||
|
results = [
|
||||||
|
(temp, cov, amp)
|
||||||
|
for temp, cov, amp in raw_results
|
||||||
|
if amp < amplitude_threshold
|
||||||
|
]
|
||||||
|
|
||||||
|
chosen_temperature = 1.
|
||||||
|
if len(results) > 0:
|
||||||
|
chosen_temperature = min(results, key=lambda x: abs(x[1]-nominal_coverage))[0]
|
||||||
|
|
||||||
|
print(f'chosen_temperature={chosen_temperature:.2f}')
|
||||||
|
|
||||||
|
return chosen_temperature
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -738,14 +738,29 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
|
||||||
print(f'Loading UCI Muticlass {dataset_name} ({fullname})')
|
print(f'Loading UCI Muticlass {dataset_name} ({fullname})')
|
||||||
|
|
||||||
file = join(data_home, 'uci_multiclass', dataset_name+'.pkl')
|
file = join(data_home, 'uci_multiclass', dataset_name+'.pkl')
|
||||||
|
|
||||||
|
def dummify_categorical_features(df_features, dataset_id):
|
||||||
|
CATEGORICAL_FEATURES = {
|
||||||
|
158: ["S1", "C1", "S2", "C2", "S3", "C3", "S4", "C4", "S5", "C5"], # poker_hand
|
||||||
|
}
|
||||||
|
|
||||||
|
categorical = CATEGORICAL_FEATURES.get(dataset_id, [])
|
||||||
|
|
||||||
|
X = df_features.copy()
|
||||||
|
if categorical:
|
||||||
|
X[categorical] = X[categorical].astype("category")
|
||||||
|
X = pd.get_dummies(X, columns=categorical, drop_first=True)
|
||||||
|
|
||||||
|
return X
|
||||||
|
|
||||||
def download(id, name):
|
def download(id, name):
|
||||||
df = fetch_ucirepo(id=id)
|
df = fetch_ucirepo(id=id)
|
||||||
|
|
||||||
df.data.features = pd.get_dummies(df.data.features, drop_first=True)
|
X_df = dummify_categorical_features(df.data.features, id)
|
||||||
X, y = df.data.features.to_numpy(dtype=np.float64), df.data.targets.to_numpy().squeeze()
|
X = X_df.to_numpy(dtype=np.float64)
|
||||||
|
y = df.data.targets.to_numpy().squeeze()
|
||||||
|
|
||||||
assert y.ndim == 1, 'more than one y'
|
assert y.ndim == 1, f'error: the dataset {id=} {name=} has more than one target variable'
|
||||||
|
|
||||||
classes = np.sort(np.unique(y))
|
classes = np.sort(np.unique(y))
|
||||||
y = np.searchsorted(classes, y)
|
y = np.searchsorted(classes, y)
|
||||||
|
|
|
||||||
|
|
@ -139,7 +139,8 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
|
||||||
:math:`\\mathcal{Y}` are the classes of interest
|
:math:`\\mathcal{Y}` are the classes of interest
|
||||||
:param prevs_true: array-like, true prevalence values
|
:param prevs_true: array-like, true prevalence values
|
||||||
:param prevs_hat: array-like, estimated prevalence values
|
:param prevs_hat: array-like, estimated prevalence values
|
||||||
:param prevs_train: array-like, training prevalence values
|
:param prevs_train: array-like with the training prevalence values, or a single prevalence vector when
|
||||||
|
all the prevs_true and prevs_hat are computed on the same training set.
|
||||||
:param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
|
:param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
|
||||||
training prevalence is expected to be >0 everywhere.
|
training prevalence is expected to be >0 everywhere.
|
||||||
:return: the squared ratio error
|
:return: the squared ratio error
|
||||||
|
|
@ -149,6 +150,8 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
|
||||||
prevs_train = np.asarray(prevs_train)
|
prevs_train = np.asarray(prevs_train)
|
||||||
assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape=} vs {prevs_hat.shape=}'
|
assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape=} vs {prevs_hat.shape=}'
|
||||||
assert prevs_true.shape[-1]==prevs_train.shape[-1], 'wrong shape for training prevalence'
|
assert prevs_true.shape[-1]==prevs_train.shape[-1], 'wrong shape for training prevalence'
|
||||||
|
if prevs_true.ndim == 2 and prevs_train.ndim == 1:
|
||||||
|
prevs_train = np.tile(prevs_train, reps=(prevs_true.shape[0], 1))
|
||||||
if eps>0:
|
if eps>0:
|
||||||
prevs_true = smooth(prevs_true, eps)
|
prevs_true = smooth(prevs_true, eps)
|
||||||
prevs_hat = smooth(prevs_hat, eps)
|
prevs_hat = smooth(prevs_hat, eps)
|
||||||
|
|
@ -157,12 +160,13 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
|
||||||
N = prevs_true.shape[-1]
|
N = prevs_true.shape[-1]
|
||||||
w = prevs_true / prevs_train
|
w = prevs_true / prevs_train
|
||||||
w_hat = prevs_hat / prevs_train
|
w_hat = prevs_hat / prevs_train
|
||||||
return (1./N) * (w - w_hat)**2.
|
return (1./N) * np.sum((w - w_hat)**2., axis=-1)
|
||||||
|
|
||||||
|
|
||||||
def msre(prevs_true, prevs_hat, prevs_train, eps=0.):
|
def msre(prevs_true, prevs_hat, prevs_train, eps=0.):
|
||||||
"""
|
"""
|
||||||
Returns the mean across all experiments. See :function:`sre`.
|
Returns the mean :function:`sre` across all experiments.
|
||||||
|
|
||||||
:param prevs_true: array-like, true prevalence values of shape (n_experiments, n_classes,) or (n_classes,)
|
:param prevs_true: array-like, true prevalence values of shape (n_experiments, n_classes,) or (n_classes,)
|
||||||
:param prevs_hat: array-like, estimated prevalence values of shape equal to prevs_true
|
:param prevs_hat: array-like, estimated prevalence values of shape equal to prevs_true
|
||||||
:param prevs_train: array-like, training prevalence values of (n_experiments, n_classes,) or (n_classes,)
|
:param prevs_train: array-like, training prevalence values of (n_experiments, n_classes,) or (n_classes,)
|
||||||
|
|
|
||||||
|
|
@ -139,7 +139,7 @@ class WithConfidenceABC(ABC):
|
||||||
"""
|
"""
|
||||||
Abstract class for confidence regions.
|
Abstract class for confidence regions.
|
||||||
"""
|
"""
|
||||||
METHODS = ['intervals', 'ellipse', 'ellipse-clr']
|
REGION_TYPE = ['intervals', 'ellipse', 'ellipse-clr', 'ellipse-ilr']
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def predict_conf(self, instances, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC):
|
def predict_conf(self, instances, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC):
|
||||||
|
|
@ -185,6 +185,8 @@ class WithConfidenceABC(ABC):
|
||||||
region = ConfidenceEllipseSimplex(prev_estims, confidence_level=confidence_level)
|
region = ConfidenceEllipseSimplex(prev_estims, confidence_level=confidence_level)
|
||||||
elif method == 'ellipse-clr':
|
elif method == 'ellipse-clr':
|
||||||
region = ConfidenceEllipseCLR(prev_estims, confidence_level=confidence_level)
|
region = ConfidenceEllipseCLR(prev_estims, confidence_level=confidence_level)
|
||||||
|
elif method == 'ellipse-ilr':
|
||||||
|
region = ConfidenceEllipseILR(prev_estims, confidence_level=confidence_level)
|
||||||
|
|
||||||
if region is None:
|
if region is None:
|
||||||
raise NotImplementedError(f'unknown method {method}')
|
raise NotImplementedError(f'unknown method {method}')
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue