calibrating temperature and coming back to Aitchison kernel
This commit is contained in:
parent
e33b291357
commit
ae9503a43b
|
|
@ -1,7 +1,23 @@
|
|||
- Add other methods that natively provide uncertainty quantification methods?
|
||||
- Add other methods that natively provide uncertainty quantification methods? (e.g., Ratio estimator, Card & Smith)
|
||||
- MPIW (Mean Prediction Interval Width): is the average of the amplitudes (w/o aggregating coverage whatsoever)
|
||||
- Implement Interval Score or Winkler Score
|
||||
- analyze across shift
|
||||
- add Bayesian EM
|
||||
- optimize also C and class_weight?
|
||||
- add Bayesian EM:
|
||||
- https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/mapls.py
|
||||
- take this opportunity to add RLLS: https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/rlls.py
|
||||
- add CIFAR10 and MNIST? Maybe consider also previously tested types of shift (tweak-one-out, etc.)? from RLLS paper
|
||||
- https://github.com/Angie-Liu/labelshift/tree/master
|
||||
- https://github.com/Angie-Liu/labelshift/blob/master/cifar10_for_labelshift.py
|
||||
- Note: MNIST is downloadable from https://archive.ics.uci.edu/dataset/683/mnist+database+of+handwritten+digits
|
||||
- consider prior knowledge in experiments:
|
||||
- One scenario in which our prior is uninformative (i.e., uniform)
|
||||
- One scenario in which our prior is wrong (e.g., alpha-prior = (3,2,1), protocol-prior=(1,1,5))
|
||||
- One scenario in which our prior is very good (e.g., alpha-prior = (3,2,1), protocol-prior=(3,2,1))
|
||||
- Do all my baseline methods come with the option to inform a prior?
|
||||
- consider different bandwidths within the bayesian approach?
|
||||
- how to improve the coverage (or how to increase the amplitude)?
|
||||
- Added temperature-calibration, improve things.
|
||||
- Is temperature-calibration actually not equivalent to using a larger bandwidth in the kernels?
|
||||
- consider W as a measure of quantification error (the current e.g., w-CI is the winkler...)
|
||||
- optimize also C and class_weight? [I don't think so, but could be done easily now]
|
||||
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
|
|||
raise ValueError(f'parameter {num_samples=} must be a positive integer')
|
||||
assert explore in ['simplex', 'clr', 'ilr'], \
|
||||
f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
|
||||
assert temperature>0., f'temperature must be >0'
|
||||
# assert temperature>0., f'temperature must be >0'
|
||||
assert engine in ['rw-mh', 'emcee', 'numpyro']
|
||||
|
||||
super().__init__(classifier, fit_classifier, val_split)
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from sklearn.model_selection import GridSearchCV, StratifiedKFold
|
|||
from copy import deepcopy as cp
|
||||
import quapy as qp
|
||||
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
||||
from BayesianKDEy.temperature_calibration import temp_calibration
|
||||
from build.lib.quapy.data import LabelledCollection
|
||||
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
|
||||
from quapy.method.base import BinaryQuantifier, BaseQuantifier
|
||||
|
|
@ -72,14 +73,22 @@ def methods():
|
|||
# yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
|
||||
#
|
||||
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
|
||||
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
|
||||
yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
|
||||
# yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
|
||||
# yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
|
||||
# yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
|
||||
# yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
|
||||
# yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
|
||||
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, step_size=.1, engine='numpyro', **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
|
||||
yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
|
||||
yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
|
||||
|
||||
|
||||
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
|
||||
|
|
@ -114,7 +123,12 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
|
|||
)
|
||||
|
||||
t_init = time()
|
||||
withconf_quantifier = withconf_constructor(best_hyperparams).fit(*training.Xy)
|
||||
withconf_quantifier = withconf_constructor(best_hyperparams)
|
||||
if hasattr(withconf_quantifier, 'temperature') and withconf_quantifier.temperature is None:
|
||||
train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
|
||||
temperature = temp_calibration(withconf_quantifier, train, val, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1)
|
||||
withconf_quantifier.temperature = temperature
|
||||
withconf_quantifier.fit(*training.Xy)
|
||||
tr_time = time() - t_init
|
||||
|
||||
# test
|
||||
|
|
@ -155,13 +169,13 @@ if __name__ == '__main__':
|
|||
binary = {
|
||||
'datasets': qp.datasets.UCI_BINARY_DATASETS,
|
||||
'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
|
||||
'sample_size': 100 # previous: 500
|
||||
'sample_size': 500 # previous: small 100, big 500
|
||||
}
|
||||
|
||||
multiclass = {
|
||||
'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
|
||||
'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
|
||||
'sample_size': 200 # previous: 1000
|
||||
'sample_size': 1000 # previous: small 200, big 1000
|
||||
}
|
||||
|
||||
result_dir = Path('./results')
|
||||
|
|
|
|||
|
|
@ -80,7 +80,30 @@ def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwar
|
|||
|
||||
|
||||
# methods = None # show all methods
|
||||
methods = ['BayesianACC', 'BayesianKDEy', 'BaKDE-emcee', 'BaKDE-numpyro']
|
||||
methods = ['BayesianACC', #'BayesianKDEy',
|
||||
#'BaKDE-emcee',
|
||||
# 'BaKDE-numpyro',
|
||||
# 'BaKDE-numpyro-T2',
|
||||
# 'BaKDE-numpyro-T10',
|
||||
# 'BaKDE-numpyro-T*',
|
||||
# 'BaKDE-Ait-numpyro',
|
||||
'BaKDE-Ait-numpyro-T*',
|
||||
# 'BaKDE-Ait-numpyro-T*ILR',
|
||||
'BootstrapACC',
|
||||
'BootstrapHDy',
|
||||
'BootstrapKDEy'
|
||||
]
|
||||
|
||||
def nicer(name:str):
|
||||
replacements = {
|
||||
'Bayesian': 'Ba',
|
||||
'Bootstrap': 'Bo',
|
||||
'numpyro': 'ro',
|
||||
'emcee': 'emc',
|
||||
}
|
||||
for k, v in replacements.items():
|
||||
name = name.replace(k,v)
|
||||
return name
|
||||
|
||||
for setup in ['multiclass']:
|
||||
path = f'./results/{setup}/*.pkl'
|
||||
|
|
@ -93,7 +116,7 @@ for setup in ['multiclass']:
|
|||
report = pickle.load(open(file, 'rb'))
|
||||
results = report['results']
|
||||
n_samples = len(results['ae'])
|
||||
table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
|
||||
table['method'].extend([nicer(method)] * n_samples)
|
||||
table['dataset'].extend([dataset] * n_samples)
|
||||
table['ae'].extend(results['ae'])
|
||||
table['rae'].extend(results['rae'])
|
||||
|
|
@ -101,28 +124,29 @@ for setup in ['multiclass']:
|
|||
# table['a-CI'].extend(results['amplitude'])
|
||||
|
||||
update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
|
||||
update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
|
||||
update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
|
||||
update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
|
||||
# update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
|
||||
# update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
|
||||
# update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
|
||||
|
||||
table['c-CI'].extend(report['coverage-CI'])
|
||||
table['a-CI'].extend(report['amplitude-CI'])
|
||||
table['w-CI'].extend(report['winkler-CI'])
|
||||
table['amperr-CI'].extend(report['amperr-CI'])
|
||||
|
||||
table['c-CE'].extend(report['coverage-CE'])
|
||||
table['a-CE'].extend(report['amplitude-CE'])
|
||||
table['amperr-CE'].extend(report['amperr-CE'])
|
||||
# table['c-CE'].extend(report['coverage-CE'])
|
||||
# table['a-CE'].extend(report['amplitude-CE'])
|
||||
# table['amperr-CE'].extend(report['amperr-CE'])
|
||||
|
||||
table['c-CLR'].extend(report['coverage-CLR'])
|
||||
table['a-CLR'].extend(report['amplitude-CLR'])
|
||||
table['amperr-CLR'].extend(report['amperr-CLR'])
|
||||
# table['c-CLR'].extend(report['coverage-CLR'])
|
||||
# table['a-CLR'].extend(report['amplitude-CLR'])
|
||||
# table['amperr-CLR'].extend(report['amperr-CLR'])
|
||||
|
||||
table['c-ILR'].extend(report['coverage-ILR'])
|
||||
table['a-ILR'].extend(report['amplitude-ILR'])
|
||||
table['amperr-ILR'].extend(report['amperr-ILR'])
|
||||
# table['c-ILR'].extend(report['coverage-ILR'])
|
||||
# table['a-ILR'].extend(report['amplitude-ILR'])
|
||||
# table['amperr-ILR'].extend(report['amperr-ILR'])
|
||||
|
||||
table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
|
||||
table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
|
||||
# table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
|
||||
# table['aitch'].extend()
|
||||
# table['reg-score-ILR'].extend(
|
||||
|
|
@ -145,16 +169,20 @@ for setup in ['multiclass']:
|
|||
tr_size[dataset] = len(data.training)
|
||||
|
||||
# remove datasets with more than max_classes classes
|
||||
# max_classes = 30
|
||||
# min_train = 1000
|
||||
# for data_name, n in n_classes.items():
|
||||
# if n > max_classes:
|
||||
# df = df[df["dataset"] != data_name]
|
||||
# for data_name, n in tr_size.items():
|
||||
# if n < min_train:
|
||||
# df = df[df["dataset"] != data_name]
|
||||
max_classes = 25
|
||||
min_train = 500
|
||||
ignore_datasets = ['poker_hand', 'hcv']
|
||||
for data_name, n in n_classes.items():
|
||||
if n > max_classes:
|
||||
df = df[df["dataset"] != data_name]
|
||||
for data_name, n in tr_size.items():
|
||||
if n < min_train:
|
||||
df = df[df["dataset"] != data_name]
|
||||
for data_name, n in tr_size.items():
|
||||
if data_name in ignore_datasets:
|
||||
df = df[df["dataset"] != data_name]
|
||||
|
||||
for region in ['CI', 'CE', 'CLR', 'ILR']:
|
||||
for region in ['CI']: #, 'CLR', 'ILR', 'CI']:
|
||||
if setup == 'binary' and region=='ILR':
|
||||
continue
|
||||
# pv = pd.pivot_table(
|
||||
|
|
@ -162,11 +190,12 @@ for setup in ['multiclass']:
|
|||
# )
|
||||
pv = pd.pivot_table(
|
||||
df, index='dataset', columns='method', values=[
|
||||
f'amperr-{region}',
|
||||
# f'amperr-{region}',
|
||||
f'a-{region}',
|
||||
f'c-{region}',
|
||||
#f'w-{region}',
|
||||
# f'w-{region}',
|
||||
'ae',
|
||||
'SRE',
|
||||
# 'rae',
|
||||
# f'aitch',
|
||||
# f'aitch-well'
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import os
|
||||
import pickle
|
||||
import warnings
|
||||
from os.path import join
|
||||
from pathlib import Path
|
||||
|
|
@ -27,7 +28,7 @@ from scipy.stats import dirichlet
|
|||
from collections import defaultdict
|
||||
from time import time
|
||||
from sklearn.base import clone, BaseEstimator
|
||||
|
||||
from temperature_calibration import temp_calibration
|
||||
|
||||
def methods():
|
||||
"""
|
||||
|
|
@ -50,7 +51,8 @@ def methods():
|
|||
# for T in [10, 20, 50, 100., 500]:
|
||||
# yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, num_warmup=3000, num_samples=1000, step_size=.1, **hyper),
|
||||
|
||||
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper),
|
||||
# yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper),
|
||||
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', **hyper)
|
||||
|
||||
|
||||
|
||||
|
|
@ -68,8 +70,8 @@ if __name__ == '__main__':
|
|||
|
||||
setup = multiclass
|
||||
# data_name = 'isolet'
|
||||
data_name = 'academic-success'
|
||||
# data_name = 'abalone'
|
||||
# data_name = 'academic-success'
|
||||
data_name = 'poker_hand'
|
||||
|
||||
qp.environ['SAMPLE_SIZE'] = setup['sample_size']
|
||||
print(f'dataset={data_name}')
|
||||
|
|
@ -86,6 +88,11 @@ if __name__ == '__main__':
|
|||
f'coverage={report["results"]["coverage"].mean():.5f}, '
|
||||
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
|
||||
|
||||
# best_hyperparams = pickle.load(open(hyper_path, 'rb'))
|
||||
# method = withconf_constructor(best_hyperparams)
|
||||
#
|
||||
# train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
|
||||
# temp_calibration(method, train, val, amplitude_threshold=0.1475, temp_grid=[2., 10., 100., 1000.])#temp_grid=[1., 1.25, 1.5, 1.75, 2.])
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,129 @@
|
|||
import os.path
|
||||
import pickle
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
import quapy as qp
|
||||
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
|
||||
from BayesianKDEy.full_experiments import experiment_path
|
||||
from quapy.protocol import UPP
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
from joblib import Parallel, delayed
|
||||
from itertools import product
|
||||
|
||||
# is there a correspondence between smaller bandwidths and overconfident likelihoods? if so, a high temperature
|
||||
# after calibration might be correlated; this script aims at analyzing this trend
|
||||
|
||||
datasets = qp.datasets.UCI_MULTICLASS_DATASETS
|
||||
|
||||
def show(results, values):
|
||||
df_res = pd.DataFrame(results)
|
||||
df_mean = (
|
||||
df_res
|
||||
.groupby(['bandwidth', 'temperature'], as_index=False)
|
||||
.mean(numeric_only=True)
|
||||
)
|
||||
pivot = df_mean.pivot(
|
||||
index='bandwidth',
|
||||
columns='temperature',
|
||||
values=values
|
||||
)
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
|
||||
plt.imshow(pivot, origin='lower', aspect='auto')
|
||||
plt.colorbar(label=values)
|
||||
|
||||
plt.xticks(range(len(pivot.columns)), pivot.columns)
|
||||
plt.yticks(range(len(pivot.index)), pivot.index)
|
||||
|
||||
plt.xlabel('Temperature')
|
||||
plt.ylabel('Bandwidth')
|
||||
plt.title(f'{values} heatmap')
|
||||
|
||||
plt.savefig(f'./plotcorr/{values}.png')
|
||||
plt.cla()
|
||||
plt.clf()
|
||||
|
||||
def run_experiment(
|
||||
bandwidth,
|
||||
temperature,
|
||||
train,
|
||||
test,
|
||||
dataset,
|
||||
):
|
||||
qp.environ['SAMPLE_SIZE'] = 1000
|
||||
bakde = BayesianKDEy(
|
||||
engine='numpyro',
|
||||
bandwidth=bandwidth,
|
||||
temperature=temperature,
|
||||
)
|
||||
bakde.fit(*train.Xy)
|
||||
|
||||
test_generator = UPP(test, repeats=20, random_state=0)
|
||||
|
||||
rows = []
|
||||
|
||||
for i, (sample, prev) in enumerate(
|
||||
tqdm(
|
||||
test_generator(),
|
||||
desc=f"bw={bandwidth}, T={temperature}",
|
||||
total=test_generator.total(),
|
||||
leave=False,
|
||||
)
|
||||
):
|
||||
point_estimate, region = bakde.predict_conf(sample)
|
||||
|
||||
rows.append({
|
||||
"bandwidth": bandwidth,
|
||||
"temperature": temperature,
|
||||
"dataset": dataset,
|
||||
"sample": i,
|
||||
"mae": qp.error.mae(prev, point_estimate),
|
||||
"cov": region.coverage(prev),
|
||||
"amp": region.montecarlo_proportion(n_trials=50_000),
|
||||
})
|
||||
|
||||
return rows
|
||||
|
||||
|
||||
bandwidths = [0.001, 0.005, 0.01, 0.05, 0.1]
|
||||
temperatures = [0.5, 0.75, 1., 2., 5.]
|
||||
|
||||
res_dir = './plotcorr/results'
|
||||
os.makedirs(res_dir, exist_ok=True)
|
||||
|
||||
all_rows = []
|
||||
for i, dataset in enumerate(datasets):
|
||||
if dataset in ['letter', 'isolet']: continue
|
||||
res_path = f'{res_dir}/{dataset}.pkl'
|
||||
if os.path.exists(res_path):
|
||||
print(f'loading results from pickle {res_path}')
|
||||
results_data_rows = pickle.load(open(res_path, 'rb'))
|
||||
else:
|
||||
print(f'{dataset=} [complete={i}/{len(datasets)}]')
|
||||
data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
|
||||
train, test = data.train_test
|
||||
jobs = list(product(bandwidths, temperatures))
|
||||
results_data_rows = Parallel(n_jobs=-1,backend="loky")(
|
||||
delayed(run_experiment)(bw, T, train, test, dataset) for bw, T in jobs
|
||||
)
|
||||
pickle.dump(results_data_rows, open(res_path, 'wb'), pickle.HIGHEST_PROTOCOL)
|
||||
all_rows.extend(results_data_rows)
|
||||
|
||||
results = defaultdict(list)
|
||||
for rows in all_rows:
|
||||
for row in rows:
|
||||
for k, v in row.items():
|
||||
results[k].append(v)
|
||||
|
||||
show(results, 'mae')
|
||||
show(results, 'cov')
|
||||
show(results, 'amp')
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,114 @@
|
|||
from build.lib.quapy.data import LabelledCollection
|
||||
from quapy.method.confidence import WithConfidenceABC
|
||||
from quapy.protocol import UPP
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import quapy as qp
|
||||
from joblib import Parallel, delayed
|
||||
import copy
|
||||
|
||||
|
||||
|
||||
|
||||
def temp_calibration(method:WithConfidenceABC,
|
||||
train:LabelledCollection,
|
||||
val:LabelledCollection,
|
||||
temp_grid=[1, 1.5, 2],
|
||||
num_samples=100,
|
||||
nominal_coverage=0.95,
|
||||
amplitude_threshold='auto',
|
||||
random_state=0,
|
||||
n_jobs=1,
|
||||
verbose=True):
|
||||
|
||||
assert (amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float)) and amplitude_threshold < 1.), \
|
||||
f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float < 1.0.'
|
||||
|
||||
if amplitude_threshold=='auto':
|
||||
n_classes = train.n_classes
|
||||
amplitude_threshold = .1/np.log(n_classes+1)
|
||||
|
||||
if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1:
|
||||
print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions')
|
||||
|
||||
|
||||
method.fit(*train.Xy)
|
||||
label_shift_prot = UPP(val, repeats=num_samples, random_state=random_state)
|
||||
|
||||
# results = []
|
||||
# temp_grid = sorted(temp_grid)
|
||||
# for temp in temp_grid:
|
||||
# method.temperature = temp
|
||||
# coverage = 0
|
||||
# amplitudes = []
|
||||
# errs = []
|
||||
# pbar = tqdm(enumerate(label_shift_prot()), total=label_shift_prot.total(), disable=not verbose)
|
||||
# for i, (sample, prev) in pbar:
|
||||
# point_estim, conf_region = method.predict_conf(sample)
|
||||
# if prev in conf_region:
|
||||
# coverage += 1
|
||||
# amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
|
||||
# errs.append(qp.error.mae(prev, point_estim))
|
||||
# if verbose:
|
||||
# pbar.set_description(
|
||||
# f'temperature={temp:.2f}, '
|
||||
# f'coverage={coverage/(i+1):.4f}, '
|
||||
# f'amplitude={np.mean(amplitudes):.4f},'
|
||||
# f'mae={np.mean(errs):.4f}'
|
||||
# )
|
||||
#
|
||||
# mean_coverage = coverage / label_shift_prot.total()
|
||||
# mean_amplitude = np.mean(amplitudes)
|
||||
#
|
||||
# if mean_amplitude < amplitude_threshold:
|
||||
# results.append((temp, mean_coverage, mean_amplitude))
|
||||
# else:
|
||||
# break
|
||||
|
||||
def evaluate_temperature(temp):
|
||||
local_method = copy.deepcopy(method)
|
||||
local_method.temperature = temp
|
||||
|
||||
coverage = 0
|
||||
amplitudes = []
|
||||
errs = []
|
||||
|
||||
for i, (sample, prev) in enumerate(label_shift_prot()):
|
||||
point_estim, conf_region = local_method.predict_conf(sample)
|
||||
|
||||
if prev in conf_region:
|
||||
coverage += 1
|
||||
|
||||
amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
|
||||
errs.append(qp.error.mae(prev, point_estim))
|
||||
|
||||
mean_coverage = coverage / label_shift_prot.total()
|
||||
mean_amplitude = np.mean(amplitudes)
|
||||
|
||||
return temp, mean_coverage, mean_amplitude
|
||||
|
||||
temp_grid = sorted(temp_grid)
|
||||
|
||||
raw_results = Parallel(n_jobs=n_jobs, backend="loky")(
|
||||
delayed(evaluate_temperature)(temp)
|
||||
for temp in tqdm(temp_grid, disable=not verbose)
|
||||
)
|
||||
results = [
|
||||
(temp, cov, amp)
|
||||
for temp, cov, amp in raw_results
|
||||
if amp < amplitude_threshold
|
||||
]
|
||||
|
||||
chosen_temperature = 1.
|
||||
if len(results) > 0:
|
||||
chosen_temperature = min(results, key=lambda x: abs(x[1]-nominal_coverage))[0]
|
||||
|
||||
print(f'chosen_temperature={chosen_temperature:.2f}')
|
||||
|
||||
return chosen_temperature
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -738,14 +738,29 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
|
|||
print(f'Loading UCI Muticlass {dataset_name} ({fullname})')
|
||||
|
||||
file = join(data_home, 'uci_multiclass', dataset_name+'.pkl')
|
||||
|
||||
|
||||
def dummify_categorical_features(df_features, dataset_id):
|
||||
CATEGORICAL_FEATURES = {
|
||||
158: ["S1", "C1", "S2", "C2", "S3", "C3", "S4", "C4", "S5", "C5"], # poker_hand
|
||||
}
|
||||
|
||||
categorical = CATEGORICAL_FEATURES.get(dataset_id, [])
|
||||
|
||||
X = df_features.copy()
|
||||
if categorical:
|
||||
X[categorical] = X[categorical].astype("category")
|
||||
X = pd.get_dummies(X, columns=categorical, drop_first=True)
|
||||
|
||||
return X
|
||||
|
||||
def download(id, name):
|
||||
df = fetch_ucirepo(id=id)
|
||||
|
||||
df.data.features = pd.get_dummies(df.data.features, drop_first=True)
|
||||
X, y = df.data.features.to_numpy(dtype=np.float64), df.data.targets.to_numpy().squeeze()
|
||||
X_df = dummify_categorical_features(df.data.features, id)
|
||||
X = X_df.to_numpy(dtype=np.float64)
|
||||
y = df.data.targets.to_numpy().squeeze()
|
||||
|
||||
assert y.ndim == 1, 'more than one y'
|
||||
assert y.ndim == 1, f'error: the dataset {id=} {name=} has more than one target variable'
|
||||
|
||||
classes = np.sort(np.unique(y))
|
||||
y = np.searchsorted(classes, y)
|
||||
|
|
|
|||
|
|
@ -139,7 +139,8 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
|
|||
:math:`\\mathcal{Y}` are the classes of interest
|
||||
:param prevs_true: array-like, true prevalence values
|
||||
:param prevs_hat: array-like, estimated prevalence values
|
||||
:param prevs_train: array-like, training prevalence values
|
||||
:param prevs_train: array-like with the training prevalence values, or a single prevalence vector when
|
||||
all the prevs_true and prevs_hat are computed on the same training set.
|
||||
:param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
|
||||
training prevalence is expected to be >0 everywhere.
|
||||
:return: the squared ratio error
|
||||
|
|
@ -149,6 +150,8 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
|
|||
prevs_train = np.asarray(prevs_train)
|
||||
assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape=} vs {prevs_hat.shape=}'
|
||||
assert prevs_true.shape[-1]==prevs_train.shape[-1], 'wrong shape for training prevalence'
|
||||
if prevs_true.ndim == 2 and prevs_train.ndim == 1:
|
||||
prevs_train = np.tile(prevs_train, reps=(prevs_true.shape[0], 1))
|
||||
if eps>0:
|
||||
prevs_true = smooth(prevs_true, eps)
|
||||
prevs_hat = smooth(prevs_hat, eps)
|
||||
|
|
@ -157,12 +160,13 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
|
|||
N = prevs_true.shape[-1]
|
||||
w = prevs_true / prevs_train
|
||||
w_hat = prevs_hat / prevs_train
|
||||
return (1./N) * (w - w_hat)**2.
|
||||
return (1./N) * np.sum((w - w_hat)**2., axis=-1)
|
||||
|
||||
|
||||
def msre(prevs_true, prevs_hat, prevs_train, eps=0.):
|
||||
"""
|
||||
Returns the mean across all experiments. See :function:`sre`.
|
||||
Returns the mean :function:`sre` across all experiments.
|
||||
|
||||
:param prevs_true: array-like, true prevalence values of shape (n_experiments, n_classes,) or (n_classes,)
|
||||
:param prevs_hat: array-like, estimated prevalence values of shape equal to prevs_true
|
||||
:param prevs_train: array-like, training prevalence values of (n_experiments, n_classes,) or (n_classes,)
|
||||
|
|
|
|||
|
|
@ -139,7 +139,7 @@ class WithConfidenceABC(ABC):
|
|||
"""
|
||||
Abstract class for confidence regions.
|
||||
"""
|
||||
METHODS = ['intervals', 'ellipse', 'ellipse-clr']
|
||||
REGION_TYPE = ['intervals', 'ellipse', 'ellipse-clr', 'ellipse-ilr']
|
||||
|
||||
@abstractmethod
|
||||
def predict_conf(self, instances, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC):
|
||||
|
|
@ -185,6 +185,8 @@ class WithConfidenceABC(ABC):
|
|||
region = ConfidenceEllipseSimplex(prev_estims, confidence_level=confidence_level)
|
||||
elif method == 'ellipse-clr':
|
||||
region = ConfidenceEllipseCLR(prev_estims, confidence_level=confidence_level)
|
||||
elif method == 'ellipse-ilr':
|
||||
region = ConfidenceEllipseILR(prev_estims, confidence_level=confidence_level)
|
||||
|
||||
if region is None:
|
||||
raise NotImplementedError(f'unknown method {method}')
|
||||
|
|
|
|||
Loading…
Reference in New Issue