214 lines
7.8 KiB
Python
214 lines
7.8 KiB
Python
import pickle
|
|
from collections import defaultdict
|
|
|
|
from joblib import Parallel, delayed
|
|
from tqdm import tqdm
|
|
import pandas as pd
|
|
from glob import glob
|
|
from pathlib import Path
|
|
import quapy as qp
|
|
from BayesianKDEy.full_experiments import fetch_UCI_multiclass, fetch_UCI_binary
|
|
from error import dist_aitchison
|
|
from quapy.method.confidence import ConfidenceIntervals
|
|
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
|
|
|
|
# Console rendering options for the summary tables printed at the end of this
# script: never truncate rows or columns, keep each frame on a single wide
# line, and show floats with four decimals.
pd.options.display.max_columns = None
pd.options.display.width = 2000
pd.options.display.max_rows = None
pd.options.display.expand_frame_repr = False
pd.options.display.precision = 4
pd.options.display.float_format = "{:.4f}".format
|
|
|
|
|
|
def region_score(true_prev, region: ConfidenceRegionABC):
    """Score a confidence region against the true prevalence.

    The score is the sum of two terms:

    * the value returned by ``region.montecarlo_proportion(50_000)``;
    * a miss penalty, which is ``0`` when ``true_prev in region`` and otherwise
      the Aitchison distance between ``true_prev`` and
      ``region.closest_point_in_region(true_prev)`` multiplied by
      ``1 / region.alpha``.

    :param true_prev: the true prevalence vector to check against the region
    :param region: the confidence region being scored
    :return: amplitude plus miss penalty
    """
    amplitude = region.montecarlo_proportion(50_000)

    if true_prev not in region:
        # the penalty weight is the inverse of the region's alpha
        miss_weight = 1/region.alpha
        nearest_inside = region.closest_point_in_region(true_prev)
        penalty = miss_weight * dist_aitchison(true_prev, nearest_inside)
    else:
        penalty = 0

    return amplitude + penalty
|
|
|
|
|
|
|
|
def compute_coverage_amplitude(region_constructor, **kwargs):
    """Build one confidence region per experiment and collect its metrics.

    NOTE: this function reads the module-level ``results`` mapping (its
    ``'samples'`` and ``'true-prevs'`` entries), which the main loop of this
    script assigns before calling; it is not passed in as an argument.

    For every ``(samples, true_prevs)`` pair a region is created with
    ``region_constructor(samples, **kwargs)``. The pairs are processed by
    joblib using 3 jobs.

    :param region_constructor: callable returning a confidence region for a set
        of samples
    :param kwargs: extra keyword arguments forwarded to ``region_constructor``
    :return: three lists, one entry per experiment: coverage values,
        ``montecarlo_proportion()`` values, and mean Winkler scores (``None``
        for regions that are not ``ConfidenceIntervals`` instances)
    """

    def evaluate(posterior_samples, test_prevs):
        conf_region = region_constructor(posterior_samples, **kwargs)
        # the Winkler score is only requested for interval-based regions
        winkler_score = (
            conf_region.mean_winkler_score(test_prevs)
            if isinstance(conf_region, ConfidenceIntervals)
            else None
        )
        return conf_region.coverage(test_prevs), conf_region.montecarlo_proportion(), winkler_score

    sample_sets = results['samples']
    true_prev_sets = results['true-prevs']

    progress = tqdm(
        zip(sample_sets, true_prev_sets),
        total=len(sample_sets),
        desc='constructing ellipses'
    )
    jobs = (delayed(evaluate)(samples, true_prevs) for samples, true_prevs in progress)
    per_experiment = Parallel(n_jobs=3)(jobs)

    # transpose the list of (coverage, amplitude, winkler) triples into three lists
    coverage, amplitude, winkler = (list(column) for column in zip(*per_experiment))
    return coverage, amplitude, winkler
|
|
|
|
|
|
def update_pickle(report, pickle_path, updated_dict: dict):
    """Merge ``updated_dict`` into ``report`` and write ``report`` to disk.

    ``report`` is modified in place: every key of ``updated_dict`` is set on
    it, overwriting existing entries with the same key. The whole ``report`` is
    then pickled to ``pickle_path`` with the highest available protocol,
    replacing any previous content of that file.

    :param report: mutable mapping to update and persist
    :param pickle_path: destination path of the pickle file
    :param updated_dict: entries to add to (or overwrite in) ``report``
    """
    for key, value in updated_dict.items():
        report[key] = value

    # Use a context manager so the handle is flushed and closed as soon as the
    # dump finishes, instead of relying on garbage collection.
    with open(pickle_path, 'wb') as pickle_file:
        pickle.dump(report, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
|
|
|
|
|
|
def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs):
    """Add the metrics of one confidence-region type to ``report``, if missing.

    Nothing happens when ``report`` already holds the key
    ``f'coverage-{conf_name}'``. Otherwise the coverage, amplitude and Winkler
    values are obtained from ``compute_coverage_amplitude`` and stored, together
    with the derived "amperr" values, under the keys
    ``coverage-``, ``amplitude-``, ``winkler-`` and ``amperr-{conf_name}``;
    the updated report is then written to ``file`` via ``update_pickle``.

    :param report: report mapping to extend (modified in place)
    :param file: path of the pickle file the report is written back to
    :param conf_name: short name used as suffix of the stored keys
    :param conf_region_class: region constructor passed to
        ``compute_coverage_amplitude``
    :param kwargs: extra keyword arguments for the region constructor
    """
    if f'coverage-{conf_name}' in report:
        # already computed for this report; nothing to do
        return

    covs, amps, winkler = compute_coverage_amplitude(conf_region_class, **kwargs)

    # amperr (lower is better) is the amplitude when the coverage equals 1.0
    # (the true value was covered), and 1 (the maximum amplitude) otherwise
    amperrs = []
    for amp, cov in zip(amps, covs):
        amperrs.append(amp if cov == 1.0 else 1.)

    new_entries = {}
    new_entries[f'coverage-{conf_name}'] = covs
    new_entries[f'amplitude-{conf_name}'] = amps
    new_entries[f'winkler-{conf_name}'] = winkler
    new_entries[f'amperr-{conf_name}'] = amperrs

    update_pickle(report, file, new_entries)
|
|
|
|
|
|
# Names of the methods whose result files are processed by the loop below;
# files for any other method are skipped. Set `methods = None` to show all
# methods. Commented-out names are currently disabled.
# methods = None
methods = [
    'BayesianACC',
    # 'BayesianKDEy',
    # 'BaKDE-emcee',
    # 'BaKDE-numpyro',
    # 'BaKDE-numpyro-T2',
    # 'BaKDE-numpyro-T10',
    # 'BaKDE-numpyro-T*',
    # 'BaKDE-Ait-numpyro',
    # 'BaKDE-Ait-numpyro-T*',
    'BaKDE-Ait-numpyro-T*-U',
    'BootstrapACC',
    'BootstrapHDy',
    'BootstrapKDEy',
    'BootstrapEMQ',
]
|
|
|
|
def nicer(name:str):
    """Return a shortened display version of a method name.

    Every occurrence of the following substrings is replaced, in this order:
    ``'Bayesian'`` -> ``'Ba'``, ``'Bootstrap'`` -> ``'Bo'``,
    ``'numpyro'`` -> ``'ro'``, ``'emcee'`` -> ``'emc'``.

    :param name: the original method name
    :return: the abbreviated name
    """
    abbreviations = (
        ('Bayesian', 'Ba'),
        ('Bootstrap', 'Bo'),
        ('numpyro', 'ro'),
        ('emcee', 'emc'),
    )
    shortened = name
    for long_form, short_form in abbreviations:
        shortened = shortened.replace(long_form, short_form)
    return shortened
|
|
|
|
# Main report: for every experimental setup, load each result pickle, make sure
# the confidence-region metrics are stored in it, gather everything into one
# long-format table, drop the datasets that are filtered out, and print a
# dataset-by-method pivot table of mean values.
for setup in ['multiclass']:
    path = f'./results/{setup}/*.pkl'
    table = defaultdict(list)

    # expand the glob once; the same list feeds the loop and the progress total
    result_files = glob(path)
    for file in tqdm(result_files, desc='processing results', total=len(result_files)):
        file = Path(file)
        # result files are named "<dataset>__<method>.pkl"
        dataset, method = file.name.replace('.pkl', '').split('__')
        if methods is not None and method not in methods:
            continue

        # NOTE(review): pickle.load executes arbitrary code from the file; only
        # run this script on result files you generated yourself.
        with open(file, 'rb') as pickle_file:
            report = pickle.load(pickle_file)

        # `results` must stay a module-level name: compute_coverage_amplitude
        # reads it as a global.
        results = report['results']
        n_samples = len(results['ae'])
        table['method'].extend([nicer(method)] * n_samples)
        table['dataset'].extend([dataset] * n_samples)
        table['ae'].extend(results['ae'])
        table['rae'].extend(results['rae'])
        # table['c-CI'].extend(results['coverage'])
        # table['a-CI'].extend(results['amplitude'])

        # compute (and store back into the pickle) the region metrics that are
        # not yet present in the report
        update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
        # update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
        # update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
        # update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)

        table['c-CI'].extend(report['coverage-CI'])
        table['a-CI'].extend(report['amplitude-CI'])
        table['w-CI'].extend(report['winkler-CI'])
        table['amperr-CI'].extend(report['amperr-CI'])

        # table['c-CE'].extend(report['coverage-CE'])
        # table['a-CE'].extend(report['amplitude-CE'])
        # table['amperr-CE'].extend(report['amperr-CE'])

        # table['c-CLR'].extend(report['coverage-CLR'])
        # table['a-CLR'].extend(report['amplitude-CLR'])
        # table['amperr-CLR'].extend(report['amperr-CLR'])

        # table['c-ILR'].extend(report['coverage-ILR'])
        # table['a-ILR'].extend(report['amplitude-ILR'])
        # table['amperr-ILR'].extend(report['amperr-ILR'])

        table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
        table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
        # table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
        # table['aitch'].extend()
        # table['reg-score-ILR'].extend(
        #     [region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])]
        # )

    df = pd.DataFrame(table)
    if df.empty:
        # no result file matched (or every method was filtered out): there is
        # nothing to tabulate, and the column lookups below would fail
        print(f'no results to report for {setup=} (pattern: {path})')
        continue

    # look up the number of classes and the training-set size of every dataset
    fetch_fn = {
        'binary': fetch_UCI_binary,
        'multiclass': fetch_UCI_multiclass
    }[setup]
    n_classes = {}
    tr_size = {}
    for dataset in df['dataset'].unique():
        data = fetch_fn(dataset)
        n_classes[dataset] = data.n_classes
        tr_size[dataset] = len(data.training)

    # remove datasets with more than max_classes classes, with fewer than
    # min_train training instances, or explicitly listed in ignore_datasets
    max_classes = 25
    min_train = 500
    ignore_datasets = ['poker_hand', 'hcv']
    excluded_datasets = set(ignore_datasets)
    excluded_datasets.update(name for name, n in n_classes.items() if n > max_classes)
    excluded_datasets.update(name for name, n in tr_size.items() if n < min_train)
    df = df[~df["dataset"].isin(excluded_datasets)]

    for region in ['CI']:  # , 'CLR', 'ILR', 'CI']:
        if setup == 'binary' and region == 'ILR':
            continue
        # pv = pd.pivot_table(
        #     df, index='dataset', columns='method', values=['ae', f'c-{region}', f'a-{region}'], margins=True
        # )
        pv = pd.pivot_table(
            df, index='dataset', columns='method', values=[
                # f'amperr-{region}',
                f'a-{region}',
                f'c-{region}',
                # f'w-{region}',
                'ae',
                'SRE',
                # 'rae',
                # f'aitch',
                # f'aitch-well'
                # 'reg-score-ILR',
            ], margins=True
        )
        # datasets without an entry in the lookup (e.g. the "All" margin row)
        # map to a missing value, hence the nullable integer dtype
        pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
        pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
        # drop the per-metric "All" margin columns; the "All" row is kept
        pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
        print(f'{setup=}')
        print(pv)
        print('-'*80)
|
|
|