# QuaPy/BayesianKDEy/generate_results.py


import pickle
from collections import defaultdict
from joblib import Parallel, delayed
from tqdm import tqdm
import pandas as pd
from glob import glob
from pathlib import Path
import quapy as qp
from BayesianKDEy.commons import RESULT_DIR
from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, MNISTHandler
from error import dist_aitchison
from quapy.method.confidence import (
    ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
)
import quapy.functional as F
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.precision", 4)
pd.set_option("display.float_format", "{:.4f}".format)
# methods = None # show all methods
methods = [
    'BayesianACC',
    'BaKDE-Ait-numpyro',
    'BaKDE-Ait-T*',
    'BaKDE-Gau-numpyro',
    'BaKDE-Gau-T*',
    # 'BayEMQ-U-Temp1-2',
    # 'BayEMQ-T*',
    'BayEMQ',
    'BayEMQ*',
    # 'BootstrapACC',
    # 'BootstrapHDy',
    # 'BootstrapKDEy',
    # 'BootstrapEMQ'
]


def region_score(true_prev, region: ConfidenceRegionABC):
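    """
    Scores a confidence region against the true prevalence: the Monte Carlo estimate of the region's
    amplitude, plus, when the true prevalence is not covered, a penalty given by the Aitchison distance
    to the closest point in the region scaled by 1/alpha. Lower is better.
    """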
    amp = region.montecarlo_proportion(50_000)
    if true_prev in region:
        cost = 0
    else:
        scale_cost = 1 / region.alpha
        cost = scale_cost * dist_aitchison(true_prev, region.closest_point_in_region(true_prev))
    return amp + cost


def compute_coverage_amplitude(region_constructor, **kwargs):
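    """
    Builds one confidence region per test sample (from the stored prevalence samples) and returns, for
    each, the coverage of the true prevalence, the Monte Carlo amplitude, and, only for
    ConfidenceIntervals, the mean Winkler score (None otherwise).
    Note: reads the module-level `results` dict set in the main loop below.
    """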
    all_samples = results['samples']
    all_true_prevs = results['true-prevs']

    def process_one(samples, true_prevs):
        region = region_constructor(samples, **kwargs)
        if isinstance(region, ConfidenceIntervals):
            winkler = region.mean_winkler_score(true_prevs)
        else:
            winkler = None
        return region.coverage(true_prevs), region.montecarlo_proportion(), winkler

    out = Parallel(n_jobs=3)(
        delayed(process_one)(samples, true_prevs)
        for samples, true_prevs in tqdm(
            zip(all_samples, all_true_prevs),
            total=len(all_samples),
            desc='constructing ellipses'
        )
    )

    # unzip results
    coverage, amplitude, winkler = zip(*out)
    return list(coverage), list(amplitude), list(winkler)


def update_pickle(report, pickle_path, updated_dict:dict):
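    """Adds the given fields to the report and re-serializes it to pickle_path."""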
    for k, v in updated_dict.items():
        report[k] = v
    pickle.dump(report, open(pickle_path, 'wb'), protocol=pickle.HIGHEST_PROTOCOL)


def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs):
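    """
    Computes coverage, amplitude, Winkler score, and amperr for the given confidence-region class and
    caches them in the report pickle; the computation is skipped if the fields are already present.
    """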
    if f'coverage-{conf_name}' not in report:
        covs, amps, winkler = compute_coverage_amplitude(conf_region_class, **kwargs)
        # amperr (lower is better) counts the amplitude when the true value was covered, or 1 (max amplitude) otherwise
        amperrs = [amp if cov == 1.0 else 1. for amp, cov in zip(amps, covs)]
        update_fields = {
            f'coverage-{conf_name}': covs,
            f'amplitude-{conf_name}': amps,
            f'winkler-{conf_name}': winkler,
            f'amperr-{conf_name}': amperrs,
        }
        update_pickle(report, file, update_fields)


def nicer(name:str):
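    """Shortens method names for display (e.g., 'Bayesian' -> 'Ba', 'Bootstrap' -> 'Bo')."""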
    replacements = {
        'Bayesian': 'Ba',
        'Bootstrap': 'Bo',
        '-numpyro': '',
        'emcee': 'emc',
        '-T*': '*'
    }
    for k, v in replacements.items():
        name = name.replace(k, v)
    return name


base_dir = RESULT_DIR
table = defaultdict(list)
n_classes = {}
tr_size = {}
tr_prev = {}
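
# for each dataset handler: load the per-(dataset, method) result pickles, cache the confidence-region
# statistics back into each pickle, accumulate all metrics in `table`, and print pivot tables per metric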
for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, MNISTHandler]:
    problem_type = 'binary' if dataset_handler.is_binary() else 'multiclass'
    path = f'./{base_dir}/{problem_type}/*.pkl'

    for file in tqdm(glob(path), desc='processing results', total=len(glob(path))):
        file = Path(file)
        dataset, method = file.name.replace('.pkl', '').split('__')
        if (method not in methods) or (dataset not in dataset_handler.get_datasets()):
            continue

        report = pickle.load(open(file, 'rb'))
        results = report['results']
        n_samples = len(results['ae'])

        table['method'].extend([nicer(method)] * n_samples)
        table['dataset'].extend([dataset] * n_samples)
        table['ae'].extend(results['ae'])
        table['rae'].extend(results['rae'])
        # table['c-CI'].extend(results['coverage'])
        # table['a-CI'].extend(results['amplitude'])

        update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
        # update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
        # update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
        # update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)

        table['c-CI'].extend(report['coverage-CI'])
        table['a-CI'].extend(report['amplitude-CI'])
        table['w-CI'].extend(report['winkler-CI'])
        table['amperr-CI'].extend(report['amperr-CI'])
        # table['c-CE'].extend(report['coverage-CE'])
        # table['a-CE'].extend(report['amplitude-CE'])
        # table['amperr-CE'].extend(report['amperr-CE'])
        # table['c-CLR'].extend(report['coverage-CLR'])
        # table['a-CLR'].extend(report['amplitude-CLR'])
        # table['amperr-CLR'].extend(report['amperr-CLR'])
        # table['c-ILR'].extend(report['coverage-ILR'])
        # table['a-ILR'].extend(report['amplitude-ILR'])
        # table['amperr-ILR'].extend(report['amperr-ILR'])

        table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
        table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
        # table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
        # table['aitch'].extend()
        # table['reg-score-ILR'].extend(
        #     [region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])]
        # )

    for dataset in dataset_handler.iter():
        train = dataset.get_training()
        n_classes[dataset.name] = train.n_classes
        tr_size[dataset.name] = len(train)
        tr_prev[dataset.name] = F.strprev(train.prevalence())

    # remove datasets with more than max_classes classes
    # max_classes = 25
    # min_train = 500
    # ignore_datasets = ['poker_hand', 'hcv']
    # for data_name, n in n_classes.items():
    #     if n > max_classes:
    #         df = df[df["dataset"] != data_name]
    # for data_name, n in tr_size.items():
    #     if n < min_train:
    #         df = df[df["dataset"] != data_name]
    # for data_name, n in tr_size.items():
    #     if data_name in ignore_datasets:
    #         df = df[df["dataset"] != data_name]
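
    # build the long-format dataframe and print one pivot table (dataset x method) per metric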
    df = pd.DataFrame(table)

    for region in ['CI']:  # , 'CLR', 'ILR', 'CI']:
        if problem_type == 'binary' and region == 'ILR':
            continue
        for column in [f'a-{region}', f'c-{region}', 'ae', 'SRE']:
            pv = pd.pivot_table(
                df, index='dataset', columns='method', values=column, margins=True
            )
            pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
            pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
            # pv['tr-prev'] = pv.index.map(tr_prev)
            pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
            print(f'{problem_type=} {column=}')
            print(pv)
            print('-' * 80)