"""Aggregate BayesianKDEy experiment pickles into per-dataset pivot tables.

For every dataset handler, this script loads the result pickles produced by the
experiments, (lazily) augments each pickle with confidence-region statistics
(coverage, amplitude, Winkler score), accumulates per-sample error metrics into
a flat table, and prints one pivot table (dataset x method) per metric.
"""
import pickle
from collections import defaultdict
from joblib import Parallel, delayed
from tqdm import tqdm
import pandas as pd
from glob import glob
from pathlib import Path
import quapy as qp
from BayesianKDEy.commons import RESULT_DIR
from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler
from error import dist_aitchison
from quapy.method.confidence import ConfidenceIntervals
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
import quapy.functional as F

# Wide, high-precision console output so the pivot tables print in full.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.precision", 4)
pd.set_option("display.float_format", "{:.4f}".format)

# Methods to include in the report; set to None to show all methods.
# methods = None
methods = [
    'BayesianACC',
    'BaKDE-Ait-numpyro',
    'BaKDE-Ait-T*',
    'BaKDE-Gau-numpyro',
    'BaKDE-Gau-T*',
    # 'BayEMQ-U-Temp1-2',
    # 'BayEMQ-T*',
    'BayEMQ',
    'BayEMQ*',
    # 'BootstrapACC',
    # 'BootstrapHDy',
    # 'BootstrapKDEy',
    # 'BootstrapEMQ'
]


def region_score(true_prev, region: ConfidenceRegionABC):
    """Score a confidence region against the true prevalence (lower is better).

    The score is the Monte Carlo estimate of the region's amplitude, plus a
    penalty when ``true_prev`` falls outside the region: the Aitchison distance
    to the closest point in the region, scaled by ``1/alpha``.

    :param true_prev: the true prevalence vector.
    :param region: the confidence region to evaluate.
    :return: amplitude + (scaled distance penalty if not covered, else 0).
    """
    amp = region.montecarlo_proportion(50_000)
    if true_prev in region:
        cost = 0
    else:
        # Outside the region: penalize by the distance to the region boundary,
        # amplified for tighter confidence levels (smaller alpha).
        scale_cost = 1 / region.alpha
        cost = scale_cost * dist_aitchison(true_prev, region.closest_point_in_region(true_prev))
    return amp + cost


def compute_coverage_amplitude(region_constructor, **kwargs):
    """Build one confidence region per test sample and evaluate it.

    NOTE: reads the module-level ``results`` dict (keys 'samples' and
    'true-prevs') set inside the main loop below.

    :param region_constructor: class/callable building a region from samples.
    :param kwargs: extra keyword arguments forwarded to ``region_constructor``.
    :return: three parallel lists: coverages, amplitudes, and Winkler scores
        (Winkler entries are None for region types other than
        ``ConfidenceIntervals``).
    """
    all_samples = results['samples']
    all_true_prevs = results['true-prevs']

    def process_one(samples, true_prevs):
        # One region per sample; the Winkler score is only defined for CIs.
        region = region_constructor(samples, **kwargs)
        if isinstance(region, ConfidenceIntervals):
            winkler = region.mean_winkler_score(true_prevs)
        else:
            winkler = None
        return region.coverage(true_prevs), region.montecarlo_proportion(), winkler

    out = Parallel(n_jobs=3)(
        delayed(process_one)(samples, true_prevs)
        for samples, true_prevs in tqdm(
            zip(all_samples, all_true_prevs), total=len(all_samples),
            desc='constructing ellipses'
        )
    )

    # unzip results
    coverage, amplitude, winkler = zip(*out)
    return list(coverage), list(amplitude), list(winkler)


def update_pickle(report, pickle_path, updated_dict: dict):
    """Merge ``updated_dict`` into ``report`` and persist it to ``pickle_path``."""
    report.update(updated_dict)
    # Context manager guarantees the file handle is closed (the original used
    # a bare open() inside pickle.dump, leaking the handle).
    with open(pickle_path, 'wb') as fout:
        pickle.dump(report, fout, protocol=pickle.HIGHEST_PROTOCOL)


def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs):
    """Compute and cache region statistics for ``conf_name`` in the report pickle.

    If ``coverage-<conf_name>`` is already present, the pickle is left as-is;
    otherwise the coverage/amplitude/Winkler/amperr fields are computed and
    written back to ``file``.
    """
    if f'coverage-{conf_name}' not in report:
        covs, amps, winkler = compute_coverage_amplitude(conf_region_class, **kwargs)
        # amperr (lower is better) counts the amplitude when the true vale was covered, or 1 (max amplitude) otherwise
        amperrs = [amp if cov == 1.0 else 1. for amp, cov in zip(amps, covs)]
        update_fields = {
            f'coverage-{conf_name}': covs,
            f'amplitude-{conf_name}': amps,
            f'winkler-{conf_name}': winkler,
            f'amperr-{conf_name}': amperrs,
        }
        update_pickle(report, file, update_fields)


def nicer(name: str):
    """Shorten a method name for display (e.g. 'Bayesian' -> 'Ba')."""
    replacements = {
        'Bayesian': 'Ba',
        'Bootstrap': 'Bo',
        '-numpyro': '',
        'emcee': 'emc',
        '-T*': '*'
    }
    for k, v in replacements.items():
        name = name.replace(k, v)
    return name


base_dir = RESULT_DIR
table = defaultdict(list)       # flat accumulator: one list of values per column
n_classes = {}                  # dataset name -> number of classes
tr_size = {}                    # dataset name -> training-set size
tr_prev = {}                    # dataset name -> training prevalence (string)

for dataset_handler in [UCIMulticlassHandler, LeQuaHandler, VisualDataHandler, CIFAR100Handler]:
    problem_type = 'binary' if dataset_handler.is_binary() else 'multiclass'
    path = f'./{base_dir}/{problem_type}/*.pkl'
    # Hoisted: the original called glob(path) twice (iterable and total=).
    result_files = glob(path)
    for file in tqdm(result_files, desc='processing results', total=len(result_files)):
        file = Path(file)
        # Result files are named '<dataset>__<method>.pkl'.
        dataset, method = file.name.replace('.pkl', '').split('__')
        # methods=None means "show all"; the original crashed in that case
        # because `method not in None` raises TypeError.
        if methods is not None and method not in methods:
            continue
        if dataset not in dataset_handler.get_datasets():
            continue
        with open(file, 'rb') as fin:
            report = pickle.load(fin)
        results = report['results']
        n_samples = len(results['ae'])
        table['method'].extend([nicer(method)] * n_samples)
        table['dataset'].extend([dataset] * n_samples)
        table['ae'].extend(results['ae'])
        table['rae'].extend(results['rae'])
        # table['c-CI'].extend(results['coverage'])
        # table['a-CI'].extend(results['amplitude'])

        # Lazily augment the pickle with CI statistics (no-op if cached).
        update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
        # update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
        # update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
        # update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)

        table['c-CI'].extend(report['coverage-CI'])
        table['a-CI'].extend(report['amplitude-CI'])
        table['w-CI'].extend(report['winkler-CI'])
        table['amperr-CI'].extend(report['amperr-CI'])
        # table['c-CE'].extend(report['coverage-CE'])
        # table['a-CE'].extend(report['amplitude-CE'])
        # table['amperr-CE'].extend(report['amperr-CE'])
        # table['c-CLR'].extend(report['coverage-CLR'])
        # table['a-CLR'].extend(report['amplitude-CLR'])
        # table['amperr-CLR'].extend(report['amperr-CLR'])
        # table['c-ILR'].extend(report['coverage-ILR'])
        # table['a-ILR'].extend(report['amplitude-ILR'])
        # table['amperr-ILR'].extend(report['amperr-ILR'])

        table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
        table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
        # table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
        # table['aitch'].extend()
        # table['reg-score-ILR'].extend(
        #     [region_score(true_prev, ConfidenceEllipseILR(samples)) for true_prev, samples in zip(results['true-prevs'], results['samples'])]
        # )

    # Collect per-dataset training statistics for the pivot-table side columns.
    for dataset in dataset_handler.iter():
        train = dataset.get_training()
        n_classes[dataset.name] = train.n_classes
        tr_size[dataset.name] = len(train)
        tr_prev[dataset.name] = F.strprev(train.prevalence())

    # remove datasets with more than max_classes classes
    # max_classes = 25
    # min_train = 500
    # ignore_datasets = ['poker_hand', 'hcv']
    # for data_name, n in n_classes.items():
    #     if n > max_classes:
    #         df = df[df["dataset"] != data_name]
    # for data_name, n in tr_size.items():
    #     if n < min_train:
    #         df = df[df["dataset"] != data_name]
    # for data_name, n in tr_size.items():
    #     if data_name in ignore_datasets:
    #         df = df[df["dataset"] != data_name]

    df = pd.DataFrame(table)
    for region in ['CI']:  # , 'CLR', 'ILR', 'CI']:
        # ILR regions are undefined for binary problems.
        if problem_type == 'binary' and region == 'ILR':
            continue
        for column in [f'a-{region}', f'c-{region}', 'ae', 'SRE']:
            pv = pd.pivot_table(
                df, index='dataset', columns='method', values=column, margins=True
            )
            pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
            pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
            # pv['tr-prev'] = pv.index.map(tr_prev)
            # NOTE(review): with a flat column Index, col[-1] is the label's last
            # *character*, so 'All' is never matched; this only drops margin
            # columns when the labels are tuples (MultiIndex). Confirm whether
            # `col == "All"` was intended here.
            pv = pv.drop(columns=[col for col in pv.columns if col[-1] == "All"])
            print(f'{problem_type=} {column=}')
            print(pv)
            print('-' * 80)