408 lines
14 KiB
Python
408 lines
14 KiB
Python
import pickle
|
|
from collections import defaultdict
|
|
import matplotlib.pyplot as plt
|
|
import seaborn as sns
|
|
from joblib import Parallel, delayed
|
|
from tqdm import tqdm
|
|
import pandas as pd
|
|
from glob import glob
|
|
from pathlib import Path
|
|
import quapy as qp
|
|
from BayesianKDEy.commons import RESULT_DIR
|
|
from BayesianKDEy.datasets import LeQuaHandler, UCIMulticlassHandler, VisualDataHandler, CIFAR100Handler
|
|
from comparison_group import SelectGreaterThan, SelectByName, SelectSmallerThan
|
|
from format import FormatModifierSelectColor
|
|
from quapy.error import dist_aitchison
|
|
from quapy.method.confidence import ConfidenceIntervals, ConfidenceIntervalsILR, ConfidenceIntervalsCLR
|
|
from quapy.method.confidence import ConfidenceEllipseSimplex, ConfidenceEllipseCLR, ConfidenceEllipseILR, ConfidenceIntervals, ConfidenceRegionABC
|
|
import quapy.functional as F
|
|
from result_path.src.table import LatexTable
|
|
import numpy as np
|
|
import pandas as pd
|
|
from itertools import chain
|
|
|
|
# Configure pandas so that wide result tables print in full on the console
# (no column/row truncation) with 4-decimal floating-point formatting.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 2000)
pd.set_option('display.max_rows', None)
pd.set_option("display.expand_frame_repr", False)
pd.set_option("display.precision", 4)
pd.set_option("display.float_format", "{:.4f}".format)
|
|
|
|
|
|
# methods = None # show all methods
# Subset of method identifiers to analyze; raw names as they appear in the
# result-file names (before nicer_method() prettifies them for display).
methods = ['BoCC',
           'BaACC!',
           'BaEMQ!',
           'BaKDE-Gau-T!',
           'BaKDE-Ait-T!',
           'BaKDE-Ait-T!2'
           #'BootstrapACC',
           #'BootstrapHDy',
           #'BootstrapKDEy',
           #'BootstrapEMQ'
           ]
|
|
|
|
def region_score(true_prev, region: ConfidenceRegionABC):
    """
    Score a confidence region against the true prevalence vector.

    The score is the region's (Monte Carlo estimated) fraction of the simplex,
    plus a penalty when the region fails to contain the true prevalence; the
    penalty is the Aitchison distance from `true_prev` to the closest point of
    the region, scaled by 1/alpha. Lower scores are better.
    """
    score = region.montecarlo_proportion(50_000)
    if true_prev not in region:
        # penalize misses proportionally to how far the region falls short
        nearest = region.closest_point_in_region(true_prev)
        score += dist_aitchison(true_prev, nearest) / region.alpha
    return score
|
|
|
|
|
|
|
|
def compute_coverage_amplitude(region_constructor, **kwargs):
    """
    Build one confidence region per test sample and measure how well each one
    captures the corresponding true prevalence.

    NOTE: reads the module-level ``results`` dict (set inside the processing
    loop below), which must provide the 'samples' and 'true-prevs' entries.

    :param region_constructor: callable producing a confidence region from an
        array of prevalence samples (e.g., ConfidenceIntervals)
    :param kwargs: extra arguments forwarded to `region_constructor`
    :return: four lists (one entry per test sample): coverage, amplitude
        (Monte Carlo proportion of the simplex), Winkler score, and soft
        coverage; the last two are None for non-interval region types
    """
    all_samples = results['samples']
    all_true_prevs = results['true-prevs']

    def process_one(samples, true_prevs):
        # build the region and evaluate it against the true prevalence
        region = region_constructor(samples, **kwargs)
        if isinstance(region, (ConfidenceIntervals, ConfidenceIntervalsCLR, ConfidenceIntervalsILR)):
            # Winkler score and soft coverage are only defined for interval-type regions
            winkler = region.mean_winkler_score(true_prevs)
            # winkler_e = region.mean_winkler_score(true_prevs, add_ae=True)
            cov_soft = region.coverage_soft(true_prevs)
        else:
            winkler = None
            # winkler_e = None
            cov_soft = None
        return region.coverage(true_prevs), region.montecarlo_proportion(), winkler, cov_soft

    out = Parallel(n_jobs=3)(
        delayed(process_one)(samples, true_prevs)
        for samples, true_prevs in tqdm(
            zip(all_samples, all_true_prevs),
            total=len(all_samples),
            desc='constructing ellipses'
        )
    )

    # unzip the per-sample tuples into four parallel lists
    coverage, amplitude, winkler, cov_soft = zip(*out)
    return list(coverage), list(amplitude), list(winkler), list(cov_soft)
|
|
|
|
|
|
def update_pickle(report, pickle_path, updated_dict: dict):
    """
    Merge `updated_dict` into `report` (in place) and persist the updated
    report to `pickle_path`, overwriting any existing file.

    :param report: dict-like report object, modified in place
    :param pickle_path: destination path of the pickle file
    :param updated_dict: new key/value pairs to store in the report
    """
    report.update(updated_dict)
    # context manager guarantees the handle is flushed and closed, even on error
    # (the previous version leaked the file object returned by open())
    with open(pickle_path, 'wb') as fout:
        pickle.dump(report, fout, protocol=pickle.HIGHEST_PROTOCOL)
|
|
|
|
|
|
def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwargs):
    """
    Lazily augment a result report with coverage/amplitude statistics for a
    given confidence-region type and re-persist it. Idempotent: if the report
    already holds results for `conf_name`, nothing is recomputed.
    """
    if f'coverage-{conf_name}' in report:
        return  # already computed in a previous run

    covs, amps, winkler, cov_soft = compute_coverage_amplitude(conf_region_class, **kwargs)
    update_pickle(report, file, {
        f'coverage-{conf_name}': covs,
        f'amplitude-{conf_name}': amps,
        f'winkler-{conf_name}': winkler,
        f'coverage-soft-{conf_name}': cov_soft,
    })
|
|
|
|
|
|
def pareto_front(df, x_col, y_col, maximize_y=True, minimize_x=True):
    """
    Returns a boolean mask indicating whether each row is Pareto-optimal.

    A row is Pareto-optimal iff no other row is at least as good in both
    objectives and strictly better in at least one. Ties (exact duplicates)
    are all kept on the front.
    """
    # Orient both objectives so the comparison is always "minimize x,
    # maximize y"; flipping the sign preserves every ordering test.
    x = df[x_col].values if minimize_x else -df[x_col].values
    y = df[y_col].values if maximize_y else -df[y_col].values

    # Pairwise domination via broadcasting: entry (i, j) is True when point j
    # weakly dominates point i. The diagonal is never strictly better, so a
    # point cannot dominate itself.
    xi, xj = x[:, None], x[None, :]
    yi, yj = y[:, None], y[None, :]
    weakly = (xj <= xi) & (yj >= yi)
    strictly = (xj < xi) | (yj > yi)
    dominated = (weakly & strictly).any(axis=1)

    return ~dominated
|
|
|
|
def plot_coverage_vs_amplitude(
        df,
        coverage_col,
        amplitude_col="a-CI",
        method_col="method",
        dataset_col=None,
        error_col=None,
        error_threshold=None,
        nominal_coverage=0.95,
        title=None,
):
    """
    Scatter-plot empirical coverage against region amplitude, one point per
    row of `df`, highlighting the Pareto-optimal points (maximal coverage at
    minimal amplitude) and drawing the nominal coverage reference line.

    :param df: dataframe with one row per observation
    :param coverage_col: column holding the empirical coverage values
    :param amplitude_col: column holding the region amplitudes
    :param method_col: column used to color the points
    :param dataset_col: currently unused; kept for API compatibility
    :param error_col: optional column used to filter rows by error
    :param error_threshold: rows with error above this value are discarded
        (only applied when `error_col` is also given)
    :param nominal_coverage: y-value of the dashed reference line
    :param title: optional figure title
    """
    data = df.copy()

    # optional filtering: keep only rows whose error is within the threshold
    if error_col is not None and error_threshold is not None:
        data = data[data[error_col] <= error_threshold]

    # Pareto-optimal points: no other point has smaller amplitude AND larger coverage
    on_front = pareto_front(data, x_col=amplitude_col, y_col=coverage_col,
                            maximize_y=True, minimize_x=True)

    plt.figure(figsize=(7, 6))

    # all points, colored by method
    sns.scatterplot(data=data, x=amplitude_col, y=coverage_col, hue=method_col,
                    alpha=0.6, s=60, legend=True)

    # ring the Pareto-front points with hollow black circles
    plt.scatter(data.loc[on_front, amplitude_col],
                data.loc[on_front, coverage_col],
                facecolors='none', edgecolors='black', s=120, linewidths=1.5,
                label="Pareto front")

    # dashed reference line at the nominal coverage level
    plt.axhline(nominal_coverage, linestyle="--", color="gray", linewidth=1,
                label="Nominal coverage")

    plt.xlabel("Amplitude (fraction of simplex)")
    plt.ylabel("Coverage")
    plt.ylim(0, 1.05)

    if title is not None:
        plt.title(title)

    plt.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
    plt.tight_layout()
    plt.show()
|
|
|
|
|
|
def nicer_method(name: str):
    """
    Prettify a raw method identifier for display: shorten prefixes, strip
    internal markers, and render the -Ait/-Gau suffixes as LaTeX superscripts.
    Replacements are applied sequentially, in order.
    """
    for old, new in (
        # ('Bayesian', 'Ba'),
        ('Bootstrap', 'Bo'),
        ('-numpyro', ''),
        ('emcee', 'emc'),
        ('-T*', '*'),
        ('-T!', ''),
        ('!', ''),
        ('-Ait', r'$^{(\mathrm{Ait})}$'),
        ('-Gau', r'$^{(\mathrm{Gau})}$'),
    ):
        name = name.replace(old, new)
    return name
|
|
|
|
|
|
def nicer_data(name: str):
    """
    Prettify a raw dataset identifier for display (e.g. 'cifar100coarse' ->
    'CIFAR100(20)'). Replacements are applied sequentially, in order.
    """
    for old, new in (
        ('cifar', 'CIFAR'),
        ('-l', ''),
        ('mnist', 'MNIST'),
        ('fashionmnist', 'fashionMNIST'),
        ('svhn', 'SVHN'),
        ('100coarse', '100(20)'),
    ):
        name = name.replace(old, new)
    return name
|
|
|
|
|
|
# Root directory containing the per-(dataset, method) result pickles.
base_dir = RESULT_DIR

# Accumulator for the long-format results table: column name -> list of values.
table = defaultdict(list)
# Per-dataset metadata, keyed by dataset name.
n_classes = {}  # number of classes
tr_size = {}    # number of training instances
tr_prev = {}    # training prevalence, as a formatted string

# Fix the dataset display order and collect training-set statistics.
# NOTE: get_training() loads each training set, so this pass may be slow.
dataset_class = [UCIMulticlassHandler, CIFAR100Handler, VisualDataHandler, LeQuaHandler]
dataset_order = []
for handler in dataset_class:
    for dataset in handler.iter():
        dataset_order.append(dataset.name)
        train = dataset.get_training()
        n_classes[dataset.name] = train.n_classes
        tr_size[dataset.name] = len(train)
        tr_prev[dataset.name] = F.strprev(train.prevalence())


# Glob pattern for the result pickles of the chosen problem type.
problem_type = 'multiclass'
path = f'./{base_dir}/{problem_type}/*.pkl'
|
|
|
|
# Load every result pickle for this problem type, lazily augment it with
# confidence-region statistics, and accumulate per-sample rows in `table`.
# glob() is evaluated once (the original called it twice: once for the
# iterator, once for the progress-bar total).
result_files = glob(path)
for file in tqdm(result_files, desc='processing results', total=len(result_files)):
    file = Path(file)
    # file names follow the pattern <dataset>__<method>.pkl
    dataset, method = file.name.replace('.pkl', '').split('__')
    if (method not in methods) or (dataset not in dataset_order):
        continue

    # NOTE: pickle is unsafe on untrusted input; these files are produced by
    # this project's own experiment scripts. `with` closes the handle promptly.
    with open(file, 'rb') as fin:
        report = pickle.load(fin)
    # module-level `results` is read by compute_coverage_amplitude()
    results = report['results']
    n_samples = len(results['ae'])
    table['method'].extend([nicer_method(method)] * n_samples)
    table['dataset'].extend([nicer_data(dataset)] * n_samples)
    table['ae'].extend(results['ae'])
    table['rae'].extend(results['rae'])
    # table['c-CI'].extend(results['coverage'])
    # table['a-CI'].extend(results['amplitude'])

    # compute (once) and cache the confidence-region statistics in the pickle
    # update_pickle_with_region(report, file, conf_name='CI-ILR', conf_region_class=ConfidenceIntervalsILR, bonferroni_correction=True)
    # update_pickle_with_region(report, file, conf_name='CI-CLR', conf_region_class=ConfidenceIntervalsCLR, bonferroni_correction=True)
    update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
    update_pickle_with_region(report, file, conf_name='CInb', conf_region_class=ConfidenceIntervals, bonferroni_correction=False)  # no Bonferroni-correction
    # update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
    # update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
    # update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)

    conf_bonferroni = 'CI'
    conf_name = 'CInb'
    table['c-CI'].extend(report[f'coverage-{conf_bonferroni}'])  # the true coverage is better measured with Bonferroni-correction
    table['w-CI'].extend(report[f'winkler-{conf_name}'])
    table['cs-CI'].extend(report[f'coverage-soft-{conf_name}'])
    table['a-CI'].extend(report[f'amplitude-{conf_name}'])

    # table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim'])) # not in this paper...
    table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
|
|
|
|
|
|
|
|
# remove datasets with more than max_classes classes
|
|
# max_classes = 25
|
|
# min_train = 500
|
|
# ignore_datasets = ['poker_hand', 'hcv']
|
|
# for data_name, n in n_classes.items():
|
|
# if n > max_classes:
|
|
# df = df[df["dataset"] != data_name]
|
|
# for data_name, n in tr_size.items():
|
|
# if n < min_train:
|
|
# df = df[df["dataset"] != data_name]
|
|
# for data_name, n in tr_size.items():
|
|
# if data_name in ignore_datasets:
|
|
# df = df[df["dataset"] != data_name]
|
|
|
|
|
|
|
|
|
|
# Assemble the long-format dataframe and rescale proportions to percentages.
df = pd.DataFrame(table)
df['a-CI'] *= 100
df['c-CI'] *= 100
df['cs-CI'] *= 100

# For each region type, print a pivot table per metric and render a LaTeX PDF.
for region in ['CI']: #, 'CLR', 'ILR', 'CI']:
    # ILR is undefined for binary problems
    if problem_type == 'binary' and region=='ILR':
        continue
    for column in [f'a-{region}', 'ae', 'SRE', f'c-{region}', f'cs-{region}']: # f'w-{region}'
        # dataset x method summary, with marginal means ("All")
        pv = pd.pivot_table(
            df, index='dataset', columns='method', values=column, margins=True
        )
        # attach per-dataset metadata columns (nullable ints: the "All" row maps to NA)
        pv['n_classes'] = pv.index.map(n_classes).astype('Int64')
        pv['tr_size'] = pv.index.map(tr_size).astype('Int64')
        #pv['tr-prev'] = pv.index.map(tr_prev)
        # drop the marginal "All" column(s) before printing
        pv = pv.drop(columns=[col for col in pv.columns if col == "All" or col[-1]=='All'])
        print(f'{problem_type=} {column=}')
        print(pv)
        print('-'*80)

        # build the LaTeX table for this metric
        latex = LatexTable.from_dataframe(df, method='method', benchmark='dataset', value=column, name=column)
        latex.format.configuration.show_std = False
        #latex.reorder_methods([nicer_method(m) for m in methods])
        latex.reorder_benchmarks([nicer_data(d) for d in dataset_order])
        # error metrics: lower is better, run Wilcoxon significance test
        if column in ['ae', 'SRE']:
            latex.format.configuration.lower_is_better = True
            latex.format.configuration.stat_test = 'wilcoxon'
            #latex.format.configuration.stat_test = None
            # latex.format.configuration.show_std = True

        # coverage metrics: no test/ranking; color cells above the 89(%) threshold
        if column in [f'c-{region}', f'cs-{region}']:
            latex.format.configuration.lower_is_better = False
            latex.format.configuration.stat_test = None
            latex.format.configuration.with_color = False
            latex.format.configuration.best_in_bold = False
            latex.format.configuration.with_rank = False
            latex.format.configuration.mean_prec = 0
            latex.add_format_modifier(
                format_modifier=FormatModifierSelectColor(
                    comparison=SelectGreaterThan(reference_selector=89, input_selector=SelectByName())
                )
            )
        # amplitude: lower is better; color cells below the 11(%) threshold
        if column in [f'a-{region}']:
            latex.format.configuration.lower_is_better = True
            latex.format.configuration.stat_test = None
            latex.format.configuration.with_color = False
            latex.format.configuration.best_in_bold = False
            latex.format.configuration.mean_prec = 2
            latex.add_format_modifier(
                format_modifier=FormatModifierSelectColor(
                    comparison=SelectSmallerThan(reference_selector=11, input_selector=SelectByName())
                )
            )
        # latex.add_format_modifier(
        #     format_modifier=FormatModifierSelectColor(
        #         comparison=SelectSmallerThan(reference_selector=0.01, input_selector=SelectByName()),
        #         intensity=50
        #     )
        # )

        latex.format.configuration.resizebox=.5
        latex.latexPDF(pdf_path=f'./tables/{latex.name}.pdf')
|
|
|
|
|
|
# Drop method variants excluded from the final comparison. A single isin()
# filter replaces the original chain of six `df = df[df['method'] != ...]`
# statements (identical result, one pass).
excluded_methods = ['BaACC', 'BaACC*', 'BaACC+', 'BaKDE-Ait*', 'BaKDE-Gau*', 'BayEMQ*']
df = df[~df['method'].isin(excluded_methods)]

# Aggregate per (method, dataset): mean/std of errors, coverage and amplitude.
grouped = df.groupby(["method", "dataset"])
agg = grouped.agg(
    ae_mean=("ae", "mean"),
    ae_std=("ae", "std"),
    sre_mean=("SRE", "mean"),
    sre_std=("SRE", "std"),
    coverage_mean=("c-CI", "mean"),
    coverage_std=("c-CI", "std"),
    coverage_soft_mean=("cs-CI", "mean"),
    amplitude_mean=("a-CI", "mean"),
    amplitude_std=("a-CI", "std"),
).reset_index()
|
|
|
|
#plot_coverage_vs_amplitude(
|
|
# agg,
|
|
# coverage_col="coverage_soft_mean",
|
|
# amplitude_col="amplitude_mean",
|
|
# method_col="method",
|
|
# dataset_col="dataset",
|
|
# nominal_coverage=0.95,
|
|
# title="Marginal coverage vs amplitude"
|
|
#)
|
|
|
|
|
|
#print('RESTITUIR EL WILCOXON')
|