From ae9503a43b7e505f3278ffecea75483ba159cc72 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Fri, 9 Jan 2026 17:20:05 +0100
Subject: [PATCH] calibrating temperature and coming back to Aitchison kernel

---
 BayesianKDEy/TODO.txt                      |  22 +++-
 BayesianKDEy/_bayeisan_kdey.py             |   2 +-
 BayesianKDEy/full_experiments.py           |  24 +++-
 BayesianKDEy/generate_results.py           |  79 +++++++++----
 BayesianKDEy/single_experiment_debug.py    |  15 ++-
 BayesianKDEy/temperature_bandwidth_corr.py | 129 +++++++++++++++++++++
 BayesianKDEy/temperature_calibration.py    | 114 ++++++++++++++++++
 quapy/data/datasets.py                     |  23 +++-
 quapy/error.py                             |  10 +-
 quapy/method/confidence.py                 |   4 +-
 10 files changed, 376 insertions(+), 46 deletions(-)
 create mode 100644 BayesianKDEy/temperature_bandwidth_corr.py
 create mode 100644 BayesianKDEy/temperature_calibration.py

diff --git a/BayesianKDEy/TODO.txt b/BayesianKDEy/TODO.txt
index 9aea717..c75a2e1 100644
--- a/BayesianKDEy/TODO.txt
+++ b/BayesianKDEy/TODO.txt
@@ -1,7 +1,23 @@
-- Add other methods that natively provide uncertainty quantification methods?
+- Add other methods that natively provide uncertainty quantification? (e.g., Ratio estimator, Card & Smith)
 - MPIW (Mean Prediction Interval Width): is the average of the amplitudes (w/o aggregating coverage whatsoever)
 - Implement Interval Score or Winkler Score
 - analyze across shift
-- add Bayesian EM
-- optimize also C and class_weight?
+- add Bayesian EM:
+  - https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/mapls.py
+  - take this opportunity to add RLLS: https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/rlls.py
+- add CIFAR10 and MNIST? Maybe also consider previously tested types of shift (tweak-one-out, etc.) from the RLLS paper:
+  - https://github.com/Angie-Liu/labelshift/tree/master
+  - https://github.com/Angie-Liu/labelshift/blob/master/cifar10_for_labelshift.py
+  - Note: MNIST is downloadable from https://archive.ics.uci.edu/dataset/683/mnist+database+of+handwritten+digits
+- consider prior knowledge in experiments:
+  - One scenario in which our prior is uninformative (i.e., uniform)
+  - One scenario in which our prior is wrong (e.g., alpha-prior = (3,2,1), protocol-prior = (1,1,5))
+  - One scenario in which our prior is very good (e.g., alpha-prior = (3,2,1), protocol-prior = (3,2,1))
+  - Do all my baseline methods come with the option to inform a prior?
+- consider different bandwidths within the Bayesian approach?
+- how to improve the coverage (or how to increase the amplitude)?
+  - Added temperature calibration; it improves things.
+  - Is temperature calibration not simply equivalent to using a larger bandwidth in the kernels?
+- consider W as a measure of quantification error (the current w-CI is the Winkler score...)
+- optimize also C and class_weight? [I don't think so, but could be done easily now]

diff --git a/BayesianKDEy/_bayeisan_kdey.py b/BayesianKDEy/_bayeisan_kdey.py
index 69b6523..9a4f171 100644
--- a/BayesianKDEy/_bayeisan_kdey.py
+++ b/BayesianKDEy/_bayeisan_kdey.py
@@ -66,7 +66,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
             raise ValueError(f'parameter {num_samples=} must be a positive integer')
         assert explore in ['simplex', 'clr', 'ilr'], \
             f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
-        assert temperature>0., f'temperature must be >0'
+        # assert temperature>0., f'temperature must be >0'
         assert engine in ['rw-mh', 'emcee', 'numpyro']
 
         super().__init__(classifier, fit_classifier, val_split)

diff --git a/BayesianKDEy/full_experiments.py b/BayesianKDEy/full_experiments.py
index 4b0afa2..47c4155 100644
--- a/BayesianKDEy/full_experiments.py
+++ b/BayesianKDEy/full_experiments.py
@@ -9,6 +9,7 @@ from sklearn.model_selection import GridSearchCV, StratifiedKFold
 from copy import deepcopy as cp
 import quapy as qp
 from BayesianKDEy._bayeisan_kdey import BayesianKDEy
+from BayesianKDEy.temperature_calibration import temp_calibration
 from build.lib.quapy.data import LabelledCollection
 from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
 from quapy.method.base import BinaryQuantifier, BaseQuantifier
@@ -72,14 +73,22 @@ def methods():
     # yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
 
     # # yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
-    # yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
+    yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
     # yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
     # yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
     # yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
     # yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
     # yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
     yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method
-    yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, step_size=.1, engine='numpyro', **hyper), multiclass_method
+    yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method
+    yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
+
yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
+    yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method
+    yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
+    yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
+    yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
 
 
 def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
@@ -114,7 +123,12 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
     )
 
     t_init = time()
-    withconf_quantifier = withconf_constructor(best_hyperparams).fit(*training.Xy)
+    withconf_quantifier = withconf_constructor(best_hyperparams)
+    if hasattr(withconf_quantifier, 'temperature') and withconf_quantifier.temperature is None:
+        train, val = training.split_stratified(train_prop=0.6, random_state=0)
+        temperature = temp_calibration(withconf_quantifier, train, val, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1)
+        withconf_quantifier.temperature = temperature
+    withconf_quantifier.fit(*training.Xy)
     tr_time = time() - t_init
 
     # test
@@ -155,13 +169,13 @@ if __name__ == '__main__':
 
     binary = {
         'datasets': qp.datasets.UCI_BINARY_DATASETS,
         'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
-        'sample_size': 100  # previous: 500
+        'sample_size': 500  # small setting: 100, big setting: 500
     }
 
     multiclass = {
         'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
         'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
-        'sample_size': 200  # previous: 1000
+        'sample_size': 1000  # small setting: 200, big setting: 1000
     }
 
     result_dir = Path('./results')

diff --git a/BayesianKDEy/generate_results.py b/BayesianKDEy/generate_results.py
index 5604af4..862832a 100644
--- a/BayesianKDEy/generate_results.py
+++ b/BayesianKDEy/generate_results.py
@@ -80,7 +80,30 @@ def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwar
 
 
 # methods = None  # show all methods
-methods = ['BayesianACC', 'BayesianKDEy', 'BaKDE-emcee', 'BaKDE-numpyro']
+methods = ['BayesianACC',
+           # 'BayesianKDEy',
+           # 'BaKDE-emcee',
+           # 'BaKDE-numpyro',
+           # 'BaKDE-numpyro-T2',
+           # 'BaKDE-numpyro-T10',
+           # 'BaKDE-numpyro-T*',
+           # 'BaKDE-Ait-numpyro',
+           'BaKDE-Ait-numpyro-T*',
+           # 'BaKDE-Ait-numpyro-T*ILR',
+           'BootstrapACC',
+           'BootstrapHDy',
+           'BootstrapKDEy'
+           ]
+
+
+def nicer(name: str):
+    replacements = {
+        'Bayesian': 'Ba',
+        'Bootstrap': 'Bo',
+        'numpyro': 'ro',
+        'emcee': 'emc',
+    }
+    for k, v in replacements.items():
+        name = name.replace(k, v)
+    return name
+
 
 for setup in ['multiclass']:
     path = f'./results/{setup}/*.pkl'
@@ -93,7 +116,7 @@ for setup in ['multiclass']:
         report = pickle.load(open(file, 'rb'))
         results = report['results']
         n_samples = len(results['ae'])
-        table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
+
table['method'].extend([nicer(method)] * n_samples)
         table['dataset'].extend([dataset] * n_samples)
         table['ae'].extend(results['ae'])
         table['rae'].extend(results['rae'])
@@ -101,28 +124,29 @@ for setup in ['multiclass']:
         # table['a-CI'].extend(results['amplitude'])
 
         update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
-        update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
-        update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
-        update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
+        # update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
+        # update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
+        # update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
 
         table['c-CI'].extend(report['coverage-CI'])
         table['a-CI'].extend(report['amplitude-CI'])
         table['w-CI'].extend(report['winkler-CI'])
         table['amperr-CI'].extend(report['amperr-CI'])
 
-        table['c-CE'].extend(report['coverage-CE'])
-        table['a-CE'].extend(report['amplitude-CE'])
-        table['amperr-CE'].extend(report['amperr-CE'])
+        # table['c-CE'].extend(report['coverage-CE'])
+        # table['a-CE'].extend(report['amplitude-CE'])
+        # table['amperr-CE'].extend(report['amperr-CE'])
 
-        table['c-CLR'].extend(report['coverage-CLR'])
-        table['a-CLR'].extend(report['amplitude-CLR'])
-        table['amperr-CLR'].extend(report['amperr-CLR'])
+        # table['c-CLR'].extend(report['coverage-CLR'])
+        # table['a-CLR'].extend(report['amplitude-CLR'])
+        # table['amperr-CLR'].extend(report['amperr-CLR'])
 
-        table['c-ILR'].extend(report['coverage-ILR'])
-        table['a-ILR'].extend(report['amplitude-ILR'])
-        table['amperr-ILR'].extend(report['amperr-ILR'])
+        # table['c-ILR'].extend(report['coverage-ILR'])
+        # table['a-ILR'].extend(report['amplitude-ILR'])
+        # table['amperr-ILR'].extend(report['amperr-ILR'])
 
         table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
+        table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
         # table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
         # table['aitch'].extend()
         # table['reg-score-ILR'].extend(
@@ -145,16 +169,20 @@ for setup in ['multiclass']:
             tr_size[dataset] = len(data.training)
 
     # remove datasets with more than max_classes classes
-    # max_classes = 30
-    # min_train = 1000
-    # for data_name, n in n_classes.items():
-    #     if n > max_classes:
-    #         df = df[df["dataset"] != data_name]
-    # for data_name, n in tr_size.items():
-    #     if n < min_train:
-    #         df = df[df["dataset"] != data_name]
+    max_classes = 25
+    min_train = 500
+    ignore_datasets = ['poker_hand', 'hcv']
+    for data_name, n in n_classes.items():
+        if n > max_classes:
+            df = df[df["dataset"] != data_name]
+    for data_name, n in tr_size.items():
+        if n < min_train:
+            df = df[df["dataset"] != data_name]
+    for data_name in ignore_datasets:
+        df = df[df["dataset"] != data_name]
 
-    for region in ['CI', 'CE', 'CLR', 'ILR']:
+    for region in ['CI']:  # previously: ['CI', 'CE', 'CLR', 'ILR']
         if setup == 'binary' and region=='ILR':
             continue
         # pv = pd.pivot_table(
         # )
         pv = pd.pivot_table(
             df, index='dataset', columns='method',
values=[ - f'amperr-{region}', + # f'amperr-{region}', f'a-{region}', f'c-{region}', - #f'w-{region}', + # f'w-{region}', 'ae', + 'SRE', # 'rae', # f'aitch', # f'aitch-well' diff --git a/BayesianKDEy/single_experiment_debug.py b/BayesianKDEy/single_experiment_debug.py index 7056fc7..18a8f8f 100644 --- a/BayesianKDEy/single_experiment_debug.py +++ b/BayesianKDEy/single_experiment_debug.py @@ -1,4 +1,5 @@ import os +import pickle import warnings from os.path import join from pathlib import Path @@ -27,7 +28,7 @@ from scipy.stats import dirichlet from collections import defaultdict from time import time from sklearn.base import clone, BaseEstimator - +from temperature_calibration import temp_calibration def methods(): """ @@ -50,7 +51,8 @@ def methods(): # for T in [10, 20, 50, 100., 500]: # yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, num_warmup=3000, num_samples=1000, step_size=.1, **hyper), - yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper), + # yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper), + yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', **hyper) @@ -68,8 +70,8 @@ if __name__ == '__main__': setup = multiclass # data_name = 'isolet' - data_name = 'academic-success' - # data_name = 'abalone' + # data_name = 'academic-success' + data_name = 'poker_hand' qp.environ['SAMPLE_SIZE'] = setup['sample_size'] print(f'dataset={data_name}') @@ -86,6 +88,11 @@ if __name__ == '__main__': f'coverage={report["results"]["coverage"].mean():.5f}, ' f'amplitude={report["results"]["amplitude"].mean():.5f}, ') + # best_hyperparams = pickle.load(open(hyper_path, 'rb')) + # method = withconf_constructor(best_hyperparams) + # + # train, val = data.training.split_stratified(train_prop=0.6, random_state=0) + # temp_calibration(method, train, val, amplitude_threshold=0.1475, temp_grid=[2., 10., 100., 1000.])#temp_grid=[1., 1.25, 1.5, 1.75, 2.]) diff --git a/BayesianKDEy/temperature_bandwidth_corr.py b/BayesianKDEy/temperature_bandwidth_corr.py new file mode 100644 index 0000000..562649f --- /dev/null +++ b/BayesianKDEy/temperature_bandwidth_corr.py @@ -0,0 +1,129 @@ +import os.path +import pickle +from collections import defaultdict +from pathlib import Path +import pandas as pd +import quapy as qp +from BayesianKDEy._bayeisan_kdey import BayesianKDEy +from BayesianKDEy.full_experiments import experiment_path +from quapy.protocol import UPP +import numpy as np +from tqdm import tqdm +from joblib import Parallel, delayed +from itertools import product + +# is there a correspondence between smaller bandwidths and overconfident likelihoods? 
if so, higher calibrated
+# temperatures should correlate with smaller bandwidths; this script analyzes this trend
+
+datasets = qp.datasets.UCI_MULTICLASS_DATASETS
+
+
+def show(results, values):
+    df_res = pd.DataFrame(results)
+    df_mean = (
+        df_res
+        .groupby(['bandwidth', 'temperature'], as_index=False)
+        .mean(numeric_only=True)
+    )
+    pivot = df_mean.pivot(
+        index='bandwidth',
+        columns='temperature',
+        values=values
+    )
+    import matplotlib.pyplot as plt
+
+    plt.imshow(pivot, origin='lower', aspect='auto')
+    plt.colorbar(label=values)
+
+    plt.xticks(range(len(pivot.columns)), pivot.columns)
+    plt.yticks(range(len(pivot.index)), pivot.index)
+
+    plt.xlabel('Temperature')
+    plt.ylabel('Bandwidth')
+    plt.title(f'{values} heatmap')
+
+    plt.savefig(f'./plotcorr/{values}.png')
+    plt.cla()
+    plt.clf()
+
+
+def run_experiment(
+        bandwidth,
+        temperature,
+        train,
+        test,
+        dataset,
+):
+    qp.environ['SAMPLE_SIZE'] = 1000
+    bakde = BayesianKDEy(
+        engine='numpyro',
+        bandwidth=bandwidth,
+        temperature=temperature,
+    )
+    bakde.fit(*train.Xy)
+
+    test_generator = UPP(test, repeats=20, random_state=0)
+
+    rows = []
+
+    for i, (sample, prev) in enumerate(
+            tqdm(
+                test_generator(),
+                desc=f"bw={bandwidth}, T={temperature}",
+                total=test_generator.total(),
+                leave=False,
+            )
+    ):
+        point_estimate, region = bakde.predict_conf(sample)
+
+        rows.append({
+            "bandwidth": bandwidth,
+            "temperature": temperature,
+            "dataset": dataset,
+            "sample": i,
+            "mae": qp.error.mae(prev, point_estimate),
+            "cov": region.coverage(prev),
+            "amp": region.montecarlo_proportion(n_trials=50_000),
+        })
+
+    return rows
+
+
+bandwidths = [0.001, 0.005, 0.01, 0.05, 0.1]
+temperatures = [0.5, 0.75, 1., 2., 5.]
+
+res_dir = './plotcorr/results'
+os.makedirs(res_dir, exist_ok=True)
+
+all_rows = []
+for i, dataset in enumerate(datasets):
+    if dataset in ['letter', 'isolet']: continue
+    res_path = f'{res_dir}/{dataset}.pkl'
+    if os.path.exists(res_path):
+        print(f'loading results from pickle {res_path}')
+        results_data_rows = pickle.load(open(res_path, 'rb'))
+    else:
+        print(f'{dataset=} [complete={i}/{len(datasets)}]')
+        data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
+        train, test = data.train_test
+        jobs = list(product(bandwidths, temperatures))
+        results_data_rows = Parallel(n_jobs=-1, backend="loky")(
+            delayed(run_experiment)(bw, T, train, test, dataset) for bw, T in jobs
+        )
+        pickle.dump(results_data_rows, open(res_path, 'wb'), pickle.HIGHEST_PROTOCOL)
+    all_rows.extend(results_data_rows)
+
+results = defaultdict(list)
+for rows in all_rows:
+    for row in rows:
+        for k, v in row.items():
+            results[k].append(v)
+
+show(results, 'mae')
+show(results, 'cov')
+show(results, 'amp')

diff --git a/BayesianKDEy/temperature_calibration.py b/BayesianKDEy/temperature_calibration.py
new file mode 100644
index 0000000..6153c05
--- /dev/null
+++ b/BayesianKDEy/temperature_calibration.py
@@ -0,0 +1,114 @@
+from quapy.data import LabelledCollection
+from quapy.method.confidence import WithConfidenceABC
+from quapy.protocol import UPP
+import numpy as np
+from tqdm import tqdm
+import quapy as qp
+from joblib import Parallel, delayed
+import copy
+
+
+def temp_calibration(method: WithConfidenceABC,
+                     train: LabelledCollection,
+                     val: LabelledCollection,
+                     temp_grid=[1, 1.5, 2],
+                     num_samples=100,
+                     nominal_coverage=0.95,
+                     amplitude_threshold='auto',
+                     random_state=0,
+                     n_jobs=1,
+                     verbose=True):
+    """
+    Calibrates the temperature of a quantifier implementing WithConfidenceABC: the method is fit on train and
+    evaluated on val under artificial label shift (UPP); among the temperatures in temp_grid whose mean region
+    amplitude remains below amplitude_threshold, the one with empirical coverage closest to nominal_coverage
+    is returned (1.0 if no temperature qualifies).
+    """
+    assert amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float) and amplitude_threshold < 1.), \
f'wrong value for {amplitude_threshold=}, it must either be "auto" or a float < 1.0.' + + if amplitude_threshold=='auto': + n_classes = train.n_classes + amplitude_threshold = .1/np.log(n_classes+1) + + if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1: + print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions') + + + method.fit(*train.Xy) + label_shift_prot = UPP(val, repeats=num_samples, random_state=random_state) + + # results = [] + # temp_grid = sorted(temp_grid) + # for temp in temp_grid: + # method.temperature = temp + # coverage = 0 + # amplitudes = [] + # errs = [] + # pbar = tqdm(enumerate(label_shift_prot()), total=label_shift_prot.total(), disable=not verbose) + # for i, (sample, prev) in pbar: + # point_estim, conf_region = method.predict_conf(sample) + # if prev in conf_region: + # coverage += 1 + # amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000)) + # errs.append(qp.error.mae(prev, point_estim)) + # if verbose: + # pbar.set_description( + # f'temperature={temp:.2f}, ' + # f'coverage={coverage/(i+1):.4f}, ' + # f'amplitude={np.mean(amplitudes):.4f},' + # f'mae={np.mean(errs):.4f}' + # ) + # + # mean_coverage = coverage / label_shift_prot.total() + # mean_amplitude = np.mean(amplitudes) + # + # if mean_amplitude < amplitude_threshold: + # results.append((temp, mean_coverage, mean_amplitude)) + # else: + # break + + def evaluate_temperature(temp): + local_method = copy.deepcopy(method) + local_method.temperature = temp + + coverage = 0 + amplitudes = [] + errs = [] + + for i, (sample, prev) in enumerate(label_shift_prot()): + point_estim, conf_region = local_method.predict_conf(sample) + + if prev in conf_region: + coverage += 1 + + amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000)) + errs.append(qp.error.mae(prev, point_estim)) + + mean_coverage = coverage / label_shift_prot.total() + mean_amplitude = np.mean(amplitudes) + + return temp, mean_coverage, mean_amplitude + + temp_grid = sorted(temp_grid) + + raw_results = Parallel(n_jobs=n_jobs, backend="loky")( + delayed(evaluate_temperature)(temp) + for temp in tqdm(temp_grid, disable=not verbose) + ) + results = [ + (temp, cov, amp) + for temp, cov, amp in raw_results + if amp < amplitude_threshold + ] + + chosen_temperature = 1. 
+ if len(results) > 0: + chosen_temperature = min(results, key=lambda x: abs(x[1]-nominal_coverage))[0] + + print(f'chosen_temperature={chosen_temperature:.2f}') + + return chosen_temperature + + + + + + diff --git a/quapy/data/datasets.py b/quapy/data/datasets.py index 801b968..7dc81ec 100644 --- a/quapy/data/datasets.py +++ b/quapy/data/datasets.py @@ -738,14 +738,29 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas print(f'Loading UCI Muticlass {dataset_name} ({fullname})') file = join(data_home, 'uci_multiclass', dataset_name+'.pkl') - + + def dummify_categorical_features(df_features, dataset_id): + CATEGORICAL_FEATURES = { + 158: ["S1", "C1", "S2", "C2", "S3", "C3", "S4", "C4", "S5", "C5"], # poker_hand + } + + categorical = CATEGORICAL_FEATURES.get(dataset_id, []) + + X = df_features.copy() + if categorical: + X[categorical] = X[categorical].astype("category") + X = pd.get_dummies(X, columns=categorical, drop_first=True) + + return X + def download(id, name): df = fetch_ucirepo(id=id) - df.data.features = pd.get_dummies(df.data.features, drop_first=True) - X, y = df.data.features.to_numpy(dtype=np.float64), df.data.targets.to_numpy().squeeze() + X_df = dummify_categorical_features(df.data.features, id) + X = X_df.to_numpy(dtype=np.float64) + y = df.data.targets.to_numpy().squeeze() - assert y.ndim == 1, 'more than one y' + assert y.ndim == 1, f'error: the dataset {id=} {name=} has more than one target variable' classes = np.sort(np.unique(y)) y = np.searchsorted(classes, y) diff --git a/quapy/error.py b/quapy/error.py index 2407eb0..fb3459f 100644 --- a/quapy/error.py +++ b/quapy/error.py @@ -139,7 +139,8 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.): :math:`\\mathcal{Y}` are the classes of interest :param prevs_true: array-like, true prevalence values :param prevs_hat: array-like, estimated prevalence values - :param prevs_train: array-like, training prevalence values + :param prevs_train: array-like with the training prevalence values, or a single prevalence vector when + all the prevs_true and prevs_hat are computed on the same training set. :param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the training prevalence is expected to be >0 everywhere. :return: the squared ratio error @@ -149,6 +150,8 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.): prevs_train = np.asarray(prevs_train) assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape=} vs {prevs_hat.shape=}' assert prevs_true.shape[-1]==prevs_train.shape[-1], 'wrong shape for training prevalence' + if prevs_true.ndim == 2 and prevs_train.ndim == 1: + prevs_train = np.tile(prevs_train, reps=(prevs_true.shape[0], 1)) if eps>0: prevs_true = smooth(prevs_true, eps) prevs_hat = smooth(prevs_hat, eps) @@ -157,12 +160,13 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.): N = prevs_true.shape[-1] w = prevs_true / prevs_train w_hat = prevs_hat / prevs_train - return (1./N) * (w - w_hat)**2. + return (1./N) * np.sum((w - w_hat)**2., axis=-1) def msre(prevs_true, prevs_hat, prevs_train, eps=0.): """ - Returns the mean across all experiments. See :function:`sre`. + Returns the mean :function:`sre` across all experiments. 
+ :param prevs_true: array-like, true prevalence values of shape (n_experiments, n_classes,) or (n_classes,) :param prevs_hat: array-like, estimated prevalence values of shape equal to prevs_true :param prevs_train: array-like, training prevalence values of (n_experiments, n_classes,) or (n_classes,) diff --git a/quapy/method/confidence.py b/quapy/method/confidence.py index 914b814..cc0673a 100644 --- a/quapy/method/confidence.py +++ b/quapy/method/confidence.py @@ -139,7 +139,7 @@ class WithConfidenceABC(ABC): """ Abstract class for confidence regions. """ - METHODS = ['intervals', 'ellipse', 'ellipse-clr'] + REGION_TYPE = ['intervals', 'ellipse', 'ellipse-clr', 'ellipse-ilr'] @abstractmethod def predict_conf(self, instances, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC): @@ -185,6 +185,8 @@ class WithConfidenceABC(ABC): region = ConfidenceEllipseSimplex(prev_estims, confidence_level=confidence_level) elif method == 'ellipse-clr': region = ConfidenceEllipseCLR(prev_estims, confidence_level=confidence_level) + elif method == 'ellipse-ilr': + region = ConfidenceEllipseILR(prev_estims, confidence_level=confidence_level) if region is None: raise NotImplementedError(f'unknown method {method}')
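Note (not part of the diff): this patch does not show how BayesianKDEy applies `temperature` internally. The sketch below assumes the standard tempered-likelihood construction, in which the log-likelihood is divided by T before MCMC, so that T > 1 flattens the posterior and widens the credible regions, which is what the TODO item on coverage vs. amplitude is after. All names here are illustrative, not taken from the codebase.

    import numpy as np

    # Minimal sketch of likelihood tempering (assumption: the log-likelihood
    # is divided by T; the actual BayesianKDEy internals are not in this patch).
    theta = np.linspace(0.01, 0.99, 99)           # candidate prevalences (binary case)
    loglik = -0.5 * ((theta - 0.3) / 0.05) ** 2   # toy, sharply peaked log-likelihood

    for T in [1., 2., 10.]:
        post = np.exp(loglik / T)                 # tempered, unnormalized posterior
        post /= post.sum()
        mean = (theta * post).sum()
        std = np.sqrt(((theta - mean) ** 2 * post).sum())
        print(f'T={T:5.1f} -> posterior std={std:.4f}')   # spread grows with T

This also bears on the TODO question about bandwidth: for a single Gaussian kernel, raising the density to the power 1/T rescales the exponent exactly like inflating the bandwidth by sqrt(T) (up to normalization), but for a KDE mixture the two are not equivalent, since the power of a sum is not the sum of the powers.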
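Note (toy illustration of the selection rule in temp_calibration, using only the logic shown in this patch): among the temperatures whose mean region amplitude stays below the threshold, the function picks the one whose empirical coverage is closest to the nominal level; over-wide settings are discarded even if they cover more. The numbers below are made up.

    # Tuples are (temperature, mean coverage, mean amplitude), as returned
    # by evaluate_temperature for each value in the grid.
    raw_results = [(0.5, 0.81, 0.02), (1.0, 0.90, 0.05),
                   (2.0, 0.96, 0.09), (5.0, 0.99, 0.20)]
    nominal_coverage = 0.95
    amplitude_threshold = 0.10

    # keep only temperatures that still yield informative (narrow enough) regions
    results = [(t, c, a) for t, c, a in raw_results if a < amplitude_threshold]
    # among those, pick the coverage closest to the nominal one
    chosen_temperature = min(results, key=lambda x: abs(x[1] - nominal_coverage))[0]
    print(chosen_temperature)  # 2.0; T=5.0 is discarded as too wide despite higher coverage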
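Note (worked example of the fixed sre/msre in quapy/error.py): with this change, a single training prevalence vector is tiled across all experiments, and the squared ratio differences are summed over classes (axis=-1), so sre now returns one score per experiment rather than one per class.

    import numpy as np
    import quapy as qp

    prevs_true = np.asarray([[0.2, 0.3, 0.5],
                             [0.1, 0.6, 0.3]])
    prevs_hat = np.asarray([[0.25, 0.25, 0.5],
                            [0.2, 0.5, 0.3]])
    prevs_train = np.asarray([0.4, 0.4, 0.2])  # shared by both experiments, gets tiled

    print(qp.error.sre(prevs_true, prevs_hat, prevs_train))   # one score per experiment
    print(qp.error.msre(prevs_true, prevs_hat, prevs_train))  # their mean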
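Note (minimal check of the poker_hand dummification in quapy/data/datasets.py): for dataset id 158, the suit/rank columns are declared categorical and one-hot encoded with the first level dropped, while the remaining columns pass through unchanged. The toy frame below is illustrative, not the real UCI data.

    import pandas as pd

    df = pd.DataFrame({'S1': [1, 2, 3], 'C1': [5, 7, 5], 'other': [0.1, 0.2, 0.3]})
    df[['S1', 'C1']] = df[['S1', 'C1']].astype('category')
    X = pd.get_dummies(df, columns=['S1', 'C1'], drop_first=True)
    print(X.columns.tolist())  # ['other', 'S1_2', 'S1_3', 'C1_7']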