calibrating temperature and coming back to Aitchison kernel

This commit is contained in:
Alejandro Moreo Fernandez 2026-01-09 17:20:05 +01:00
parent e33b291357
commit ae9503a43b
10 changed files with 376 additions and 46 deletions

View File

@@ -1,7 +1,23 @@
- Add other methods that natively provide uncertainty quantification?
- Add other methods that natively provide uncertainty quantification? (e.g., Ratio estimator, Card & Smith)
- MPIW (Mean Prediction Interval Width): the average of the interval amplitudes (without aggregating coverage in any way)
- Implement the Interval Score or Winkler Score (see the sketch after this list)
- analyze across shift
- add Bayesian EM
- optimize also C and class_weight?
- add Bayesian EM:
- https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/mapls.py
- take this opportunity to add RLLS: https://github.com/ChangkunYe/MAPLS/blob/main/label_shift/rlls.py
- add CIFAR10 and MNIST? Maybe also consider previously tested types of shift (tweak-one-out, etc.) from the RLLS paper?
- https://github.com/Angie-Liu/labelshift/tree/master
- https://github.com/Angie-Liu/labelshift/blob/master/cifar10_for_labelshift.py
- Note: MNIST is downloadable from https://archive.ics.uci.edu/dataset/683/mnist+database+of+handwritten+digits
- consider prior knowledge in experiments:
- One scenario in which our prior is uninformative (i.e., uniform)
- One scenario in which our prior is wrong (e.g., alpha-prior = (3,2,1), protocol-prior=(1,1,5))
- One scenario in which our prior is very good (e.g., alpha-prior = (3,2,1), protocol-prior=(3,2,1))
- Do all my baseline methods come with the option to specify a prior?
- consider different bandwidths within the bayesian approach?
- how to improve the coverage (or how to increase the amplitude)?
- Added temperature calibration; it improves things.
- Is temperature calibration actually not equivalent to using a larger bandwidth in the kernels?
- consider W as a measure of quantification error (e.g., the current w-CI is the Winkler...)
- optimize also C and class_weight? [I don't think so, but could be done easily now]
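
A minimal sketch of the Winkler (interval) score mentioned in the list above, in its standard textbook form; this is illustrative code, not part of the repository, and the function name and alpha parameter are placeholders:

import numpy as np

def winkler_score(low, high, y_true, alpha=0.05):
    # interval width, plus 2/alpha times the amount by which the true
    # value falls outside the interval; lower is better
    low, high, y_true = map(np.asarray, (low, high, y_true))
    width = high - low
    undershoot = (2. / alpha) * np.clip(low - y_true, 0., None)
    overshoot = (2. / alpha) * np.clip(y_true - high, 0., None)
    return width + undershoot + overshoot

# MPIW is then just the mean width, with no coverage aggregation:
# mpiw = np.mean(high - low)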

View File

@@ -66,7 +66,7 @@ class BayesianKDEy(AggregativeSoftQuantifier, KDEBase, WithConfidenceABC):
raise ValueError(f'parameter {num_samples=} must be a positive integer')
assert explore in ['simplex', 'clr', 'ilr'], \
f'unexpected value for param {explore=}; valid ones are "simplex", "clr", and "ilr"'
assert temperature>0., f'temperature must be >0'
# assert temperature>0., f'temperature must be >0'
assert engine in ['rw-mh', 'emcee', 'numpyro']
super().__init__(classifier, fit_classifier, val_split)
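
For context on the relaxed assert: temperature=None is used later in this commit to trigger automatic temperature calibration, which a strict temperature>0 check would reject. The parameter itself presumably tempers the posterior in the usual way (an assumption about the sampler, not a confirmed reading of it):

p_T(\theta \mid D) \propto p(D \mid \theta)^{1/T} \, p(\theta)

with T=1 recovering the standard posterior, T>1 flattening the likelihood (wider credible regions), and T<1 sharpening it; this is also why the TODO above asks whether temperature is distinguishable from simply enlarging the kernel bandwidth.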

View File

@@ -9,6 +9,7 @@ from sklearn.model_selection import GridSearchCV, StratifiedKFold
from copy import deepcopy as cp
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy.temperature_calibration import temp_calibration
from quapy.data import LabelledCollection
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ
from quapy.method.base import BinaryQuantifier, BaseQuantifier
@@ -72,14 +73,22 @@ def methods():
# yield 'BayesianHDy', DMy(LR()), hdy_hyper, lambda hyper: PQ(LR(), stan_seed=0, **hyper), only_binary
#
# yield 'BootstrapKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: AggregativeBootstrap(KDEyML(LR(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method
# yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
yield 'BayesianKDEy', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, **hyper), multiclass_method
# yield 'BayesianKDEy*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, **hyper), multiclass_method
# yield 'BayKDEy*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.15, **hyper), multiclass_method
# yield 'BayKDEy*CLR2', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='clr', step_size=.05, **hyper), multiclass_method
# yield 'BayKDEy*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, explore='ilr', step_size=.15, **hyper), only_multiclass
# yield 'BayKDEy*ILR2', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, explore='ilr', step_size=.1, **hyper), only_multiclass
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, engine='emcee', **hyper), multiclass_method
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, step_size=.1, engine='numpyro', **hyper), multiclass_method
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy( mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
yield f'BaKDE-numpyro-T2', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=2., **hyper), multiclass_method
yield f'BaKDE-numpyro-T*', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
yield f'BaKDE-Ait-numpyro', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
yield f'BaKDE-Ait-numpyro-T*', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, **hyper), multiclass_method
yield f'BaKDE-Ait-numpyro-T*ILR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, region='ellipse-ilr', **hyper), multiclass_method
yield f'BaKDE-numpyro-T10', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', temperature=10., **hyper), multiclass_method
yield f'BaKDE-numpyro*CLR', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
yield f'BaKDE-numpyro*ILR', KDEyILR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='ilr', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
def model_selection(train: LabelledCollection, point_quantifier: AggregativeQuantifier, grid: dict):
@@ -114,7 +123,12 @@ def experiment(dataset: Dataset, point_quantifier: AggregativeQuantifier, method
)
t_init = time()
withconf_quantifier = withconf_constructor(best_hyperparams).fit(*training.Xy)
withconf_quantifier = withconf_constructor(best_hyperparams)
if hasattr(withconf_quantifier, 'temperature') and withconf_quantifier.temperature is None:
train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
temperature = temp_calibration(withconf_quantifier, train, val, temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.], n_jobs=-1)
withconf_quantifier.temperature = temperature
withconf_quantifier.fit(*training.Xy)
tr_time = time() - t_init
# test
@@ -155,13 +169,13 @@ if __name__ == '__main__':
binary = {
'datasets': qp.datasets.UCI_BINARY_DATASETS,
'fetch_fn': qp.datasets.fetch_UCIBinaryDataset,
'sample_size': 100 # previous: 500
'sample_size': 500 # previous: small 100, big 500
}
multiclass = {
'datasets': qp.datasets.UCI_MULTICLASS_DATASETS,
'fetch_fn': qp.datasets.fetch_UCIMulticlassDataset,
'sample_size': 200 # previous: 1000
'sample_size': 1000 # previous: small 200, big 1000
}
result_dir = Path('./results')

View File

@@ -80,7 +80,30 @@ def update_pickle_with_region(report, file, conf_name, conf_region_class, **kwar
# methods = None # show all methods
methods = ['BayesianACC', 'BayesianKDEy', 'BaKDE-emcee', 'BaKDE-numpyro']
methods = ['BayesianACC', #'BayesianKDEy',
#'BaKDE-emcee',
# 'BaKDE-numpyro',
# 'BaKDE-numpyro-T2',
# 'BaKDE-numpyro-T10',
# 'BaKDE-numpyro-T*',
# 'BaKDE-Ait-numpyro',
'BaKDE-Ait-numpyro-T*',
# 'BaKDE-Ait-numpyro-T*ILR',
'BootstrapACC',
'BootstrapHDy',
'BootstrapKDEy'
]
def nicer(name:str):
replacements = {
'Bayesian': 'Ba',
'Bootstrap': 'Bo',
'numpyro': 'ro',
'emcee': 'emc',
}
for k, v in replacements.items():
name = name.replace(k,v)
return name
for setup in ['multiclass']:
path = f'./results/{setup}/*.pkl'
@@ -93,7 +116,7 @@ for setup in ['multiclass']:
report = pickle.load(open(file, 'rb'))
results = report['results']
n_samples = len(results['ae'])
table['method'].extend([method.replace('Bayesian','Ba').replace('Bootstrap', 'Bo')] * n_samples)
table['method'].extend([nicer(method)] * n_samples)
table['dataset'].extend([dataset] * n_samples)
table['ae'].extend(results['ae'])
table['rae'].extend(results['rae'])
@@ -101,28 +124,29 @@ for setup in ['multiclass']:
# table['a-CI'].extend(results['amplitude'])
update_pickle_with_region(report, file, conf_name='CI', conf_region_class=ConfidenceIntervals, bonferroni_correction=True)
update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
# update_pickle_with_region(report, file, conf_name='CE', conf_region_class=ConfidenceEllipseSimplex)
# update_pickle_with_region(report, file, conf_name='CLR', conf_region_class=ConfidenceEllipseCLR)
# update_pickle_with_region(report, file, conf_name='ILR', conf_region_class=ConfidenceEllipseILR)
table['c-CI'].extend(report['coverage-CI'])
table['a-CI'].extend(report['amplitude-CI'])
table['w-CI'].extend(report['winkler-CI'])
table['amperr-CI'].extend(report['amperr-CI'])
table['c-CE'].extend(report['coverage-CE'])
table['a-CE'].extend(report['amplitude-CE'])
table['amperr-CE'].extend(report['amperr-CE'])
# table['c-CE'].extend(report['coverage-CE'])
# table['a-CE'].extend(report['amplitude-CE'])
# table['amperr-CE'].extend(report['amperr-CE'])
table['c-CLR'].extend(report['coverage-CLR'])
table['a-CLR'].extend(report['amplitude-CLR'])
table['amperr-CLR'].extend(report['amperr-CLR'])
# table['c-CLR'].extend(report['coverage-CLR'])
# table['a-CLR'].extend(report['amplitude-CLR'])
# table['amperr-CLR'].extend(report['amperr-CLR'])
table['c-ILR'].extend(report['coverage-ILR'])
table['a-ILR'].extend(report['amplitude-ILR'])
table['amperr-ILR'].extend(report['amperr-ILR'])
# table['c-ILR'].extend(report['coverage-ILR'])
# table['a-ILR'].extend(report['amplitude-ILR'])
# table['amperr-ILR'].extend(report['amperr-ILR'])
table['aitch'].extend(qp.error.dist_aitchison(results['true-prevs'], results['point-estim']))
table['SRE'].extend(qp.error.sre(results['true-prevs'], results['point-estim'], report['train-prev'], eps=0.001))
# table['aitch-well'].extend(qp.error.dist_aitchison(results['true-prevs'], [ConfidenceEllipseILR(samples).mean_ for samples in results['samples']]))
# table['aitch'].extend()
# table['reg-score-ILR'].extend(
@@ -145,16 +169,20 @@ for setup in ['multiclass']:
tr_size[dataset] = len(data.training)
# remove datasets with more than max_classes classes
# max_classes = 30
# min_train = 1000
# for data_name, n in n_classes.items():
# if n > max_classes:
# df = df[df["dataset"] != data_name]
# for data_name, n in tr_size.items():
# if n < min_train:
# df = df[df["dataset"] != data_name]
max_classes = 25
min_train = 500
ignore_datasets = ['poker_hand', 'hcv']
for data_name, n in n_classes.items():
if n > max_classes:
df = df[df["dataset"] != data_name]
for data_name, n in tr_size.items():
if n < min_train:
df = df[df["dataset"] != data_name]
for data_name, n in tr_size.items():
if data_name in ignore_datasets:
df = df[df["dataset"] != data_name]
for region in ['CI', 'CE', 'CLR', 'ILR']:
for region in ['CI']: #, 'CLR', 'ILR', 'CI']:
if setup == 'binary' and region=='ILR':
continue
# pv = pd.pivot_table(
@@ -162,11 +190,12 @@ for setup in ['multiclass']:
# )
pv = pd.pivot_table(
df, index='dataset', columns='method', values=[
f'amperr-{region}',
# f'amperr-{region}',
f'a-{region}',
f'c-{region}',
#f'w-{region}',
# f'w-{region}',
'ae',
'SRE',
# 'rae',
# f'aitch',
# f'aitch-well'

View File

@@ -1,4 +1,5 @@
import os
import pickle
import warnings
from os.path import join
from pathlib import Path
@@ -27,7 +28,7 @@ from scipy.stats import dirichlet
from collections import defaultdict
from time import time
from sklearn.base import clone, BaseEstimator
from temperature_calibration import temp_calibration
def methods():
"""
@@ -50,7 +51,8 @@ def methods():
# for T in [10, 20, 50, 100., 500]:
# yield f'BaKDE-CLR-T{T}', KDEyCLR(LR()), kdey_hyper_clr, lambda hyper: BayesianKDEy(kernel='aitchison', explore='ilr', mcmc_seed=0, temperature=T, num_warmup=3000, num_samples=1000, step_size=.1, **hyper),
yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper),
# yield f'BaKDE-emcee', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, num_warmup=100, num_samples=100, step_size=.1, **hyper),
yield f'BaKDE-numpyro', KDEyML(LR()), kdey_hyper, lambda hyper: BayesianKDEy(mcmc_seed=0, engine='numpyro', **hyper)
@@ -68,8 +70,8 @@ if __name__ == '__main__':
setup = multiclass
# data_name = 'isolet'
data_name = 'academic-success'
# data_name = 'abalone'
# data_name = 'academic-success'
data_name = 'poker_hand'
qp.environ['SAMPLE_SIZE'] = setup['sample_size']
print(f'dataset={data_name}')
@@ -86,6 +88,11 @@ if __name__ == '__main__':
f'coverage={report["results"]["coverage"].mean():.5f}, '
f'amplitude={report["results"]["amplitude"].mean():.5f}, ')
# best_hyperparams = pickle.load(open(hyper_path, 'rb'))
# method = withconf_constructor(best_hyperparams)
#
# train, val = data.training.split_stratified(train_prop=0.6, random_state=0)
# temp_calibration(method, train, val, amplitude_threshold=0.1475, temp_grid=[2., 10., 100., 1000.])#temp_grid=[1., 1.25, 1.5, 1.75, 2.])

View File

@@ -0,0 +1,129 @@
import os.path
import pickle
from collections import defaultdict
from pathlib import Path
import pandas as pd
import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy.full_experiments import experiment_path
from quapy.protocol import UPP
import numpy as np
from tqdm import tqdm
from joblib import Parallel, delayed
from itertools import product
# is there a correspondence between smaller bandwidths and overconfident likelihoods? if so, smaller bandwidths
# should correlate with higher post-calibration temperatures; this script analyzes that trend
datasets = qp.datasets.UCI_MULTICLASS_DATASETS
def show(results, values):
df_res = pd.DataFrame(results)
df_mean = (
df_res
.groupby(['bandwidth', 'temperature'], as_index=False)
.mean(numeric_only=True)
)
pivot = df_mean.pivot(
index='bandwidth',
columns='temperature',
values=values
)
import matplotlib.pyplot as plt
plt.imshow(pivot, origin='lower', aspect='auto')
plt.colorbar(label=values)
plt.xticks(range(len(pivot.columns)), pivot.columns)
plt.yticks(range(len(pivot.index)), pivot.index)
plt.xlabel('Temperature')
plt.ylabel('Bandwidth')
plt.title(f'{values} heatmap')
plt.savefig(f'./plotcorr/{values}.png')
plt.cla()
plt.clf()
def run_experiment(
bandwidth,
temperature,
train,
test,
dataset,
):
qp.environ['SAMPLE_SIZE'] = 1000
bakde = BayesianKDEy(
engine='numpyro',
bandwidth=bandwidth,
temperature=temperature,
)
bakde.fit(*train.Xy)
test_generator = UPP(test, repeats=20, random_state=0)
rows = []
for i, (sample, prev) in enumerate(
tqdm(
test_generator(),
desc=f"bw={bandwidth}, T={temperature}",
total=test_generator.total(),
leave=False,
)
):
point_estimate, region = bakde.predict_conf(sample)
rows.append({
"bandwidth": bandwidth,
"temperature": temperature,
"dataset": dataset,
"sample": i,
"mae": qp.error.mae(prev, point_estimate),
"cov": region.coverage(prev),
"amp": region.montecarlo_proportion(n_trials=50_000),
})
return rows
bandwidths = [0.001, 0.005, 0.01, 0.05, 0.1]
temperatures = [0.5, 0.75, 1., 2., 5.]
res_dir = './plotcorr/results'
os.makedirs(res_dir, exist_ok=True)
all_rows = []
for i, dataset in enumerate(datasets):
if dataset in ['letter', 'isolet']: continue
res_path = f'{res_dir}/{dataset}.pkl'
if os.path.exists(res_path):
print(f'loading results from pickle {res_path}')
results_data_rows = pickle.load(open(res_path, 'rb'))
else:
print(f'{dataset=} [complete={i}/{len(datasets)}]')
data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
train, test = data.train_test
jobs = list(product(bandwidths, temperatures))
results_data_rows = Parallel(n_jobs=-1,backend="loky")(
delayed(run_experiment)(bw, T, train, test, dataset) for bw, T in jobs
)
pickle.dump(results_data_rows, open(res_path, 'wb'), pickle.HIGHEST_PROTOCOL)
all_rows.extend(results_data_rows)
results = defaultdict(list)
for rows in all_rows:
for row in rows:
for k, v in row.items():
results[k].append(v)
show(results, 'mae')
show(results, 'cov')
show(results, 'amp')

View File

@@ -0,0 +1,114 @@
from quapy.data import LabelledCollection
from quapy.method.confidence import WithConfidenceABC
from quapy.protocol import UPP
import numpy as np
from tqdm import tqdm
import quapy as qp
from joblib import Parallel, delayed
import copy
def temp_calibration(method:WithConfidenceABC,
train:LabelledCollection,
val:LabelledCollection,
temp_grid=[1, 1.5, 2],
num_samples=100,
nominal_coverage=0.95,
amplitude_threshold='auto',
random_state=0,
n_jobs=1,
verbose=True):
assert amplitude_threshold == 'auto' or (isinstance(amplitude_threshold, float) and amplitude_threshold < 1.), \
f'wrong value for {amplitude_threshold=}; it must either be "auto" or a float < 1.0'
if amplitude_threshold=='auto':
n_classes = train.n_classes
amplitude_threshold = .1/np.log(n_classes+1)
if isinstance(amplitude_threshold, float) and amplitude_threshold > 0.1:
print(f'warning: the {amplitude_threshold=} is too large; this may lead to uninformative regions')
method.fit(*train.Xy)
label_shift_prot = UPP(val, repeats=num_samples, random_state=random_state)
# results = []
# temp_grid = sorted(temp_grid)
# for temp in temp_grid:
# method.temperature = temp
# coverage = 0
# amplitudes = []
# errs = []
# pbar = tqdm(enumerate(label_shift_prot()), total=label_shift_prot.total(), disable=not verbose)
# for i, (sample, prev) in pbar:
# point_estim, conf_region = method.predict_conf(sample)
# if prev in conf_region:
# coverage += 1
# amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
# errs.append(qp.error.mae(prev, point_estim))
# if verbose:
# pbar.set_description(
# f'temperature={temp:.2f}, '
# f'coverage={coverage/(i+1):.4f}, '
# f'amplitude={np.mean(amplitudes):.4f},'
# f'mae={np.mean(errs):.4f}'
# )
#
# mean_coverage = coverage / label_shift_prot.total()
# mean_amplitude = np.mean(amplitudes)
#
# if mean_amplitude < amplitude_threshold:
# results.append((temp, mean_coverage, mean_amplitude))
# else:
# break
def evaluate_temperature(temp):
local_method = copy.deepcopy(method)
local_method.temperature = temp
coverage = 0
amplitudes = []
errs = []
for i, (sample, prev) in enumerate(label_shift_prot()):
point_estim, conf_region = local_method.predict_conf(sample)
if prev in conf_region:
coverage += 1
amplitudes.append(conf_region.montecarlo_proportion(n_trials=50_000))
errs.append(qp.error.mae(prev, point_estim))
mean_coverage = coverage / label_shift_prot.total()
mean_amplitude = np.mean(amplitudes)
return temp, mean_coverage, mean_amplitude
temp_grid = sorted(temp_grid)
raw_results = Parallel(n_jobs=n_jobs, backend="loky")(
delayed(evaluate_temperature)(temp)
for temp in tqdm(temp_grid, disable=not verbose)
)
results = [
(temp, cov, amp)
for temp, cov, amp in raw_results
if amp < amplitude_threshold
]
chosen_temperature = 1.
if len(results) > 0:
chosen_temperature = min(results, key=lambda x: abs(x[1]-nominal_coverage))[0]
print(f'chosen_temperature={chosen_temperature:.2f}')
return chosen_temperature
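
A minimal usage sketch of temp_calibration, mirroring the call added to the experiment script earlier in this commit (the BayesianKDEy constructor arguments follow the yields in methods(); treat the exact signature and the dataset name as assumptions):

import quapy as qp
from BayesianKDEy._bayeisan_kdey import BayesianKDEy
from BayesianKDEy.temperature_calibration import temp_calibration

qp.environ['SAMPLE_SIZE'] = 1000
data = qp.datasets.fetch_UCIMulticlassDataset('academic-success')
train, val = data.training.split_stratified(train_prop=0.6, random_state=0)

method = BayesianKDEy(engine='numpyro', temperature=None, mcmc_seed=0)
# keep the temperature whose empirical coverage is closest to the nominal 0.95,
# among those whose mean region amplitude stays below amplitude_threshold ('auto' here)
method.temperature = temp_calibration(method, train, val,
                                      temp_grid=[.5, 1., 1.5, 2., 5., 10., 100.],
                                      n_jobs=-1)
method.fit(*data.training.Xy)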

View File

@@ -738,14 +738,29 @@ def fetch_UCIMulticlassLabelledCollection(dataset_name, data_home=None, min_clas
print(f'Loading UCI Multiclass {dataset_name} ({fullname})')
file = join(data_home, 'uci_multiclass', dataset_name+'.pkl')
def dummify_categorical_features(df_features, dataset_id):
CATEGORICAL_FEATURES = {
158: ["S1", "C1", "S2", "C2", "S3", "C3", "S4", "C4", "S5", "C5"], # poker_hand
}
categorical = CATEGORICAL_FEATURES.get(dataset_id, [])
X = df_features.copy()
if categorical:
X[categorical] = X[categorical].astype("category")
X = pd.get_dummies(X, columns=categorical, drop_first=True)
return X
def download(id, name):
df = fetch_ucirepo(id=id)
df.data.features = pd.get_dummies(df.data.features, drop_first=True)
X, y = df.data.features.to_numpy(dtype=np.float64), df.data.targets.to_numpy().squeeze()
X_df = dummify_categorical_features(df.data.features, id)
X = X_df.to_numpy(dtype=np.float64)
y = df.data.targets.to_numpy().squeeze()
assert y.ndim == 1, 'more than one y'
assert y.ndim == 1, f'error: the dataset {id=} {name=} has more than one target variable'
classes = np.sort(np.unique(y))
y = np.searchsorted(classes, y)

View File

@@ -139,7 +139,8 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
:math:`\\mathcal{Y}` are the classes of interest
:param prevs_true: array-like, true prevalence values
:param prevs_hat: array-like, estimated prevalence values
:param prevs_train: array-like, training prevalence values
:param prevs_train: array-like with the training prevalence values, or a single prevalence vector when
all of prevs_true and prevs_hat refer to the same training set.
:param eps: float, for smoothing the prevalence values. It is 0 by default (no smoothing), meaning the
training prevalence is expected to be >0 everywhere.
:return: the squared ratio error
@@ -149,6 +150,8 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
prevs_train = np.asarray(prevs_train)
assert prevs_true.shape == prevs_hat.shape, f'wrong shape {prevs_true.shape=} vs {prevs_hat.shape=}'
assert prevs_true.shape[-1]==prevs_train.shape[-1], 'wrong shape for training prevalence'
if prevs_true.ndim == 2 and prevs_train.ndim == 1:
prevs_train = np.tile(prevs_train, reps=(prevs_true.shape[0], 1))
if eps>0:
prevs_true = smooth(prevs_true, eps)
prevs_hat = smooth(prevs_hat, eps)
@@ -157,12 +160,13 @@ def sre(prevs_true, prevs_hat, prevs_train, eps=0.):
N = prevs_true.shape[-1]
w = prevs_true / prevs_train
w_hat = prevs_hat / prevs_train
return (1./N) * (w - w_hat)**2.
return (1./N) * np.sum((w - w_hat)**2., axis=-1)
def msre(prevs_true, prevs_hat, prevs_train, eps=0.):
"""
Returns the mean across all experiments. See :function:`sre`.
Returns the mean :function:`sre` across all experiments.
:param prevs_true: array-like, true prevalence values of shape (n_experiments, n_classes,) or (n_classes,)
:param prevs_hat: array-like, estimated prevalence values of shape equal to prevs_true
:param prevs_train: array-like, training prevalence values of (n_experiments, n_classes,) or (n_classes,)
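
A worked check of the corrected return value: for N classes the squared ratio error is

\mathrm{SRE}(p, \hat{p}; p_{tr}) = \frac{1}{N} \sum_{y \in \mathcal{Y}} \left( \frac{p(y)}{p_{tr}(y)} - \frac{\hat{p}(y)}{p_{tr}(y)} \right)^2

e.g., with p_tr=(0.5, 0.3, 0.2), p=(0.4, 0.4, 0.2), and p_hat=(0.5, 0.3, 0.2), the ratios are w=(0.8, 1.33, 1.0) and w_hat=(1.0, 1.0, 1.0), so SRE = ((-0.2)^2 + (0.33)^2 + 0^2)/3 ≈ 0.050. The previous return line produced the per-class vector of squared differences instead of this sum over classes.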

View File

@@ -139,7 +139,7 @@ class WithConfidenceABC(ABC):
"""
Abstract class for confidence regions.
"""
METHODS = ['intervals', 'ellipse', 'ellipse-clr']
REGION_TYPE = ['intervals', 'ellipse', 'ellipse-clr', 'ellipse-ilr']
@abstractmethod
def predict_conf(self, instances, confidence_level=0.95) -> (np.ndarray, ConfidenceRegionABC):
@@ -185,6 +185,8 @@ class WithConfidenceABC(ABC):
region = ConfidenceEllipseSimplex(prev_estims, confidence_level=confidence_level)
elif method == 'ellipse-clr':
region = ConfidenceEllipseCLR(prev_estims, confidence_level=confidence_level)
elif method == 'ellipse-ilr':
region = ConfidenceEllipseILR(prev_estims, confidence_level=confidence_level)
if region is None:
raise NotImplementedError(f'unknown method {method}')
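
A minimal end-to-end sketch of requesting the new 'ellipse-ilr' region type, using only calls that appear elsewhere in this diff (train, test_sample and true_prev are placeholders; the exact constructor signature is an assumption):

from BayesianKDEy._bayeisan_kdey import BayesianKDEy

# ILR-space confidence ellipses instead of the default region type
method = BayesianKDEy(kernel='aitchison', engine='numpyro',
                      temperature=None, region='ellipse-ilr', mcmc_seed=0)
method.fit(*train.Xy)
point_estimate, conf_region = method.predict_conf(test_sample)
covered = conf_region.coverage(true_prev)  # 1 if true_prev lies inside the region
amplitude = conf_region.montecarlo_proportion(n_trials=50_000)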