# QuaPy/BayesianKDEy/full_experiments.py

from pathlib import Path

from sklearn.linear_model import LogisticRegression
from copy import deepcopy as cp
import quapy as qp
from BayesianKDEy.commons import KDEyReduce
from _bayeisan_kdey import BayesianKDEy
from _bayesian_mapls import BayesianMAPLS
from commons import experiment_path, KDEyCLR, RESULT_DIR, MockClassifierFromPosteriors, KDEyScaledB, KDEyFresh
# import datasets
from datasets import LeQuaHandler, UCIMulticlassHandler, DatasetHandler, VisualDataHandler, CIFAR100Handler
from temperature_calibration import temp_calibration
from build.lib.quapy.data import LabelledCollection
from quapy.method.aggregative import DistributionMatchingY as DMy, AggregativeQuantifier, EMQ, CC
from quapy.model_selection import GridSearchQ
from quapy.data import Dataset
from quapy.method.confidence import BayesianCC, AggregativeBootstrap
from quapy.method.aggregative import KDEyML, ACC
from quapy.protocol import UPP
import numpy as np
from tqdm import tqdm
from collections import defaultdict
from time import time


def methods(data_handler: DatasetHandler):
    """
    Generate, one at a time, the experimental configurations to be run.

    Every yielded item is a 5-tuple (name, quantifier, hyperparams, constructor, scope), where:
    - name: str identifying the method (e.g., 'BaKDE-Gau-pca5'); the main script uses it to build the result path
    - quantifier: the surrogate point quantifier on which model selection is carried out (e.g., a KDEyReduce)
    - hyperparams: the grid (dict: parameter name -> candidate values) explored for the surrogate quantifier
    - constructor: a callable that takes the dict of selected hyperparameters and instantiates the
        bayesian/bootstrap method to be evaluated
    - scope: one of 'multiclass', 'only_binary', 'only_multiclass'; the last two values are the ones
        check_skip_experiment tests against the dataset type

    Note that `data_handler` is only read by the (currently disabled) mock-classifier branch, and that
    the very same grid object is yielded for all the active configurations.
    """
    use_mock_classifier = False  # isinstance(data_handler, VisualDataHandler)
    if use_mock_classifier:
        Cls = MockClassifierFromPosteriors
        cls_hyper = {}
        val_split = data_handler.get_validation().Xy  # use this specific collection
    else:
        Cls = LogisticRegression
        cls_hyper = {
            'classifier__C': np.logspace(-4,4,9),
            'classifier__class_weight': ['balanced', None],
        }
        val_split = 5  # k-fold cross-validation

    # hyperparameter grids (some of them are only referenced by the disabled configurations below)
    acc_hyper = cls_hyper
    # emq_hyper = {'calib': ['nbvs', 'bcts', 'ts', 'vs'], **cls_hyper}
    hdy_hyper = dict(nbins=[3,4,5,8,16,32], **cls_hyper)
    kdey_hyper = dict(bandwidth=np.logspace(-3, -1, 10), **cls_hyper)
    kdey_hyper_clr = dict(bandwidth=np.logspace(-2, 2, 10), **cls_hyper)

    # scope labels
    multiclass_method = 'multiclass'
    only_binary = 'only_binary'
    only_multiclass = 'only_multiclass'

    # surrogate quantifiers
    acc = ACC(Cls(), val_split=val_split)
    hdy = DMy(Cls(), val_split=val_split)
    kde_gau = KDEyML(Cls(), val_split=val_split)
    kde_gau_scale = KDEyScaledB(Cls(), val_split=val_split)
    kde_gau_pca = KDEyReduce(Cls(), val_split=val_split, n_components=5)
    kde_gau_pca10 = KDEyReduce(Cls(), val_split=val_split, n_components=10)
    kde_ait = KDEyCLR(Cls(), val_split=val_split)
    emq = EMQ(Cls(), exact_train_prev=False, val_split=val_split)

    def bakde_gau_reduced(n_components, **fixed):
        # Returns the constructor of a gaussian-kernel BayesianKDEy with `reduce=n_components`; any
        # keyword in `fixed` (e.g., temperature=None) is forwarded together with the selected hyperparameters.
        def constructor(hyper):
            return BayesianKDEy(
                Cls(), reduce=n_components, **fixed, kernel='gaussian', mcmc_seed=0, engine='numpyro',
                val_split=val_split, **hyper
            )
        return constructor

    # Bootstrap approaches:
    # --------------------------------------------------------------------------------------------------------
    #yield 'BootstrapACC', acc, acc_hyper, lambda hyper: _AggregativeBootstrap(ACC(Cls()), n_test_samples=1000, random_state=0), multiclass_method
    #yield 'BootstrapEMQ', emq, on_calib_error='backup', val_split=5), emq_hyper, lambda hyper: _AggregativeBootstrap(EMQ(Cls(), on_calib_error='backup', calib=hyper['calib'], val_split=5), n_test_samples=1000, random_state=0), multiclass_method
    #yield 'BootstrapHDy', hdy, hdy_hyper, lambda hyper: _AggregativeBootstrap(DMy(Cls(), **hyper), n_test_samples=1000, random_state=0), multiclass_method
    #yield 'BootstrapKDEy', kde_gau, kdey_hyper, lambda hyper: _AggregativeBootstrap(KDEyML(Cls(), **hyper), n_test_samples=1000, random_state=0, verbose=True), multiclass_method

    # Bayesian approaches:
    # --------------------------------------------------------------------------------------------------------
    # yield 'BayesianACC', acc, acc_hyper, lambda hyper: BayesianCC(Cls(), val_split=val_split, mcmc_seed=0), multiclass_method
    #yield 'BayesianHDy', hdy, hdy_hyper, lambda hyper: PQ(Cls(), val_split=val_split, stan_seed=0, **hyper), only_binary
    # yield f'BaKDE-Ait-numpyro', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(), kernel='aitchison', mcmc_seed=0, engine='numpyro', val_split=val_split,  **hyper), multiclass_method
    #yield f'BaKDE-Gau-numpyro', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split,  **hyper), multiclass_method
    #yield f'BaKDE-Gau-scale', kde_gau_scale, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', val_split=val_split,  **hyper), multiclass_method

    # active configurations: for each reduced dimensionality, first the variant with the default
    # temperature and then the starred variant, which is created with temperature=None
    for n_components, surrogate in ((5, kde_gau_pca), (10, kde_gau_pca10)):
        base_name = f'BaKDE-Gau-pca{n_components}'
        yield base_name, surrogate, kdey_hyper, bakde_gau_reduced(n_components), multiclass_method
        yield base_name + '*', surrogate, kdey_hyper, bakde_gau_reduced(n_components, temperature=None), multiclass_method

    # yield f'BaKDE-Gau-H0', KDEyFresh(Cls(), bandwidth=0.4), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=0.4, kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
    # yield f'BaKDE-Gau-H1', KDEyFresh(Cls(), bandwidth=1.), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1., kernel='gaussian', mcmc_seed=0, engine='numpyro', **hyper), multiclass_method
    # yield f'BaKDE-Gau-H2', KDEyFresh(Cls(), bandwidth=1.5), cls_hyper, lambda hyper: BayesianKDEy(Cls(), bandwidth=1.5,
    #                                                                                          kernel='gaussian',
    #                                                                                          mcmc_seed=0,
    #                                                                                          engine='numpyro',
    #                                                                                          **hyper), multiclass_method
    # yield f'BaKDE-Ait-T*', kde_ait, kdey_hyper_clr, lambda hyper: BayesianKDEy(Cls(),kernel='aitchison', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
    #yield f'BaKDE-Gau-T*', kde_gau, kdey_hyper, lambda hyper: BayesianKDEy(Cls(), kernel='gaussian', mcmc_seed=0, engine='numpyro', temperature=None, val_split=val_split, **hyper), multiclass_method
    # yield 'BayEMQ', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=1, exact_train_prev=False, val_split=val_split), multiclass_method
    # yield 'BayEMQ*', emq, acc_hyper, lambda hyper: BayesianMAPLS(Cls(), prior='uniform', temperature=None, exact_train_prev=False, val_split=val_split), multiclass_method


def model_selection(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, grid: dict):
    """
    Select the hyperparameters of a point quantifier by grid search.

    :param dataset: handler providing the training data and the validation protocol
        (via `get_train_valprot_for_modsel`)
    :param point_quantifier: the quantifier whose hyperparameters are explored
    :param grid: dict mapping parameter names to lists of candidate values; it is never modified
        in place (the caller may be sharing the same dict among several methods)
    :return: the dict of best hyperparameters found by GridSearchQ, or an empty dict if the grid is empty
    """
    with qp.util.temp_seed(0):
        if isinstance(point_quantifier, KDEyScaledB) and 'bandwidth' in grid:
            # candidate bandwidths are scaled by n_classes**beta, with beta=0.5
            beta = 0.5
            n_classes = dataset.get_training().n_classes
            # build a new dict instead of assigning into `grid`: the very same grid object can be
            # handed to other methods afterwards, and those must not receive the scaled values
            grid = {
                **grid,
                'bandwidth': [b * np.power(n_classes, beta) for b in grid['bandwidth']],
            }
            print('bandwidth scaled')
        print(f'performing model selection for {point_quantifier.__class__.__name__} with grid {grid}')

        # nothing to explore
        if not grid:
            return {}

        # model selection
        train, val_prot = dataset.get_train_valprot_for_modsel()
        mod_sel = GridSearchQ(
            model=point_quantifier,
            param_grid=grid,
            protocol=val_prot,
            refit=False,
            n_jobs=-1,
            verbose=True
        ).fit(*train.Xy)

        return mod_sel.best_params_


def temperature_calibration(dataset: DatasetHandler, uncertainty_quantifier):
    """
    Return the temperature of an uncertainty quantifier, calibrating it first if it is unset.

    - if the quantifier has no `temperature` attribute, None is returned and nothing else happens
    - if its `temperature` is not None, that value is returned as is
    - if its `temperature` is None, a value is chosen by `temp_calibration` on the model-selection
      split of `dataset`, stored in the quantifier's `temperature` attribute, and returned

    :param dataset: handler providing `name` and `get_train_valprot_for_modsel`; only accessed
        when a calibration is actually needed
    :param uncertainty_quantifier: the method whose temperature is inspected (and possibly set)
    :return: the temperature in use, or None if the method has no such attribute
    """
    if not hasattr(uncertainty_quantifier, 'temperature'):
        return None

    if uncertainty_quantifier.temperature is not None:
        # already fixed by the caller; nothing to calibrate
        return uncertainty_quantifier.temperature

    print('calibrating temperature')
    train, val_prot = dataset.get_train_valprot_for_modsel()

    # datasets whose name starts with 'LeQua' are explored on a grid of much larger temperatures
    is_lequa = dataset.name.startswith('LeQua')
    temp_grid = (
        [100., 500, 1000, 5_000, 10_000, 50_000]
        if is_lequa else
        [.5, 1., 1.5, 2., 5., 10., 100., 1000.]
    )

    calibrated = temp_calibration(
        uncertainty_quantifier, train, val_prot, temp_grid=temp_grid, n_jobs=-1, amplitude_threshold=.999
    )
    uncertainty_quantifier.temperature = calibrated
    return calibrated


def experiment(dataset: DatasetHandler, point_quantifier: AggregativeQuantifier, method_name:str, grid: dict, uncertainty_quant_constructor, hyper_choice_path: Path):
    """
    Run one full experiment (model selection, training, and evaluation) for a method on a dataset.

    :param dataset: handler providing the training set and the test protocol
        (via `get_train_testprot_for_eval`)
    :param point_quantifier: surrogate quantifier used for model selection (a deep copy is explored,
        the given instance is left untouched by this function)
    :param method_name: name of the method, only used in the progress bar
    :param grid: hyperparameter grid handed to `model_selection`
    :param uncertainty_quant_constructor: callable receiving the selected hyperparameters and returning
        the uncertainty quantifier to evaluate (it must implement `fit` and `predict_conf`)
    :param hyper_choice_path: path handed to `qp.util.pickled_resource` for the outcome of model selection
    :return: a dict with keys 'optim_hyper', 'train_time', 'train-prev', 'results' (a dict of arrays with
        one entry per test sample) and 'temperature'
    """
    with qp.util.temp_seed(0):

        # model selection (presumably loaded from hyper_choice_path if it was already computed --
        # this depends on qp.util.pickled_resource)
        best_hyperparams = qp.util.pickled_resource(
            hyper_choice_path, model_selection, dataset, cp(point_quantifier), grid
        )
        print(f'{best_hyperparams=}')

        # training; the time measured covers instantiation, temperature calibration, and fit
        train_start = time()
        uncertainty_quantifier = uncertainty_quant_constructor(best_hyperparams)
        temperature = temperature_calibration(dataset, uncertainty_quantifier)
        training, test_generator = dataset.get_train_testprot_for_eval()
        uncertainty_quantifier.fit(*training.Xy)
        tr_time = time() - train_start

        # test
        train_prevalence = training.prevalence()
        results = defaultdict(list)
        pbar = tqdm(enumerate(test_generator()), total=test_generator.total())
        for _, (sample_X, true_prevalence) in pbar:
            tick = time()
            point_estimate, region = uncertainty_quantifier.predict_conf(sample_X)
            elapsed = time() - tick

            # the values are evaluated (and later stored) in the order in which they are listed
            per_sample = {
                'true-prevs': true_prevalence,
                'point-estim': point_estimate,
                'shift': qp.error.ae(true_prevalence, train_prevalence),
                'ae': qp.error.ae(prevs_true=true_prevalence, prevs_hat=point_estimate),
                'rae': qp.error.rae(prevs_true=true_prevalence, prevs_hat=point_estimate),
                'sre': qp.error.sre(prevs_true=true_prevalence, prevs_hat=point_estimate, prevs_train=train_prevalence),
                'coverage': region.coverage(true_prevalence),
                'amplitude': region.montecarlo_proportion(n_trials=50_000),
                'test-time': elapsed,
                'samples': region.samples,
            }
            for metric, value in per_sample.items():
                results[metric].append(value)

            # running averages shown in the progress bar
            mae = np.mean(results["ae"])
            msre = np.mean(results["sre"])
            cov = np.mean(results["coverage"])
            amp = np.mean(results["amplitude"])
            pbar.set_description(f'{method_name} MAE={mae:.5f} W={msre:.5f} Cov={cov:.5f} AMP={amp:.5f}')

        results_as_arrays = {metric: np.asarray(values) for metric, values in results.items()}

        return {
            'optim_hyper': best_hyperparams,
            'train_time': tr_time,
            'train-prev': train_prevalence,
            'results': results_as_arrays,
            'temperature': temperature
        }


def check_skip_experiment(method_scope, dataset: DatasetHandler):
    """
    Tell whether a method has to be skipped for a dataset given the scope of the method.

    :param method_scope: 'only_binary' (skip non-binary datasets), 'only_multiclass' (skip binary
        datasets); with any other value nothing is skipped and the dataset is not even inspected
    :param dataset: handler exposing `is_binary()`
    :return: True if the experiment must be skipped, False otherwise
    """
    if method_scope == 'only_binary':
        return not dataset.is_binary()
    if method_scope == 'only_multiclass':
        return bool(dataset.is_binary())
    return False


if __name__ == '__main__':
    # Runs every configuration generated by methods() on every dataset of the selected handlers;
    # each report is obtained through qp.util.pickled_resource and summarized on the standard output.

    result_dir = RESULT_DIR

    # handlers to run; other candidates: UCIMulticlassHandler, LeQuaHandler, VisualDataHandler
    selected_handlers = [CIFAR100Handler]

    for data_handler in selected_handlers:
        for dataset in data_handler.iter():
            qp.environ['SAMPLE_SIZE'] = dataset.sample_size
            print(f'dataset={dataset.name}')
            #if dataset.name != 'abalone':
            #    continue

            # results are stored in a separate subfolder for binary and for multiclass problems
            if dataset.is_binary():
                problem_type = 'binary'
            else:
                problem_type = 'multiclass'

            for method_name, surrogate_quant, hyper_params, withconf_constructor, method_scope in methods(dataset):
                if check_skip_experiment(method_scope, dataset):
                    continue

                # the path for the hyperparameters depends on the class of the surrogate quantifier,
                # not on the method name
                surrogate_name = surrogate_quant.__class__.__name__
                result_path = experiment_path(result_dir / problem_type, dataset.name, method_name)
                hyper_path  = experiment_path(result_dir / 'hyperparams' / problem_type, dataset.name, surrogate_name)

                report = qp.util.pickled_resource(
                    result_path, experiment, dataset, surrogate_quant, method_name, hyper_params, withconf_constructor, hyper_path
                )

                scores = report["results"]
                summary = (
                    f'dataset={dataset.name}, '
                    f'method={method_name}: '
                    f'mae={scores["ae"].mean():.5f}, '
                    f'W={scores["sre"].mean():.5f}, '
                    f'coverage={scores["coverage"].mean():.5f}, '
                    f'amplitude={scores["amplitude"].mean():.5f}, '
                )
                print(summary)