producing tables in benchmarks
parent a04723a976
commit 49a8cf3b0d
@@ -0,0 +1,3 @@
[submodule "result_table"]
	path = result_table
	url = gitea@gitea-s2i2s.isti.cnr.it:moreo/result_table.git
@@ -0,0 +1,313 @@
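# Benchmarking harness for quantification methods in QuaPy.
# Each (method, dataset) run produces an evaluation report (a pandas DataFrame) that is cached
# under <home_dir>/results; the chosen hyperparameters and grid-search scores are stored under
# <home_dir>/params, and aggregated result tables are compiled to PDF under <home_dir>/tables.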
import itertools
import os
from copy import deepcopy
from os.path import join
from dataclasses import dataclass
from typing import List, Union, Callable
from abc import ABC, abstractmethod

import numpy as np
import pandas as pd
import pickle

from sklearn.linear_model import LogisticRegression

import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.aggregative import PACC
from quapy.protocol import APP, UPP, AbstractProtocol
from quapy.model_selection import GridSearchQ
from quapy.method.base import BaseQuantifier
from result_table.src.table import Table

def makedirs(dir):
    print('creating ', dir)
    os.makedirs(dir, exist_ok=True)

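# describes a quantification method to be benchmarked: a short id (used in run identifiers and
# file names), a display name (used in result tables), a quantifier instance, and the
# hyperparameter grid explored during model selection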
@dataclass
class MethodDescriptor:
    id: str
    name: str
    instance: BaseQuantifier
    hyperparams: dict

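# abstract benchmark: subclasses declare the datasets (list_datasets) and how to run one
# (method, dataset) experiment (run_method_dataset); run() reuses cached results when available,
# dispatches pending runs in parallel, prints a summary per run, and compiles the result tables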
class Benchmark(ABC):

    ID_SEPARATOR = '__'  # used to separate components in a run-ID, cannot be used within the component IDs

    def __init__(self, home_dir, n_jobs=3):
        self.home_dir = home_dir
        self.n_jobs = n_jobs
        assert n_jobs != -1, ('Setting n_jobs=-1 will probably blow your memory. '
                              'Specify a positive number.')
        makedirs(home_dir)
        makedirs(join(home_dir, 'results'))
        makedirs(join(home_dir, 'params'))
        makedirs(join(home_dir, 'tables'))
        makedirs(join(home_dir, 'plots'))

    def _run_id(self, method: MethodDescriptor, dataset: str):
        sep = Benchmark.ID_SEPARATOR
        assert sep not in method.id, \
            (f'separator {sep} cannot be used in method ID ({method.id}), '
             f'please change the method ID or redefine {Benchmark.ID_SEPARATOR=}')
        assert sep not in dataset, \
            (f'separator {sep} cannot be used in dataset name ({dataset}), '
             f'please redefine {Benchmark.ID_SEPARATOR=}')
        return sep.join([method.id, dataset])

    def _result_path(self, method: MethodDescriptor, dataset: str):
        id = self._run_id(method, dataset)
        return join(self.home_dir, 'results', id + '.pkl')

    def _params_path(self, method: MethodDescriptor, dataset: str):
        id = self._run_id(method, dataset)
        chosen = join(self.home_dir, 'params', id + 'chosen.pkl')
        scores = join(self.home_dir, 'params', id + 'scores.pkl')
        return chosen, scores

    def _exist_run(self, method: MethodDescriptor, dataset: str):
        return os.path.exists(self._result_path(method, dataset))

    def _open_method_dataset_result(self, method: MethodDescriptor, dataset: str):
        if not self._exist_run(method, dataset):
            raise ValueError(f'cannot open result for method={method.id} and {dataset=}')
        # assumed completion: the original method only checked existence; returning the cached
        # report mirrors how results are loaded in run()
        return pd.read_pickle(self._result_path(method, dataset))

    def check_dataset(self, dataset: str):
        assert dataset in self.list_datasets(), f'unknown dataset {dataset}'

    @abstractmethod
    def list_datasets(self) -> List[str]:
        ...

    @abstractmethod
    def run_method_dataset(self, method: MethodDescriptor, dataset: str, random_state=0) -> pd.DataFrame:
        ...

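    # builds one Table per requested error metric (taken from the columns of the evaluation
    # reports) and compiles all of them into a single PDF, via the result_table submodule,
    # under <home_dir>/tables/results.pdf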
    def gen_tables(self, results, metrics=None):
        if metrics is None:
            metrics = ['mae', 'mrae', 'mkld', 'mnkld']
        tables = {}
        for (method, dataset, result) in results:
            col_metrics = result.columns.values[2:]
            for metric in metrics:
                if metric not in col_metrics:
                    print(f'error; requested {metric=} not found among the columns in the dataframe')
                    continue
                if metric not in tables:
                    tables[metric] = Table(name=metric)
                table = tables[metric]
                table.add(dataset, method.name, result[metric].values)
        Table.LatexPDF(join(self.home_dir, 'tables', 'results.pdf'), list(tables.values()))

    def gen_plots(self):
        pass

    def show_report(self, method, dataset, report: pd.DataFrame):
        id = method.id
        MAE = report['mae'].mean()
        mae_std = report['mae'].std()
        MRAE = report['mrae'].mean()
        mrae_std = report['mrae'].std()
        print(f'{id}\t{dataset}:\t{MAE=:.4f}+-{mae_std:.4f}\t{MRAE=:.4f}+-{mrae_std:.4f}')

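    # runs all (method, dataset) combinations: cached results are reused unless force=True;
    # the pending combinations are executed in parallel with self.n_jobs workers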
    def run(self,
            methods: Union[List[MethodDescriptor], MethodDescriptor],
            datasets: Union[List[str], str] = None,
            force=False):

        if not isinstance(methods, list):
            methods = [methods]

        if datasets is None:
            datasets = self.list_datasets()
        elif not isinstance(datasets, list):
            datasets = [datasets]

        results = []
        pending_job_args = []
        for method, dataset in itertools.product(methods, datasets):
            self.check_dataset(dataset)
            if not force and self._exist_run(method, dataset):
                result = pd.read_pickle(self._result_path(method, dataset))
                results.append((method, dataset, result))
            else:
                pending_job_args.append((method, dataset))

        if len(pending_job_args) > 0:
            remaining_results = qp.util.parallel_unpack(
                func=self.run_method_dataset,
                args=pending_job_args,
                n_jobs=self.n_jobs,
                seed=0,
                asarray=False
            )
            results += [(method, dataset, result)
                        for (method, dataset), result in zip(pending_job_args, remaining_results)]

        # print results
        for method, dataset, result in results:
            self.show_report(method, dataset, result)

        self.gen_tables(results)
        self.gen_plots()

    # def gen_plots(self, methods=None):
    #     if methods is None:

    def __add__(self, other: 'Benchmark'):
        return CombinedBenchmark(self, other, self.n_jobs)

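# combines two benchmarks into one: each dataset is routed to the benchmark that owns it, so a
# single run() can span both collections; created by Benchmark.__add__ (bench_a + bench_b)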
class CombinedBenchmark(Benchmark):

    def __init__(self, benchmark_a: Benchmark, benchmark_b: Benchmark, n_jobs=-1):
        self.router = {
            **{dataset: benchmark_a for dataset in benchmark_a.list_datasets()},
            **{dataset: benchmark_b for dataset in benchmark_b.list_datasets()}
        }
        self.datasets = benchmark_a.list_datasets() + benchmark_b.list_datasets()
        self.n_jobs = n_jobs

    def list_datasets(self) -> List[str]:
        return self.datasets

    def run_method_dataset(self, method: MethodDescriptor, dataset: str, random_state=0) -> pd.DataFrame:
        return self.router[dataset].run_method_dataset(method, dataset, random_state)

    def _exist_run(self, method: MethodDescriptor, dataset: str):
        return self.router[dataset]._exist_run(method, dataset)

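# template benchmark: subclasses fix the sample size, the data splits and protocols, and the
# target error for model selection; run_method_dataset then carries out the whole experiment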
class TypicalBenchmark(Benchmark):

    # def __init__(self, home_dir, ):

    @abstractmethod
    def get_sample_size(self) -> int:
        ...

    @abstractmethod
    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
            (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
        ...

    @abstractmethod
    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        ...

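    # pipeline for one (method, dataset) pair: grid-search the hyperparameters on the validation
    # protocol, refit the selected model on the full training set, evaluate on the test protocol,
    # and persist the chosen parameters, the grid-search scores, and the evaluation report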
    def run_method_dataset(self, method: MethodDescriptor, dataset: str, random_state=0) -> pd.DataFrame:
        print(f'Running method={method.id} in {dataset=}')

        sample_size = self.get_sample_size()
        qp.environ['SAMPLE_SIZE'] = sample_size

        q = deepcopy(method.instance)
        optim_for = self.get_target_error_for_modsel()

        with qp.util.temp_seed(random_state):
            # data split
            trModSel, valprotModSel, trEval, teprotEval = self.get_trModsel_valprotModsel_trEval_teprotEval(dataset)

            # model selection
            modsel = GridSearchQ(
                model=q,
                param_grid=method.hyperparams,
                protocol=valprotModSel,
                error=optim_for,
                refit=False,
                n_jobs=-1,
                raise_errors=True,
                verbose=True
            ).fit(trModSel)

            # fit on the whole training data
            optimized_model = modsel.best_model_
            optimized_model.fit(trEval)

            # evaluation
            report = qp.evaluation.evaluation_report(
                model=optimized_model,
                protocol=teprotEval,
                error_metrics=qp.error.QUANTIFICATION_ERROR_NAMES
            )

            # data persistence
            chosen_path, scores_path = self._params_path(method, dataset)
            pickle.dump(modsel.best_params_, open(chosen_path, 'wb'), pickle.HIGHEST_PROTOCOL)
            pickle.dump(modsel.param_scores_, open(scores_path, 'wb'), pickle.HIGHEST_PROTOCOL)

            result_path = self._result_path(method, dataset)
            report.to_pickle(result_path)

        return report

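# UCI binary datasets: samples of size 100 drawn with the artificial-prevalence protocol (APP);
# a few datasets are excluded (see list_datasets)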
class UCIBinaryBenchmark(TypicalBenchmark):

    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
            (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
        data = qp.datasets.fetch_UCIBinaryDataset(dataset)
        trEval, teEval = data.train_test
        trModsel, vaModsel = trEval.split_stratified()
        valprotModsel = APP(vaModsel, n_prevalences=21, repeats=25)
        testprotEval = APP(teEval, n_prevalences=21, repeats=100)
        return trModsel, valprotModsel, trEval, testprotEval

    def get_sample_size(self) -> int:
        return 100

    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        return 'mae'

    def list_datasets(self) -> List[str]:
        ignore = ['acute.a', 'acute.b', 'balance.2']
        return [d for d in qp.datasets.UCI_BINARY_DATASETS if d not in ignore]

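# UCI multiclass datasets: samples of size 500 drawn with the uniform-prevalence protocol (UPP)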
class UCIMultiBenchmark(TypicalBenchmark):

    def list_datasets(self) -> List[str]:
        return qp.datasets.UCI_MULTICLASS_DATASETS

    def get_trModsel_valprotModsel_trEval_teprotEval(self, dataset: str) -> \
            (LabelledCollection, AbstractProtocol, LabelledCollection, AbstractProtocol):
        data = qp.datasets.fetch_UCIMulticlassDataset(dataset)
        trEval, teEval = data.train_test
        trModsel, vaModsel = trEval.split_stratified()
        valprotModsel = UPP(vaModsel, repeats=250)
        testprotEval = UPP(teEval, repeats=1000)
        return trModsel, valprotModsel, trEval, testprotEval

    def get_sample_size(self) -> int:
        return 500

    def get_target_error_for_modsel(self) -> Union[str, Callable]:
        return 'mae'

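# example driver: runs the standard methods (defined in quapy.benchmarking.typical) over the
# UCI multiclass benchmark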
if __name__ == '__main__':

    from quapy.benchmarking.typical import *

    # from quapy.method.aggregative import BayesianCC
    # bayes = MethodDescriptor(
    #     id='Bayesian',
    #     name='Bayesian(LR)',
    #     instance=BayesianCC(LogisticRegression()),
    #     hyperparams=wrap_cls_params(lr_hyper)
    # )

    # bench_bin = UCIBinaryBenchmark('../../Benchmarks/UCIbinary')
    bench_multi = UCIMultiBenchmark('../../Benchmarks/UCIMulti')
    # bench = bench_bin + bench_multi
    bench = bench_multi

    bench.run(methods=[cc, pcc, acc, pacc, sld, sld_bcts])

@@ -0,0 +1,51 @@
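# standard method descriptors used by the benchmarks: CC, PCC, ACC, PACC, and SLD (EMQ), all on
# top of logistic regression and sharing the same hyperparameter grid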
import numpy as np
from sklearn.linear_model import LogisticRegression

from quapy.method.aggregative import CC, PCC, ACC, PACC, EMQ
from quapy.benchmarking._base import MethodDescriptor

lr_hyper = {'C': np.logspace(-3, 3, 7), 'class_weight': ['balanced', None]}

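# prefixes the hyperparameter names with 'classifier__' so that GridSearchQ routes them to the
# classifier wrapped by the quantifier (e.g., 'C' becomes 'classifier__C')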
wrap_cls_params = lambda params: {'classifier__' + key: val for key, val in params.items()}

cc = MethodDescriptor(
    id='CC',
    name='CC(LR)',
    instance=CC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

pcc = MethodDescriptor(
    id='PCC',
    name='PCC(LR)',
    instance=PCC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

acc = MethodDescriptor(
    id='ACC',
    name='ACC(LR)',
    instance=ACC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

pacc = MethodDescriptor(
    id='PACC',
    name='PACC(LR)',
    instance=PACC(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

sld = MethodDescriptor(
    id='SLD',
    name='SLD',
    instance=EMQ(LogisticRegression()),
    hyperparams=wrap_cls_params(lr_hyper)
)

sld_bcts = MethodDescriptor(
    id='SLD-BCTS',
    name='SLD-BCTS',
    instance=EMQ(LogisticRegression(), recalib='bcts', exact_train_prev=False),
    hyperparams=wrap_cls_params(lr_hyper)
)

@@ -577,7 +577,7 @@ class PACC(AggregativeSoftQuantifier):
             raise ValueError(f"unknown solver; valid ones are {ACC.SOLVERS}")
         if self.method not in ACC.METHODS:
             raise ValueError(f"unknown method; valid ones are {ACC.METHODS}")
-        if self.clipping not in ACC.NORMALIZATIONS:
+        if self.norm not in ACC.NORMALIZATIONS:
             raise ValueError(f"unknown clipping; valid ones are {ACC.NORMALIZATIONS}")

     def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
@@ -0,0 +1 @@
Subproject commit 01f8fb936bddaaa33aad026b450be13089ec1d7c