From 0f4008e18d1ba96f56035ed6243192de95eb8a78 Mon Sep 17 00:00:00 2001 From: Alejandro Moreo Date: Mon, 30 Oct 2023 09:41:52 +0100 Subject: [PATCH] switching to devel --- distribution_matching/commons.py | 5 +- distribution_matching/lequa_experiments.py | 80 +++++++++++----------- laboratory/method_dxs.py | 2 +- 3 files changed, 45 insertions(+), 42 deletions(-) diff --git a/distribution_matching/commons.py b/distribution_matching/commons.py index 970dafa..39bf5ec 100644 --- a/distribution_matching/commons.py +++ b/distribution_matching/commons.py @@ -8,7 +8,7 @@ from distribution_matching.method_dirichlety import DIRy from sklearn.linear_model import LogisticRegression from method_kdey_closed_efficient import KDEyclosed_efficient -METHODS = ['EMQ', 'EMQ-C', 'DM', 'DM-T', 'DM-HD', 'KDEy-DMhd3', 'DM-CS', 'KDEy-closed++', 'KDEy-ML'] #['ACC', 'PACC', 'HDy-OvA', 'DIR', 'DM', 'KDEy-DMhd3', 'KDEy-closed++', 'EMQ', 'KDEy-ML'] #, 'KDEy-DMhd2'] #, 'KDEy-DMhd2', 'DM-HD'] 'KDEy-DMjs', 'KDEy-DM', 'KDEy-ML+', 'KDEy-DMhd3+', +METHODS = ['ACC', 'PACC', 'HDy-OvA', 'DIR', 'DM-T', 'DM-HD', 'KDEy-DMhd3', 'DM-CS', 'KDEy-closed++', 'EMQ', 'KDEy-ML'] #['ACC', 'PACC', 'HDy-OvA', 'DIR', 'DM', 'KDEy-DMhd3', 'KDEy-closed++', 'EMQ', 'KDEy-ML'] #, 'KDEy-DMhd2'] #, 'KDEy-DMhd2', 'DM-HD'] 'KDEy-DMjs', 'KDEy-DM', 'KDEy-ML+', 'KDEy-DMhd3+', 'EMQ-C', BIN_METHODS = [x.replace('-OvA', '') for x in METHODS] @@ -63,6 +63,9 @@ def new_method(method, **lr_kwargs): method_params = {'exact_train_prev': [False], 'recalib': ['bcts']} param_grid = {**method_params, **hyper_LR} quantifier = EMQ(lr) + elif method == 'HDy': + param_grid = hyper_LR + quantifier = HDy(lr) elif method == 'HDy-OvA': param_grid = {'binary_quantifier__' + key: val for key, val in hyper_LR.items()} quantifier = OneVsAllAggregative(HDy(lr)) diff --git a/distribution_matching/lequa_experiments.py b/distribution_matching/lequa_experiments.py index 08d800c..1fe127b 100644 --- a/distribution_matching/lequa_experiments.py +++ b/distribution_matching/lequa_experiments.py @@ -2,64 +2,64 @@ import pickle import numpy as np import os import pandas as pd -from distribution_matching.commons import METHODS, new_method, show_results +from distribution_matching.commons import METHODS, BIN_METHODS, new_method, show_results import quapy as qp from quapy.model_selection import GridSearchQ - if __name__ == '__main__': - qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE['T1B'] - qp.environ['N_JOBS'] = -1 - for optim in ['mae', 'mrae']: + for task in ['T1A', 'T1B']: + qp.environ['SAMPLE_SIZE'] = qp.datasets.LEQUA2022_SAMPLE_SIZE[task] + qp.environ['N_JOBS'] = -1 + for optim in ['mae', 'mrae']: - result_dir = f'results/lequa/{optim}' + result_dir = f'results/lequa/{task}/{optim}' - os.makedirs(result_dir, exist_ok=True) + os.makedirs(result_dir, exist_ok=True) - for method in METHODS: + for method in (METHODS if task=='T1B' else BIN_METHODS): - print('Init method', method) + print('Init method', method) - result_path = f'{result_dir}/{method}' + result_path = f'{result_dir}/{method}' - if os.path.exists(result_path+'.csv'): - print(f'file {result_path}.csv already exist; skipping') - continue + if os.path.exists(result_path+'.csv'): + print(f'file {result_path}.csv already exist; skipping') + continue - with open(result_path+'.csv', 'wt') as csv: - csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n') + with open(result_path+'.csv', 'wt') as csv: + csv.write(f'Method\tDataset\tMAE\tMRAE\tKLD\n') - dataset = 'T1B' - train, val_gen, test_gen = qp.datasets.fetch_lequa2022(dataset) - print(f'init {dataset} #instances: {len(train)}') - param_grid, quantifier = new_method(method) + dataset = task + train, val_gen, test_gen = qp.datasets.fetch_lequa2022(dataset) + print(f'init {dataset} #instances: {len(train)}') + param_grid, quantifier = new_method(method) - if param_grid is not None: - modsel = GridSearchQ(quantifier, param_grid, protocol=val_gen, refit=False, n_jobs=-1, verbose=1, error=optim) + if param_grid is not None: + modsel = GridSearchQ(quantifier, param_grid, protocol=val_gen, refit=False, n_jobs=-1, verbose=1, error=optim) - modsel.fit(train) - print(f'best params {modsel.best_params_}') - print(f'best score {modsel.best_score_}') - pickle.dump( - (modsel.best_params_, modsel.best_score_,), - open(f'{result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL) + modsel.fit(train) + print(f'best params {modsel.best_params_}') + print(f'best score {modsel.best_score_}') + pickle.dump( + (modsel.best_params_, modsel.best_score_,), + open(f'{result_path}.hyper.pkl', 'wb'), pickle.HIGHEST_PROTOCOL) - quantifier = modsel.best_model() - else: - print('debug mode... skipping model selection') - quantifier.fit(train) + quantifier = modsel.best_model() + else: + print('debug mode... skipping model selection') + quantifier.fit(train) - report = qp.evaluation.evaluation_report( - quantifier, protocol=test_gen, error_metrics=['mae', 'mrae', 'kld'], - verbose=True, verbose_error=optim[1:], n_jobs=-1 - ) - means = report.mean() - report.to_csv(result_path+'.dataframe') - csv.write(f'{method}\tLeQua-T1B\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n') - csv.flush() - print(means) + report = qp.evaluation.evaluation_report( + quantifier, protocol=test_gen, error_metrics=['mae', 'mrae', 'kld'], + verbose=True, verbose_error=optim[1:], n_jobs=-1 + ) + means = report.mean() + report.to_csv(result_path+'.dataframe') + csv.write(f'{method}\tLeQua-{task}\t{means["mae"]:.5f}\t{means["mrae"]:.5f}\t{means["kld"]:.5f}\n') + csv.flush() + print(means) show_results(result_path) diff --git a/laboratory/method_dxs.py b/laboratory/method_dxs.py index f0f0cf9..e5b3bcd 100644 --- a/laboratory/method_dxs.py +++ b/laboratory/method_dxs.py @@ -79,7 +79,7 @@ if __name__ == '__main__': repeats = 10 error = 'mae' - div = 'HD' + div = 'topsoe' # generates tuples (dataset, method, method_name) # (the dataset is needed for methods that process the dataset differently)