# Web-export header (repository page metadata, not part of the program):
#   1 / 0 / Fork 0
#   QuaPy/NewMethods/uci_experiments.py
#   174 lines, 6.7 KiB, Python

from sklearn.svm import LinearSVC
from class_weight_model import ClassWeightPCC
# from classification.methods import LowRankLogisticRegression
# from method.experimental import ExpMax, VarExpMax
from common import *
from method.meta import QuaNet
from quantification_stumps_model import QuantificationStumpRegressor
from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, SVMAE, HDy
from quapy.method.meta import EHDy
import numpy as np
import os
import pickle
import itertools
import argparse
import torch
import shutil
# Size of every sample drawn during model selection and evaluation; also
# published through qp.environ (qp is expected to arrive via `from common import *`).
SAMPLE_SIZE = 100
qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE

# Cross-validation layout handed to Dataset.kFCV in run().
N_FOLDS = 5
N_REPEATS = 1

# Worker counts for the parallel runs. Only N_JOBS is used by the active code;
# the CUDA and ensemble values are referenced solely by the disabled runs.
N_JOBS = -1
CUDA_N_JOBS = 2
ENSEMBLE_N_JOBS = -1

# Hyperparameter grids explored during model selection:
# C takes the 7 log-spaced values 1e-3 ... 1e3.
__C_range = np.logspace(-3, 3, num=7)
lr_params = dict(C=__C_range, class_weight=[None, 'balanced'])
svmperf_params = dict(C=__C_range)
def quantification_models():
    """Lazily yield the quantifiers to benchmark as ``(name, model, grid)`` triples.

    ``name`` is the label used to identify the method in the results,
    ``model`` is a newly constructed (not yet fitted) quantifier, and ``grid``
    is the hyperparameter grid that ``run`` passes to model selection.
    Each model is only instantiated when its triple is requested.

    Variants that are currently disabled are kept below as comments.
    """
    # yield 'cc', CC(newLR()), lr_params
    # yield 'acc', ACC(newLR()), lr_params

    # --- active methods -----------------------------------------------------
    yield 'pcc.opt', PCC(newLR()), lr_params
    yield 'pacc.opt', PACC(newLR()), lr_params
    yield 'wpacc.opt', ClassWeightPCC(), lr_params
    yield 'ds.opt', QuantificationStumpRegressor(SAMPLE_SIZE), dict(C=__C_range)

    # yield 'pcc.opt.svm', PCC(LinearSVC()), lr_params
    # yield 'pacc.opt.svm', PACC(LinearSVC()), lr_params
    # yield 'wpacc.opt.svm', ClassWeightPCC(LinearSVC), lr_params
    # yield 'wpacc.opt2', ClassWeightPCC(C=__C_range), lr_params  # this cannot work in its current version (see notes in the class_weight_model.py file)
    # yield 'MAX', MAX(newLR()), lr_params
    # yield 'MS', MS(newLR()), lr_params
    # yield 'MS2', MS2(newLR()), lr_params

    yield 'sldc', EMQ(calibratedLR()), lr_params

    # yield 'svmmae', SVMAE(), svmperf_params
    # yield 'hdy', HDy(newLR()), lr_params

    # yield 'EMdiag', ExpMax(cov_type='diag'), None
    # yield 'EMfull', ExpMax(cov_type='full'), None
    # yield 'EMtied', ExpMax(cov_type='tied'), None
    # yield 'EMspherical', ExpMax(cov_type='spherical'), None
    # yield 'VEMdiag', VarExpMax(cov_type='diag'), None
    # yield 'VEMfull', VarExpMax(cov_type='full'), None
    # yield 'VEMtied', VarExpMax(cov_type='tied'), None
    # yield 'VEMspherical', VarExpMax(cov_type='spherical'), None
# def quantification_cuda_models():
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# print(f'Running QuaNet in {device}')
# learner = LowRankLogisticRegression(**newLR().get_params())
# yield 'quanet', QuaNet(learner, SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params
# def quantification_ensembles():
# param_mod_sel = {
# 'sample_size': SAMPLE_SIZE,
# 'n_prevpoints': 21,
# 'n_repetitions': 5,
# 'refit': True,
# 'verbose': False
# }
# common = {
# 'size': 30,
# 'red_size': 15,
# 'max_sample_size': None, # same as training set
# 'n_jobs': ENSEMBLE_N_JOBS,
# 'param_grid': lr_params,
# 'param_mod_sel': param_mod_sel,
# 'val_split': 0.4,
# 'min_pos': 5
# }
#
# hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection
# will be skipped (by setting hyperparameters to None)
# hyper_none = None
# yield 'ehdymaeds', EHDy(newLR(), optim='mae', policy='ds', **common), hyper_none
def run(experiment):
    """Run one (loss, dataset, method) experiment across all cross-validation folds.

    Parameters
    ----------
    experiment : tuple
        ``(optim_loss, dataset_name, (model_name, model, hyperparams))`` as
        produced by ``itertools.product`` in the script entry point.
        ``hyperparams`` is a parameter grid, or ``None`` to skip model
        selection and fit ``model`` directly.

    Returns
    -------
    None
        Results are persisted through ``save_results`` under ``args.results``
        (``args`` is the module-level namespace parsed in ``__main__``).

    Notes
    -----
    The supplied ``model`` is used as the base estimator for *every* fold.
    The quantifier selected for a fold is bound to a separate name so that
    one fold's tuned model is never used as the starting point of the next
    fold's grid search.
    """
    optim_loss, dataset_name, (model_name, model, hyperparams) = experiment

    # These datasets are excluded from the experiments.
    if dataset_name in ['acute.a', 'acute.b', 'iris.1']:
        return

    collection = qp.datasets.fetch_UCILabelledCollection(dataset_name)
    folds = qp.data.Dataset.kFCV(collection, nfolds=N_FOLDS, nrepeats=N_REPEATS)

    # `fold` (not `run`) so the loop index does not shadow this function's name.
    for fold, data in enumerate(folds):
        if is_already_computed(args.results, dataset_name, model_name, run=fold, optim_loss=optim_loss):
            print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.')
            continue

        print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')

        # model selection (hyperparameter optimization for a quantification-oriented loss)
        if hyperparams is not None:
            model_selection = qp.model_selection.GridSearchQ(
                model,
                param_grid=hyperparams,
                sample_size=SAMPLE_SIZE,
                n_prevpoints=21,
                n_repetitions=25,
                error=optim_loss,
                refit=True,
                timeout=60 * 60,  # presumably seconds (one hour) -- confirm against GridSearchQ
                verbose=True
            )
            model_selection.fit(data.training)
            # Do not rebind `model`: later folds must start from the base model.
            fold_model = model_selection.best_model()
            best_params = model_selection.best_params_
        else:
            model.fit(data.training)
            fold_model = model
            best_params = {}

        # model evaluation
        true_prevalences, estim_prevalences = qp.evaluation.artificial_prevalence_prediction(
            fold_model,
            test=data.test,
            sample_size=SAMPLE_SIZE,
            n_prevpoints=21,
            n_repetitions=100,
            # ensembles get all workers for prediction; any other model runs with a single job
            n_jobs=-1 if isinstance(fold_model, qp.method.meta.Ensemble) else 1
        )
        test_true_prevalence = data.test.prevalence()

        evaluate_experiment(true_prevalences, estim_prevalences)
        save_results(args.results, dataset_name, model_name, fold, optim_loss,
                     true_prevalences, estim_prevalences,
                     data.training.prevalence(), test_true_prevalence,
                     best_params)
if __name__ == '__main__':
    # --- command line -------------------------------------------------------
    # NOTE: `args` must remain a module-level name; run() reads it as a global.
    parser = argparse.ArgumentParser(description='Run experiments for UCI ML Quantification')
    parser.add_argument(
        'results',
        metavar='RESULT_PATH',
        type=str,
        help='path to the directory where to store the results',
    )
    parser.add_argument(
        '--svmperfpath',
        metavar='SVMPERF_PATH',
        type=str,
        default='./svm_perf_quantification',
        help='path to the directory with svmperf',
    )
    parser.add_argument(
        '--checkpointdir',
        metavar='PATH',
        type=str,
        default='./checkpoint',
        help='path to the directory where to dump QuaNet checkpoints',
    )
    args = parser.parse_args()
    print(f'Result folder: {args.results}')

    # --- global configuration -----------------------------------------------
    np.random.seed(0)
    qp.environ['SVMPERF_HOME'] = args.svmperfpath

    # --- experiment grid: every loss x every UCI dataset x every method -----
    optim_losses = ['mae']
    datasets = qp.datasets.UCI_DATASETS
    models = quantification_models()

    # Sequential alternative, kept for reference:
    # for runargs in itertools.product(optim_losses, datasets, models):
    #     run(runargs)
    experiments = itertools.product(optim_losses, datasets, models)
    qp.util.parallel(run, experiments, n_jobs=N_JOBS)

    # models = quantification_cuda_models()
    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS)

    # models = quantification_ensembles()
    # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)

    # Remove the checkpoint directory once all runs are done (errors are ignored).
    shutil.rmtree(args.checkpointdir, ignore_errors=True)