from sklearn.svm import LinearSVC from class_weight_model import ClassWeightPCC # from classification.methods import LowRankLogisticRegression # from method.experimental import ExpMax, VarExpMax from common import * from method.meta import QuaNet from quantification_stumps_model import QuantificationStumpRegressor from quapy.method.aggregative import CC, ACC, PCC, PACC, MAX, MS, MS2, EMQ, SVMAE, HDy from quapy.method.meta import EHDy import numpy as np import os import pickle import itertools import argparse import torch import shutil SAMPLE_SIZE = 100 N_FOLDS = 5 N_REPEATS = 1 N_JOBS = -1 CUDA_N_JOBS = 2 ENSEMBLE_N_JOBS = -1 qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE __C_range = np.logspace(-3, 3, 7) lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']} svmperf_params = {'C': __C_range} def quantification_models(): # yield 'cc', CC(newLR()), lr_params # yield 'acc', ACC(newLR()), lr_params yield 'pcc.opt', PCC(newLR()), lr_params yield 'pacc.opt', PACC(newLR()), lr_params yield 'wpacc.opt', ClassWeightPCC(), lr_params yield 'ds.opt', QuantificationStumpRegressor(SAMPLE_SIZE), {'C': __C_range} # yield 'pcc.opt.svm', PCC(LinearSVC()), lr_params # yield 'pacc.opt.svm', PACC(LinearSVC()), lr_params # yield 'wpacc.opt.svm', ClassWeightPCC(LinearSVC), lr_params # yield 'wpacc.opt2', ClassWeightPCC(C=__C_range), lr_params # this cannot work in its current version (see notes in the class_weight_model.py file) # yield 'MAX', MAX(newLR()), lr_params # yield 'MS', MS(newLR()), lr_params # yield 'MS2', MS2(newLR()), lr_params yield 'sldc', EMQ(calibratedLR()), lr_params # yield 'svmmae', SVMAE(), svmperf_params # yield 'hdy', HDy(newLR()), lr_params # yield 'EMdiag', ExpMax(cov_type='diag'), None # yield 'EMfull', ExpMax(cov_type='full'), None # yield 'EMtied', ExpMax(cov_type='tied'), None # yield 'EMspherical', ExpMax(cov_type='spherical'), None # yield 'VEMdiag', VarExpMax(cov_type='diag'), None # yield 'VEMfull', VarExpMax(cov_type='full'), None # yield 'VEMtied', VarExpMax(cov_type='tied'), None # yield 'VEMspherical', VarExpMax(cov_type='spherical'), None # def quantification_cuda_models(): # device = 'cuda' if torch.cuda.is_available() else 'cpu' # print(f'Running QuaNet in {device}') # learner = LowRankLogisticRegression(**newLR().get_params()) # yield 'quanet', QuaNet(learner, SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params # def quantification_ensembles(): # param_mod_sel = { # 'sample_size': SAMPLE_SIZE, # 'n_prevpoints': 21, # 'n_repetitions': 5, # 'refit': True, # 'verbose': False # } # common = { # 'size': 30, # 'red_size': 15, # 'max_sample_size': None, # same as training set # 'n_jobs': ENSEMBLE_N_JOBS, # 'param_grid': lr_params, # 'param_mod_sel': param_mod_sel, # 'val_split': 0.4, # 'min_pos': 5 # } # # hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection # will be skipped (by setting hyperparameters to None) # hyper_none = None # yield 'ehdymaeds', EHDy(newLR(), optim='mae', policy='ds', **common), hyper_none def run(experiment): optim_loss, dataset_name, (model_name, model, hyperparams) = experiment if dataset_name in ['acute.a', 'acute.b', 'iris.1']: return collection = qp.datasets.fetch_UCILabelledCollection(dataset_name) for run, data in enumerate(qp.data.Dataset.kFCV(collection, nfolds=N_FOLDS, nrepeats=N_REPEATS)): if is_already_computed(args.results, dataset_name, model_name, run=run, optim_loss=optim_loss): print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.') continue print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}') # model selection (hyperparameter optimization for a quantification-oriented loss) if hyperparams is not None: model_selection = qp.model_selection.GridSearchQ( model, param_grid=hyperparams, sample_size=SAMPLE_SIZE, n_prevpoints=21, n_repetitions=25, error=optim_loss, refit=True, timeout=60 * 60, verbose=True ) model_selection.fit(data.training) model = model_selection.best_model() best_params = model_selection.best_params_ else: model.fit(data.training) best_params = {} # model evaluation true_prevalences, estim_prevalences = qp.evaluation.artificial_prevalence_prediction( model, test=data.test, sample_size=SAMPLE_SIZE, n_prevpoints=21, n_repetitions=100, n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1 ) test_true_prevalence = data.test.prevalence() evaluate_experiment(true_prevalences, estim_prevalences) save_results(args.results, dataset_name, model_name, run, optim_loss, true_prevalences, estim_prevalences, data.training.prevalence(), test_true_prevalence, best_params) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Run experiments for UCI ML Quantification') parser.add_argument('results', metavar='RESULT_PATH', type=str, help='path to the directory where to store the results') parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification', help='path to the directory with svmperf') parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint', help='path to the directory where to dump QuaNet checkpoints') args = parser.parse_args() print(f'Result folder: {args.results}') np.random.seed(0) qp.environ['SVMPERF_HOME'] = args.svmperfpath optim_losses = ['mae'] datasets = qp.datasets.UCI_DATASETS models = quantification_models() # for runargs in itertools.product(optim_losses, datasets, models): # run(runargs) qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=N_JOBS) # models = quantification_cuda_models() # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=CUDA_N_JOBS) # models = quantification_ensembles() # qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1) shutil.rmtree(args.checkpointdir, ignore_errors=True)