1
0
Fork 0

Parallelize artificial-prevalence prediction across categories and add save/load helpers for results

This commit is contained in:
Alejandro Moreo Fernandez 2021-08-27 12:21:53 +02:00
parent b941c0665e
commit db1dbe2534
2 changed files with 50 additions and 13 deletions

View File

@ -23,7 +23,7 @@ import pickle
def cls():
# return LinearSVC()
return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
return LogisticRegression(max_iter=1000, solver='lbfgs')
def calibratedCls():
@ -38,10 +38,10 @@ n_samples = 5000
def models():
yield 'NaiveCC', MultilabelNaiveAggregativeQuantifier(CC(cls()))
yield 'NaivePCC', MultilabelNaiveAggregativeQuantifier(PCC(cls()))
yield 'NaiveACC', MultilabelNaiveAggregativeQuantifier(ACC(cls()))
yield 'NaivePACC', MultilabelNaiveAggregativeQuantifier(PACC(cls()))
yield 'HDy', MultilabelNaiveAggregativeQuantifier(HDy(cls()))
# yield 'NaivePCC', MultilabelNaiveAggregativeQuantifier(PCC(cls()))
# yield 'NaiveACC', MultilabelNaiveAggregativeQuantifier(ACC(cls()))
# yield 'NaivePACC', MultilabelNaiveAggregativeQuantifier(PACC(cls()))
# yield 'HDy', MultilabelNaiveAggregativeQuantifier(HDy(cls()))
# yield 'EMQ', MultilabelQuantifier(EMQ(calibratedCls()))
# yield 'StackCC', MLCC(MultilabelStackedClassifier(cls()))
# yield 'StackPCC', MLPCC(MultilabelStackedClassifier(cls()))
@ -135,6 +135,36 @@ def print_info(train, test):
print(f'MLPE: {qp.error.mae(train.prevalence(), test.prevalence()):.5f}')
def save_results(npp_results, app_results, result_path):
    """Serialize NPP and APP evaluation results to a pickle file.

    Each of ``npp_results`` / ``app_results`` is a tuple
    ``(true_prevs, estim_prevs)`` whose elements are ndarrays of ndim=2.
    The second column is constrained (it equals 1 minus the first column),
    so only the first column is stored; ``load_results`` reconstructs it.

    :param npp_results: (true_prevs, estim_prevs) from natural-prevalence prediction
    :param app_results: (true_prevs, estim_prevs) from artificial-prevalence prediction
    :param result_path: destination path of the pickle file
    """
    def _prepare_result_lot(lot_results):
        # keep only column 0 of every array; the dropped column is redundant
        true_prevs, estim_prevs = lot_results
        return {
            'true_prevs': [true_i[:, 0].flatten() for true_i in true_prevs],
            'estim_prevs': [estim_i[:, 0].flatten() for estim_i in estim_prevs]
        }
    results = {
        'npp': _prepare_result_lot(npp_results),
        'app': _prepare_result_lot(app_results),
    }
    # context manager guarantees the file handle is closed; the original
    # passed an unclosed file object straight into pickle.dump
    with open(result_path, 'wb') as fout:
        pickle.dump(results, fout, pickle.HIGHEST_PROTOCOL)
def load_results(result_path):
    """Load results previously stored by ``save_results``.

    The stored arrays contain only the unconstrained (first) column of each
    prevalence vector; this function re-appends the constrained column as
    ``1 - p`` so callers get back ndarrays of ndim=2.

    :param result_path: path of the pickle file written by ``save_results``
    :return: a pair ``(results_npp, results_app)``, each a tuple
        ``(true_prevs, estim_prevs)`` of lists of 2-column ndarrays
    """
    def _unpack_result_lot(lot_result):
        # rebuild the dropped constrained column: second column = 1 - first
        true_prevs = lot_result['true_prevs']
        true_prevs = [np.vstack([true_i, 1 - true_i]).T for true_i in true_prevs]
        estim_prevs = lot_result['estim_prevs']
        estim_prevs = [np.vstack([estim_i, 1 - estim_i]).T for estim_i in estim_prevs]
        return true_prevs, estim_prevs
    # context manager guarantees the file handle is closed; the original
    # passed an unclosed file object straight into pickle.load
    with open(result_path, 'rb') as fin:
        results = pickle.load(fin)
    results_npp = _unpack_result_lot(results['npp'])
    results_app = _unpack_result_lot(results['app'])
    return results_npp, results_app
def run_experiment(dataset_name, model_name, model):
result_path = f'{opt.results}/{dataset_name}_{model_name}.pkl'
if already_run(result_path):
@ -147,10 +177,11 @@ def run_experiment(dataset_name, model_name, model):
model.fit(train)
results = dict()
results['npp'] = ml_natural_prevalence_prediction(model, test, sample_size, repeats=100)
results['app'] = ml_artificial_prevalence_prediction(model, test, sample_size, n_prevalences=21, repeats=10)
pickle.dump(results, open(result_path, 'wb'), pickle.HIGHEST_PROTOCOL)
results_npp = ml_natural_prevalence_prediction(model, test, sample_size, repeats=100)
results_app = ml_artificial_prevalence_prediction(model, test, sample_size, n_prevalences=11, repeats=5)
save_results(results_npp, results_app, result_path)
results_npp2, results_app2 = load_results(result_path)
print('pass')
if __name__ == '__main__':

View File

@ -5,6 +5,7 @@ import quapy as qp
from MultiLabel.mlquantification import MLAggregativeQuantifier
from mldata import MultilabelledCollection
import itertools
from tqdm import tqdm
def __check_error(error_metric):
@ -63,15 +64,20 @@ def ml_artificial_prevalence_prediction(model,
repeats=10,
random_seed=42):
test_indexes = []
nested_test_indexes = []
with qp.util.temp_seed(random_seed):
for cat in test.classes_:
test_indexes.append(list(test.artificial_sampling_index_generator(sample_size=sample_size,
nested_test_indexes.append(list(test.artificial_sampling_index_generator(sample_size=sample_size,
category=cat,
n_prevalences=n_prevalences,
repeats=repeats)))
test_indexes = list(itertools.chain.from_iterable(test_indexes))
return _ml_prevalence_predictions(model, test, test_indexes)
def _predict_batch(test_indexes):
return _ml_prevalence_predictions(model, test, test_indexes)
predictions = qp.util.parallel(_predict_batch, nested_test_indexes, n_jobs=-1)
true_prevs = list(itertools.chain.from_iterable(trues for trues, estims in predictions))
estim_prevs = list(itertools.chain.from_iterable(estims for trues, estims in predictions))
return true_prevs, estim_prevs
def ml_artificial_prevalence_evaluation(model,