forked from moreo/QuaPy

parallelizing stuff

This commit is contained in:
  parent b941c0665e
  commit db1dbe2534

MultiLabel
@@ -23,7 +23,7 @@ import pickle
 
 def cls():
     # return LinearSVC()
-    return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
+    return LogisticRegression(max_iter=1000, solver='lbfgs')
 
 
 def calibratedCls():
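A side note on the hunk above: dropping n_jobs=-1 from LogisticRegression is consistent with the rest of the commit, which moves parallelism up into the evaluation loop (see the qp.util.parallel call further down); keeping both levels parallel would oversubscribe the available cores.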
@@ -38,10 +38,10 @@ n_samples = 5000
 
 def models():
     yield 'NaiveCC', MultilabelNaiveAggregativeQuantifier(CC(cls()))
-    yield 'NaivePCC', MultilabelNaiveAggregativeQuantifier(PCC(cls()))
-    yield 'NaiveACC', MultilabelNaiveAggregativeQuantifier(ACC(cls()))
-    yield 'NaivePACC', MultilabelNaiveAggregativeQuantifier(PACC(cls()))
-    yield 'HDy', MultilabelNaiveAggregativeQuantifier(HDy(cls()))
+    # yield 'NaivePCC', MultilabelNaiveAggregativeQuantifier(PCC(cls()))
+    # yield 'NaiveACC', MultilabelNaiveAggregativeQuantifier(ACC(cls()))
+    # yield 'NaivePACC', MultilabelNaiveAggregativeQuantifier(PACC(cls()))
+    # yield 'HDy', MultilabelNaiveAggregativeQuantifier(HDy(cls()))
     # yield 'EMQ', MultilabelQuantifier(EMQ(calibratedCls()))
     # yield 'StackCC', MLCC(MultilabelStackedClassifier(cls()))
     # yield 'StackPCC', MLPCC(MultilabelStackedClassifier(cls()))
@@ -135,6 +135,36 @@ def print_info(train, test):
     print(f'MLPE: {qp.error.mae(train.prevalence(), test.prevalence()):.5f}')
 
 
+def save_results(npp_results, app_results, result_path):
+    # results are lists of tuples of (true_prevs, estim_prevs)
+    # each true_prevs is an ndarray of ndim=2, but the second dimension is constrained
+    def _prepare_result_lot(lot_results):
+        true_prevs, estim_prevs = lot_results
+        return {
+            'true_prevs': [true_i[:,0].flatten() for true_i in true_prevs],  # removes the constrained prevalence
+            'estim_prevs': [estim_i[:,0].flatten() for estim_i in estim_prevs]  # removes the constrained prevalence
+        }
+
+    results = {
+        'npp': _prepare_result_lot(npp_results),
+        'app': _prepare_result_lot(app_results),
+    }
+    pickle.dump(results, open(result_path, 'wb'), pickle.HIGHEST_PROTOCOL)
+
+
+def load_results(result_path):
+    def _unpack_result_lot(lot_result):
+        true_prevs = lot_result['true_prevs']
+        true_prevs = [np.vstack([true_i, 1 - true_i]).T for true_i in true_prevs]  # add the constrained prevalence
+        estim_prevs = lot_result['estim_prevs']
+        estim_prevs = [np.vstack([estim_i, 1 - estim_i]).T for estim_i in estim_prevs]  # add the constrained prevalence
+        return true_prevs, estim_prevs
+    results = pickle.load(open(result_path, 'rb'))
+    results_npp = _unpack_result_lot(results['npp'])
+    results_app = _unpack_result_lot(results['app'])
+    return results_npp, results_app
+
+
 def run_experiment(dataset_name, model_name, model):
     result_path = f'{opt.results}/{dataset_name}_{model_name}.pkl'
     if already_run(result_path):
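Both helpers rely on the same invariant: each per-category prevalence row is binary and sums to 1, so only the positive-class column needs to be stored. A minimal numpy illustration of the round-trip (the values are made up):

import numpy as np

true_i = np.array([[0.2, 0.8],
                   [0.7, 0.3]])                 # rows are [p, 1-p]
stored = true_i[:, 0].flatten()                 # what save_results keeps: [0.2, 0.7]
restored = np.vstack([stored, 1 - stored]).T    # what load_results rebuilds
assert np.allclose(restored, true_i)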
@@ -147,10 +177,11 @@ def run_experiment(dataset_name, model_name, model):
 
     model.fit(train)
 
-    results = dict()
-    results['npp'] = ml_natural_prevalence_prediction(model, test, sample_size, repeats=100)
-    results['app'] = ml_artificial_prevalence_prediction(model, test, sample_size, n_prevalences=21, repeats=10)
-    pickle.dump(results, open(result_path, 'wb'), pickle.HIGHEST_PROTOCOL)
+    results_npp = ml_natural_prevalence_prediction(model, test, sample_size, repeats=100)
+    results_app = ml_artificial_prevalence_prediction(model, test, sample_size, n_prevalences=11, repeats=5)
+    save_results(results_npp, results_app, result_path)
+    results_npp2, results_app2 = load_results(result_path)
+    print('pass')
 
 
 if __name__ == '__main__':
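run_experiment now exercises the save/load round-trip directly: it saves, reloads, and prints 'pass'. A hypothetical helper (not part of the commit) that would turn that eyeball check into an assertion:

import numpy as np

def check_roundtrip(saved, reloaded):
    # saved/reloaded are (true_prevs, estim_prevs) pairs of lists of ndarrays
    for orig_list, reread_list in zip(saved, reloaded):
        assert all(np.allclose(a, b) for a, b in zip(orig_list, reread_list))

# usage: check_roundtrip(results_npp, results_npp2); check_roundtrip(results_app, results_app2)

The remaining hunks belong to the second changed file, the evaluation module that defines ml_artificial_prevalence_prediction.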
@@ -5,6 +5,7 @@ import quapy as qp
 from MultiLabel.mlquantification import MLAggregativeQuantifier
 from mldata import MultilabelledCollection
 import itertools
+from tqdm import tqdm
 
 
 def __check_error(error_metric):
@@ -63,15 +64,20 @@ def ml_artificial_prevalence_prediction(model,
                                         repeats=10,
                                         random_seed=42):
 
-    test_indexes = []
+    nested_test_indexes = []
     with qp.util.temp_seed(random_seed):
         for cat in test.classes_:
-            test_indexes.append(list(test.artificial_sampling_index_generator(sample_size=sample_size,
-                                                                              category=cat,
-                                                                              n_prevalences=n_prevalences,
-                                                                              repeats=repeats)))
-    test_indexes = list(itertools.chain.from_iterable(test_indexes))
-    return _ml_prevalence_predictions(model, test, test_indexes)
+            nested_test_indexes.append(list(test.artificial_sampling_index_generator(sample_size=sample_size,
+                                                                                     category=cat,
+                                                                                     n_prevalences=n_prevalences,
+                                                                                     repeats=repeats)))
+    def _predict_batch(test_indexes):
+        return _ml_prevalence_predictions(model, test, test_indexes)
+
+    predictions = qp.util.parallel(_predict_batch, nested_test_indexes, n_jobs=-1)
+    true_prevs = list(itertools.chain.from_iterable(trues for trues, estims in predictions))
+    estim_prevs = list(itertools.chain.from_iterable(estims for trues, estims in predictions))
+    return true_prevs, estim_prevs
 
 
 def ml_artificial_prevalence_evaluation(model,
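This hunk is the heart of the commit: instead of flattening all sampling indexes into one list and predicting sequentially, the per-category index batches stay nested and are dispatched to workers, one batch per worker. As a rough sketch, assuming qp.util.parallel is a thin joblib wrapper that maps a function over a list of argument values (the names below are illustrative, not QuaPy's):

from joblib import Parallel, delayed

def parallel_sketch(func, args, n_jobs):
    # run func once per element of args, spread across n_jobs processes
    return Parallel(n_jobs=n_jobs)(delayed(func)(a) for a in args)

# each worker gets one category's batch of sampling indexes and returns a
# (true_prevs, estim_prevs) pair; the caller then chains the pairs back together
# predictions = parallel_sketch(_predict_batch, nested_test_indexes, n_jobs=-1)

One caveat of this design: _predict_batch closes over model and test, so with a process-based backend each worker receives a pickled copy of both.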