1
0
Fork 0

quapy fixed

This commit is contained in:
Alejandro Moreo Fernandez 2021-01-22 09:58:12 +01:00
parent 1ba0748b59
commit bf1cc74ba1
7 changed files with 142 additions and 84 deletions

View File

@ -53,11 +53,10 @@ def quantification_models():
checkpointdir=args.checkpointdir, device=device), lr_params
else:
yield 'quanet', QuaNet(PCALR(**newLR().get_params()), settings.SAMPLE_SIZE,
patience=5,
tr_iter_per_poch=500, va_iter_per_poch=100,
checkpointdir=args.checkpointdir, device=device), lr_params
param_mod_sel={'sample_size':settings.SAMPLE_SIZE, 'n_prevpoints':21, 'n_repetitions':5}
#param_mod_sel={'sample_size':settings.SAMPLE_SIZE, 'n_prevpoints':21, 'n_repetitions':5}
#yield 'epaccmaeptr', EPACC(newLR(), param_grid=lr_params, optim='mae', policy='ptr', param_mod_sel=param_mod_sel, n_jobs=settings.ENSEMBLE_N_JOBS), None
# yield 'epaccmraeptr', EPACC(newLR(), param_grid=lr_params, optim='mrae', policy='ptr', param_mod_sel=param_mod_sel, n_jobs=settings.ENSEMBLE_N_JOBS), None
# yield 'epaccmae', EPACC(newLR(), param_grid=lr_params, optim='mae', policy='mae', param_mod_sel=param_mod_sel, n_jobs=settings.ENSEMBLE_N_JOBS), None
@ -165,6 +164,9 @@ def run(experiment):
benchmark_eval.training.prevalence(), test_true_prevalence, test_estim_prevalence,
best_params)
if isinstance(model, QuaNet):
model.clean_checkpoint_dir()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
@ -180,7 +182,7 @@ if __name__ == '__main__':
np.random.seed(0)
optim_losses = ['mae'] # ['mae', 'mrae']
datasets = ['hcr', 'omd', 'sanders', 'sst'] # qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
models = quantification_models()
results = Parallel(n_jobs=settings.N_JOBS)(

View File

@ -1,14 +1,15 @@
from sklearn.base import BaseEstimator
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LogisticRegression
class PCALR:
class PCALR(BaseEstimator):
"""
An example of a classification method that also generates embedded inputs, as those required for QuaNet.
This example simply combines a Principal Component Analysis (PCA) with Logistic Regression (LR).
"""
def __init__(self, n_components=300, **kwargs):
def __init__(self, n_components=100, **kwargs):
self.n_components = n_components
self.learner = LogisticRegression(**kwargs)
@ -24,19 +25,19 @@ class PCALR:
self.learner.set_params(**params)
def fit(self, X, y):
self.pca = TruncatedSVD(self.n_components)
embedded = self.pca.fit_transform(X, y)
self.learner.fit(embedded, y)
self.learner.fit(X, y)
self.pca = TruncatedSVD(self.n_components).fit(X, y)
# embedded = self.pca.transform(X)
self.classes_ = self.learner.classes_
return self
def predict(self, X):
embedded = self.transform(X)
return self.learner.predict(embedded)
# X = self.transform(X)
return self.learner.predict(X)
def predict_proba(self, X):
embedded = self.transform(X)
return self.learner.predict_proba(embedded)
# X = self.transform(X)
return self.learner.predict_proba(X)
def transform(self, X):
return self.pca.transform(X)

View File

@ -17,7 +17,7 @@ def artificial_sampling_prediction(
sample_size,
n_prevpoints=210,
n_repetitions=1,
n_jobs=-1,
n_jobs=1,
random_seed=42,
verbose=True
):

View File

@ -34,8 +34,11 @@ class Ensemble(BaseQuantifier):
Information Fusion, 45, 1-15.
"""
def __init__(self, quantifier: BaseQuantifier, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1, verbose=False):
assert policy in Ensemble.VALID_POLICIES, f'unknown policy={policy}; valid are {Ensemble.VALID_POLICIES}'
def __init__(self, quantifier: BaseQuantifier, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1, verbose=True, max_sample_size=None):
assert policy in Ensemble.VALID_POLICIES, \
f'unknown policy={policy}; valid are {Ensemble.VALID_POLICIES}'
assert max_sample_size is None or max_sample_size > 0, \
'wrong value for max_sample_size; set to a positive number or None'
self.base_quantifier = quantifier
self.size = size
self.min_pos = min_pos
@ -44,6 +47,7 @@ class Ensemble(BaseQuantifier):
self.n_jobs = n_jobs
self.post_proba_fn = None
self.verbose = verbose
self.max_sample_size = max_sample_size
def sout(self, msg):
if self.verbose:
@ -64,9 +68,10 @@ class Ensemble(BaseQuantifier):
posteriors, self.post_proba_fn = self.ds_policy_get_posteriors(data)
is_static_policy = (self.policy in qp.error.QUANTIFICATION_ERROR_NAMES)
sample_size = len(data) if self.max_sample_size is None else min(self.max_sample_size, len(data))
self.ensemble = Parallel(n_jobs=self.n_jobs)(
delayed(_delayed_new_instance)(
self.base_quantifier, data, val_split, prev, posteriors, keep_samples=is_static_policy, verbose=self.verbose
self.base_quantifier, data, val_split, prev, posteriors, keep_samples=is_static_policy, verbose=self.verbose, sample_size=sample_size
) for prev in tqdm(prevs, desc='fitting ensamble')
)
@ -131,7 +136,7 @@ class Ensemble(BaseQuantifier):
that the distribution of posterior probabilities from training and test examples is compared by means of the
Hellinger Distance. However, how these posterior probabilities are generated is not specified. In the article,
a Logistic Regressor (LR) is used as the classifier device and that could be used for this purpose. However, in
general, a Quantifier is not necessarily an instance of Aggregative Probabilistic Quantifiers, and so that the
general, a Quantifier is not necessarily an instance of Aggregative Probabilistic Quantifiers, and so, that the
quantifier builds on top of a probabilistic classifier cannot be taken for granted. Additionally, it would not
be correct to generate the posterior probabilities for training documents that have concurred in training the
classifier that generates them.
@ -196,11 +201,12 @@ def _delayed_new_instance(base_quantifier,
prev,
posteriors,
keep_samples,
verbose):
verbose,
sample_size):
if verbose:
print(f'\tfit-start for prev {F.strprev(prev)}')
print(f'\tfit-start for prev {F.strprev(prev)}, sample_size={sample_size}')
model = deepcopy(base_quantifier)
sample_index = data.sampling_index(len(data), *prev)
sample_index = data.sampling_index(sample_size, *prev)
sample = data.sampling_from_index(sample_index)
if val_split is None:
model.fit(sample)
@ -277,7 +283,7 @@ def _check_error(error):
def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None,
param_model_sel:dict=None,
size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1):
size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1, max_sample_size=None):
if optim is not None:
if param_grid is None:
raise ValueError(f'param_grid is None but optim was requested.')
@ -286,24 +292,24 @@ def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None,
error = _check_error(optim)
return _instantiate_ensemble(learner, base_quantifier_class, param_grid, error, param_model_sel,
size=size, min_pos=min_pos, red_size=red_size,
policy=policy, n_jobs=n_jobs)
policy=policy, n_jobs=n_jobs, max_sample_size=max_sample_size)
def ECC(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1):
return ensembleFactory(learner, CC, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs)
def ECC(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1, max_sample_size=None):
return ensembleFactory(learner, CC, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs, max_sample_size=max_sample_size)
def EACC(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1):
return ensembleFactory(learner, ACC, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs)
def EACC(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1, max_sample_size=None):
return ensembleFactory(learner, ACC, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs, max_sample_size=max_sample_size)
def EPACC(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1):
return ensembleFactory(learner, PACC, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs)
def EPACC(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1, max_sample_size=None):
return ensembleFactory(learner, PACC, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs, max_sample_size=max_sample_size)
def EHDy(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1):
return ensembleFactory(learner, HDy, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs)
def EHDy(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1, max_sample_size=None):
return ensembleFactory(learner, HDy, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs, max_sample_size=max_sample_size)
def EEMQ(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1):
return ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs)
def EEMQ(learner, param_grid=None, optim=None, param_mod_sel=None, size=50, min_pos=1, red_size=25, policy='ave', n_jobs=1, max_sample_size=None):
return ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, size, min_pos, red_size, policy, n_jobs, max_sample_size=max_sample_size)

View File

@ -15,12 +15,12 @@ class QuaNetTrainer(BaseQuantifier):
def __init__(self,
learner,
sample_size,
n_epochs=500,
tr_iter_per_poch=200,
va_iter_per_poch=21,
n_epochs=100,
tr_iter_per_poch=500,
va_iter_per_poch=100,
lr=1e-3,
lstm_hidden_size=128,
lstm_nlayers=2,
lstm_hidden_size=64,
lstm_nlayers=1,
ff_layers=[1024, 512],
bidirectional=True,
qdrop_p=0.5,
@ -60,30 +60,30 @@ class QuaNetTrainer(BaseQuantifier):
self.__check_params_colision(self.quanet_params, self.learner.get_params())
def fit(self, data: LabelledCollection, fit_learner=True, *args):
def fit(self, data: LabelledCollection, fit_learner=True):
"""
:param data: the training data on which to train QuaNet. If fit_learner=True, the data will be split in
40/40/20 for training the classifier, training QuaNet, and validating QuaNet, respectively. If
fit_learner=False, the data will be split in 66/34 for training QuaNet and validating it, respectively.
:param fit_learner: if true, trains the classifier on a split containing 40% of the data
:param args: unused
:return: self
"""
# split: 40% for training classification, 40% for training quapy, and 20% for validating quapy
#self.learner, unused_data = \
# training_helper(self.learner, data, fit_learner, ensure_probabilistic=True, val_split=0.6)
classifier_data, unused_data = data.split_stratified(0.4)
train_data, valid_data = unused_data.split_stratified(0.66) # 0.66 split of 60% makes 40% and 20%
self.learner.fit(*classifier_data.Xy)
print('Classifier data: ', len(classifier_data))
print('Q-Training data: ', len(train_data))
print('Q-Valid data: ', len(valid_data))
# estimate the hard and soft stats tpr and fpr of the classifier
self.tr_prev = data.prevalence()
self.learner.fit(*classifier_data.Xy)
self.quantifiers = {
'cc': CC(self.learner).fit(classifier_data, fit_learner=False),
'acc': ACC(self.learner).fit(classifier_data, fit_learner=True),
'acc': ACC(self.learner).fit(classifier_data, fit_learner=False, val_split=valid_data),
'pcc': PCC(self.learner).fit(classifier_data, fit_learner=False),
'pacc': PACC(self.learner).fit(classifier_data, fit_learner=True),
'pacc': PACC(self.learner).fit(classifier_data, fit_learner=False, val_split=valid_data),
'emq': EMQ(self.learner).fit(classifier_data, fit_learner=False),
}
@ -91,13 +91,15 @@ class QuaNetTrainer(BaseQuantifier):
valid_posteriors = self.learner.predict_proba(valid_data.instances)
train_posteriors = self.learner.predict_proba(train_data.instances)
# turn instances' indexes into embeddings
# turn instances' original representations into embeddings
valid_data.instances = self.learner.transform(valid_data.instances)
train_data.instances = self.learner.transform(train_data.instances)
self.status = {
'tr-loss': -1,
'va-loss': -1,
'tr-mae': -1,
'va-mae': -1,
}
nQ = len(self.quantifiers)
@ -105,10 +107,11 @@ class QuaNetTrainer(BaseQuantifier):
self.quanet = QuaNetModule(
doc_embedding_size=train_data.instances.shape[1],
n_classes=data.n_classes,
stats_size=nQ*nC + 2*nC*nC,
stats_size=nQ*nC, #+ 2*nC*nC,
order_by=0 if data.binary else None,
**self.quanet_params
).to(self.device)
print(self.quanet)
self.optim = torch.optim.Adam(self.quanet.parameters(), lr=self.lr)
early_stop = EarlyStop(self.patience, lower_is_better=True)
@ -139,8 +142,8 @@ class QuaNetTrainer(BaseQuantifier):
prevs_estim.extend(quantifier.aggregate(predictions))
# add the class-conditional predictions P(y'i|yj) from ACC and PACC
prevs_estim.extend(self.quantifiers['acc'].Pte_cond_estim_.flatten())
prevs_estim.extend(self.quantifiers['pacc'].Pte_cond_estim_.flatten())
# prevs_estim.extend(self.quantifiers['acc'].Pte_cond_estim_.flatten())
# prevs_estim.extend(self.quantifiers['pacc'].Pte_cond_estim_.flatten())
return prevs_estim
@ -158,11 +161,23 @@ class QuaNetTrainer(BaseQuantifier):
def epoch(self, data: LabelledCollection, posteriors, iterations, epoch, early_stop, train):
mse_loss = MSELoss()
prevpoints = F.get_nprevpoints_approximation(iterations, self.quanet.n_classes)
# prevpoints = F.get_nprevpoints_approximation(iterations, self.quanet.n_classes)
# iterations = F.num_prevalence_combinations(prevpoints, self.quanet.n_classes)
self.quanet.train(mode=train)
losses = []
pbar = tqdm(data.artificial_sampling_index_generator(self.sample_size, prevpoints))
mae_errors = []
if train==False:
prevpoints = F.get_nprevpoints_approximation(iterations, self.quanet.n_classes)
iterations = F.num_prevalence_combinations(prevpoints, self.quanet.n_classes)
with qp.util.temp_seed(0):
sampling_index_gen = data.artificial_sampling_index_generator(self.sample_size, prevpoints)
else:
# sampling_index_gen = data.artificial_sampling_index_generator(self.sample_size, prevpoints)
sampling_index_gen = [data.sampling_index(self.sample_size, *prev) for prev in F.uniform_simplex_sampling(data.n_classes, iterations)]
pbar = tqdm(sampling_index_gen, total=iterations) if train else sampling_index_gen
rand_it_show = np.random.randint(iterations)
for it, index in enumerate(pbar):
sample_data = data.sampling_from_index(index)
sample_posteriors = posteriors[index]
@ -172,21 +187,46 @@ class QuaNetTrainer(BaseQuantifier):
self.optim.zero_grad()
phat = self.quanet.forward(sample_data.instances, sample_posteriors, quant_estims)
loss = mse_loss(phat, ptrue)
mae = mae_loss(phat, ptrue)
loss.backward()
self.optim.step()
else:
with torch.no_grad():
phat = self.quanet.forward(sample_data.instances, sample_posteriors, quant_estims)
loss = mse_loss(phat, ptrue)
mae = mae_loss(phat, ptrue)
losses.append(loss.item())
mae_errors.append(mae.item())
mse = np.mean(losses)
mae = np.mean(mae_errors)
if train:
self.status['tr-loss'] = mse
self.status['tr-mae'] = mae
else:
self.status['va-loss'] = mse
self.status['va-mae'] = mae
if train:
pbar.set_description(f'[QuaNet] '
f'epoch={epoch} [it={it}/{iterations}]\t'
f'tr-mseloss={self.status["tr-loss"]:.5f} tr-maeloss={self.status["tr-mae"]:.5f}\t'
f'val-mseloss={self.status["va-loss"]:.5f} val-maeloss={self.status["va-mae"]:.5f} '
f'patience={early_stop.patience}/{early_stop.PATIENCE_LIMIT}')
# if it==rand_it_show:
# print()
# print('='*100)
# print('Training: ' if train else 'Validation:')
# print('=' * 100)
# print('True: ', ptrue.cpu().numpy().flatten())
# print('Estim: ', phat.detach().cpu().numpy().flatten())
# for pred, name in zip(np.asarray(quant_estims).reshape(-1,data.n_classes),
# ['cc', 'acc', 'pcc', 'pacc', 'emq', 'Pte[acc]','','','Pte[pacc]','','']):
# print(name, pred)
self.status['tr-loss' if train else 'va-loss'] = np.mean(losses[-10:])
pbar.set_description(f'[QuaNet][{"training" if train else "validating"}] '
f'epoch={epoch} [it={it}/{iterations}]\t'
f'tr-loss={self.status["tr-loss"]:.5f} '
f'val-loss={self.status["va-loss"]:.5f} '
f'patience={early_stop.patience}/{early_stop.PATIENCE_LIMIT}')
def get_params(self, deep=True):
return {**self.learner.get_params(), **self.quanet_params}
@ -216,6 +256,9 @@ class QuaNetTrainer(BaseQuantifier):
shutil.rmtree(self.checkpointdir, ignore_errors=True)
def mae_loss(output, target):
    """Return the mean absolute error between two tensors.

    :param output: tensor of predicted values
    :param target: tensor of reference values; it is subtracted element-wise
        from ``output``
    :return: a tensor holding the mean of ``|output - target|`` over all elements
    """
    return torch.mean(torch.abs(output - target))
class QuaNetModule(torch.nn.Module):
def __init__(self,
@ -227,7 +270,7 @@ class QuaNetModule(torch.nn.Module):
ff_layers=[1024, 512],
bidirectional=True,
qdrop_p=0.5,
order_by=None):
order_by=0):
super().__init__()
self.n_classes = n_classes
@ -277,12 +320,12 @@ class QuaNetModule(torch.nn.Module):
embeded_posteriors = torch.cat((doc_embeddings, doc_posteriors), dim=-1)
# the entire set represents only one instance in quapy contexts, and so the batch_size=1
# the shape should be (1, number-of-instances, embedding-size + 1)
# the shape should be (1, number-of-instances, embedding-size + n_classes)
embeded_posteriors = embeded_posteriors.unsqueeze(0)
self.lstm.flatten_parameters()
_, (rnn_hidden,_) = self.lstm(embeded_posteriors, self.init_hidden())
rnn_hidden = rnn_hidden.view(self.nlayers, self.ndirections, -1, self.hidden_size)
rnn_hidden = rnn_hidden.view(self.nlayers, self.ndirections, 1, self.hidden_size)
quant_embedding = rnn_hidden[0].view(-1)
quant_embedding = torch.cat((quant_embedding, statistics))

View File

@ -21,7 +21,7 @@ class GridSearchQ(BaseQuantifier):
eval_budget : int = None,
error: Union[Callable, str] = qp.error.mae,
refit=False,
n_jobs=-1,
n_jobs=1,
random_seed=42,
timeout=-1,
verbose=False):
@ -158,7 +158,7 @@ class GridSearchQ(BaseQuantifier):
model.fit(training)
true_prevalences, estim_prevalences = artificial_sampling_prediction(
model, val_split, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed,
verbose=False
verbose=True
)
score = self.error(true_prevalences, estim_prevalences)

48
test.py
View File

@ -30,7 +30,7 @@ if binary:
#qp.data.preprocessing.index(dataset, inplace=True)
else:
dataset = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=10, pickle=True)
dataset = qp.datasets.fetch_twitter('gasp', for_model_selection=False, min_df=5, pickle=True)
#dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3)
print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.test)}')
@ -56,19 +56,25 @@ print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.tes
#learner = LogisticRegression(max_iter=1000)
# model = qp.method.aggregative.ClassifyAndCount(learner)
learner = LogisticRegression(max_iter=1000)
model = qp.method.meta.EPACC(learner, size=10, red_size=5, max_sample_size=200)
# param_grid={'C':[1,10,100]},
# optim='mae', param_mod_sel={'sample_size':100, 'n_prevpoints':21, 'n_repetitions':5},
# policy='ptr', n_jobs=1)
# regressor = LinearSVR(max_iter=10000)
# param_grid = {'C': np.logspace(-1,3,5)}
# model = AveragePoolQuantification(regressor, sample_size, trials=5000, n_components=500, zscore=False)
#model = qp.method.meta.EPACC(learner, size=10, red_size=5,
# param_grid={'C':[1,10,100]},
# optim='mae', param_mod_sel={'sample_size':100, 'n_prevpoints':21, 'n_repetitions':5},
# policy='ptr', n_jobs=1)
regressor = LinearSVR(max_iter=10000)
param_grid = {'C': np.logspace(-1,3,5)}
model = AveragePoolQuantification(regressor, sample_size, trials=5000, n_components=500, zscore=False)
#model = qp.method.meta.EHDy(learner, param_grid=param_grid, optim='mae',
# model = qp.method.meta.EHDy(learner, param_grid=param_grid, optim='mae',
# sample_size=sample_size, eval_budget=max_evaluations//10, n_jobs=-1)
#model = qp.method.aggregative.ClassifyAndCount(learner)
# model = qp.method.meta.QuaNet(PCALR(n_components=100, max_iter=1000),
# sample_size=100,
# patience=10,
# tr_iter_per_poch=500, va_iter_per_poch=100, #lstm_nlayers=2, lstm_hidden_size=64,
# ff_layers=[500, 250, 50],
# checkpointdir='./checkpoint', device='cuda')
if qp.isbinary(model) and not qp.isbinary(dataset):
model = qp.method.aggregative.OneVsAll(model)
@ -87,17 +93,17 @@ model.fit(dataset.training)
# estimating class prevalences
print('quantifying')
prevalences_estim = model.quantify(dataset.test.instances)
prevalences_true = dataset.test.prevalence()
# print('quantifying')
# prevalences_estim = model.quantify(dataset.test.instances)
# prevalences_true = dataset.test.prevalence()
#
# evaluation (one single prediction)
error = qp.error.mae(prevalences_true, prevalences_estim)
print(f'Evaluation in test (1 eval)')
print(f'true prevalence {F.strprev(prevalences_true)}')
print(f'estim prevalence {F.strprev(prevalences_estim)}')
print(f'mae={error:.3f}')
# error = qp.error.mae(prevalences_true, prevalences_estim)
#
# print(f'Evaluation in test (1 eval)')
# print(f'true prevalence {F.strprev(prevalences_true)}')
# print(f'estim prevalence {F.strprev(prevalences_estim)}')
# print(f'mae={error:.3f}')
# Model fit and Evaluation according to the artificial sampling protocol
@ -117,7 +123,7 @@ for error in qp.error.QUANTIFICATION_ERROR:
score = error(true_prev, estim_prev)
print(f'{error.__name__}={score:.5f}')
#sys.exit(0)
sys.exit(0)
# Model selection and Evaluation according to the artificial sampling protocol
# ----------------------------------------------------------------------------