
testing quapy via replicating Tweet Quantification experiments

Alejandro Moreo Fernandez 2021-01-12 17:39:00 +01:00
parent 3e07feda3c
commit 3c5a53bdec
7 changed files with 343 additions and 146 deletions


@@ -0,0 +1,136 @@
from sklearn.linear_model import LogisticRegression
import quapy as qp
import quapy.functional as F
import numpy as np
import os
import pickle
import itertools
from joblib import Parallel, delayed
import multiprocessing
n_jobs = multiprocessing.cpu_count()
def quantification_models():
    def newLR():
        return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
    __C_range = np.logspace(-4, 5, 10)
    lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
    yield 'cc', qp.method.aggregative.CC(newLR()), lr_params
    yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
    yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
    yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
def evaluate_experiment(true_prevalences, estim_prevalences):
    print('\nEvaluation Metrics:\n'+'='*22)
    for eval_measure in [qp.error.mae, qp.error.mrae]:
        err = eval_measure(true_prevalences, estim_prevalences)
        print(f'\t{eval_measure.__name__}={err:.4f}')
    print()
def evaluate_method_point_test(true_prev, estim_prev):
    print('\nPoint-Test evaluation:\n' + '=' * 22)
    print(f'true-prev={F.strprev(true_prev)}, estim-prev={F.strprev(estim_prev)}')
    for eval_measure in [qp.error.mae, qp.error.mrae]:
        err = eval_measure(true_prev, estim_prev)
        print(f'\t{eval_measure.__name__}={err:.4f}')
def result_path(dataset_name, model_name, optim_loss):
    return f'./results/{dataset_name}-{model_name}-{optim_loss}.pkl'
def is_already_computed(dataset_name, model_name, optim_loss):
    if dataset_name == 'semeval':
        check_datasets = ['semeval13', 'semeval14', 'semeval15']
    else:
        check_datasets = [dataset_name]
    return all(os.path.exists(result_path(name, model_name, optim_loss)) for name in check_datasets)
def save_results(dataset_name, model_name, optim_loss, *results):
    rpath = result_path(dataset_name, model_name, optim_loss)
    qp.util.create_parent_dir(rpath)
    with open(rpath, 'wb') as foo:
        pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL)
def run(experiment):
    sample_size = 100
    qp.environ['SAMPLE_SIZE'] = sample_size

    optim_loss, dataset_name, (model_name, model, hyperparams) = experiment

    if is_already_computed(dataset_name, model_name, optim_loss=optim_loss):
        print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.')
        return

    benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)

    # model selection (hyperparameter optimization for a quantification-oriented loss)
    model_selection = qp.model_selection.GridSearchQ(
        model,
        param_grid=hyperparams,
        sample_size=sample_size,
        n_prevpoints=21,
        n_repetitions=5,
        error='mae',
        refit=False,
        verbose=True
    )
    model_selection.fit(benchmark_devel.training, benchmark_devel.test)
    model = model_selection.best_model()

    # model evaluation
    test_names = [dataset_name] if dataset_name != 'semeval' else ['semeval13', 'semeval14', 'semeval15']
    for test_no, test_name in enumerate(test_names):
        benchmark_eval = qp.datasets.fetch_twitter(test_name, for_model_selection=False, min_df=5, pickle=True)
        if test_no == 0:
            # fits the model only the first time
            model.fit(benchmark_eval.training)
        true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
            model,
            test=benchmark_eval.test,
            sample_size=sample_size,
            n_prevpoints=21,
            n_repetitions=25
        )
        test_estim_prevalence = model.quantify(benchmark_eval.test.instances)
        test_true_prevalence = benchmark_eval.test.prevalence()

        evaluate_experiment(true_prevalences, estim_prevalences)
        evaluate_method_point_test(test_true_prevalence, test_estim_prevalence)
        save_results(test_name, model_name, optim_loss,
                     true_prevalences, estim_prevalences,
                     benchmark_eval.training.prevalence(), test_true_prevalence, test_estim_prevalence,
                     model_selection.best_params_)
if __name__ == '__main__':

    np.random.seed(0)

    optim_losses = ['mae', 'mrae']
    datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
    models = quantification_models()

    results = Parallel(n_jobs=n_jobs)(
        delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models)
    )
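Note on the on-disk format: each pickle written by save_results above stores a fixed 6-tuple, in the same order in which the arguments are passed (and in which tables.py unpacks them). A minimal reader sketch; the file name is illustrative, built as result_path('sanders', 'pacc', 'mae'):

import pickle

# illustrative path following the result_path() pattern defined above
with open('./results/sanders-pacc-mae.pkl', 'rb') as fin:
    (true_prevalences, estim_prevalences,
     train_prevalence, test_true_prevalence, test_estim_prevalence,
     best_params) = pickle.load(fin)
print(best_params)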
# QUANTIFIER_ALIASES = {
# 'emq': lambda learner: ExpectationMaximizationQuantifier(learner),
# 'svmq': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='q'),
# 'svmkld': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='kld'),
# 'svmnkld': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='nkld'),
# 'svmmae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mae'),
# 'svmmrae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mrae'),
# 'mlpe': lambda learner: MaximumLikelihoodPrevalenceEstimation(),
# }
#

TweetSentQuant/tables.py Normal file

@@ -0,0 +1,187 @@
import quapy as qp
from os import makedirs
# from evaluate import evaluate_directory, statistical_significance, get_ranks_from_Gao_Sebastiani
import sys, os
import pickle
from experiments import result_path
tables_path = './tables'
MAXTONE = 50 # sets the intensity of the maximum color reached by the worst (red) and best (green) results
makedirs(tables_path, exist_ok=True)
sample_size = 100
qp.environ['SAMPLE_SIZE'] = sample_size
# results_dict = evaluate_directory('results/*.pkl', evaluation_measures)
# stats = {
# dataset : {
# 'mae': statistical_significance(f'results/{dataset}-*-mae-run?.pkl', ae),
# 'mrae': statistical_significance(f'results/{dataset}-*-mrae-run?.pkl', rae),
# } for dataset in datasets
# }
nice = {
    'mae': 'AE',
    'mrae': 'RAE',
    'svmkld': 'SVM(KLD)',
    'svmnkld': 'SVM(NKLD)',
    'svmq': 'SVM(Q)',
    'svmae': 'SVM(AE)',
    'svmnae': 'SVM(NAE)',
    'svmmae': 'SVM(AE)',
    'svmmrae': 'SVM(RAE)',
    'quanet': 'QuaNet',
    'hdy': 'HDy',
    'dys': 'DyS',
    'svmperf': '',
    'sanders': 'Sanders',
    'semeval13': 'SemEval13',
    'semeval14': 'SemEval14',
    'semeval15': 'SemEval15',
    'semeval16': 'SemEval16'
}
# }
# }
def nicerm(key):
    return '\mathrm{'+nice[key]+'}'
def color_from_rel_rank(rel_rank, maxtone=100):
    rel_rank = rel_rank*2-1
    if rel_rank < 0:
        color = 'red'
        tone = maxtone*(-rel_rank)
    else:
        color = 'green'
        tone = maxtone*rel_rank
    return '\cellcolor{' + color + f'!{int(tone)}' + '}'
def color_from_abs_rank(abs_rank, n_methods, maxtone=100):
    rel_rank = 1.-(abs_rank-1.)/(n_methods-1)
    return color_from_rel_rank(rel_rank, maxtone)
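A quick sanity check of the rank-to-colour mapping above, with hypothetical rank values (not taken from the experiments):

# best of 9 methods maps to a full-intensity green cell at maxtone=50
assert color_from_abs_rank(1, n_methods=9, maxtone=50) == '\\cellcolor{green!50}'
# worst of 9 methods maps to a full-intensity red cell
assert color_from_abs_rank(9, n_methods=9, maxtone=50) == '\\cellcolor{red!50}'
# the middle rank maps to tone 0 (no visible colouring)
assert color_from_abs_rank(5, n_methods=9, maxtone=50) == '\\cellcolor{green!0}'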
def save_table(path, table):
    print(f'saving results in {path}')
    with open(path, 'wt') as foo:
        foo.write(table)
# Tables evaluation scores for AE and RAE (two tables)
# ----------------------------------------------------
datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
evaluation_measures = [qp.error.mae, qp.error.mrae]
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld']
results_dict = {}
stats={}
def getscore(dataset, method, loss):
    path = result_path(dataset, method, loss)
    if os.path.exists(path):
        true_prevs, estim_prevs, _, _, _, _ = pickle.load(open(path, 'rb'))
        err = getattr(qp.error, loss)
        return err(true_prevs, estim_prevs)
    return None
for i, eval_func in enumerate(evaluation_measures):

    eval_name = eval_func.__name__
    added_methods = ['svm' + eval_name]  # , 'quanet', 'dys']
    methods = gao_seb_methods + added_methods
    nold_methods = len(gao_seb_methods)
    nnew_methods = len(added_methods)

    tabular = """
    \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*len(gao_seb_methods)) + '|' + ('Y|'*len(added_methods)) + """} \hline
      & \multicolumn{""" + str(nold_methods) + """}{c||}{Methods tested in~\cite{Gao:2016uq}} & \multicolumn{""" + str(nnew_methods) + """}{c||}{} \\\\ \hline
    """

    for method in methods:
        tabular += ' & \side{' + nice.get(method, method.upper()) + '$^{' + nicerm(eval_name) + '}$} '
    tabular += '\\\\\hline\n'

    for dataset in datasets:
        tabular += nice.get(dataset, dataset.upper()) + ' '
        for method in methods:
            # simplify...
            score = getscore(dataset, method, eval_name)
            if score:
                tabular += f' & {score:.3f} '
            else:
                tabular += ' & --- '
        tabular += '\\\\\hline\n'
    tabular += "\end{tabularx}"

    save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)
sys.exit(0)
# gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
# Tables ranks for AE and RAE (two tables)
# ----------------------------------------------------
# for i, eval_func in enumerate(evaluation_measures):
# eval_name = eval_func.__name__
# methods = ['cc', 'acc', 'pcc', 'pacc', 'emq', 'svmq', 'svmkld', 'svmnkld']
# table = """
# \\begin{table}[h]
# """
# if i == 0:
# caption = """
# \caption{Rank positions of the quantification methods in the AE
# experiments, and (between parentheses) the rank positions
# obtained in the evaluation of~\cite{Gao:2016uq}.}
# """
# else:
# caption = "\caption{Same as Table~\\ref{tab:maeranks}, but with " + nice[eval_name] + " instead of AE.}"
# table += caption + """
# \\begin{center}
# \\resizebox{\\textwidth}{!}{
# """
# tabular = """
# \\begin{tabularx}{\\textwidth}{|c||Y|Y|Y|Y|Y|Y|Y|Y|} \hline
# & \multicolumn{8}{c|}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline
# """
#
# for method in methods:
# tabular += ' & \side{' + nice.get(method, method.upper()) +'$^{' + nicerm(eval_name) + '}$} '
# tabular += '\\\\\hline\n'
#
# for dataset in datasets:
# tabular += nice.get(dataset, dataset.upper()) + ' '
# ranks_no_gap = []
# for method in methods:
# learner = 'lr' if not method.startswith('svm') else 'svmperf'
# key = f'{dataset}-{method}-{learner}-{}-{eval_name}'
# ranks_no_gap.append(stats[dataset][eval_name].get(key, (None, None, len(methods)))[2])
# ranks_no_gap = sorted(ranks_no_gap)
# ranks_no_gap = {rank:i+1 for i,rank in enumerate(ranks_no_gap)}
# for method in methods:
# learner = 'lr' if not method.startswith('svm') else 'svmperf'
# key = f'{dataset}-{method}-{learner}-{sample_size}-{eval_name}'
# if key in stats[dataset][eval_name]:
# _, _, abs_rank = stats[dataset][eval_name][key]
# real_rank = ranks_no_gap[abs_rank]
# tabular += f' & {real_rank}'
# tabular += color_from_abs_rank(real_rank, len(methods), maxtone=MAXTONE)
# else:
# tabular += ' & --- '
# old_rank = gao_seb_ranks.get(f'{dataset}-{method}-{eval_name}', 'error')
# tabular += f' ({old_rank})'
# tabular += '\\\\\hline\n'
# tabular += "\end{tabularx}"
# table += tabular + """
# }
# \end{center}
# \label{tab:""" + eval_name + """ranks}
# \end{table}
# """
# save_table(f'../tables/tab_rank_{eval_name}.tex', table)
#
#
# print("[Done]")


@@ -9,9 +9,12 @@ import pandas as pd
 REVIEWS_SENTIMENT_DATASETS = ['hp', 'kindle', 'imdb']
-TWITTER_SENTIMENT_DATASETS = ['gasp', 'hcr', 'omd', 'sanders',
-                              'semeval13', 'semeval14', 'semeval15', 'semeval16',
-                              'sst', 'wa', 'wb']
+TWITTER_SENTIMENT_DATASETS_TEST = ['gasp', 'hcr', 'omd', 'sanders',
+                                   'semeval13', 'semeval14', 'semeval15', 'semeval16',
+                                   'sst', 'wa', 'wb']
+TWITTER_SENTIMENT_DATASETS_TRAIN = ['gasp', 'hcr', 'omd', 'sanders',
+                                    'semeval', 'semeval16',
+                                    'sst', 'wa', 'wb']

 def fetch_reviews(dataset_name, tfidf=False, min_df=None, data_home=None, pickle=False):
@@ -63,6 +66,7 @@ def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_hom
     Load a Twitter dataset as a Dataset instance, as used in:
     Gao, W., Sebastiani, F.: From classification to quantification in tweet sentiment analysis.
     Social Network Analysis and Mining 6(19), 1-22 (2016)
+    The datasets 'semeval13', 'semeval14', 'semeval15' share the same training set.

     :param dataset_name: the name of the dataset: valid ones are 'gasp', 'hcr', 'omd', 'sanders', 'semeval13',
     'semeval14', 'semeval15', 'semeval16', 'sst', 'wa', 'wb'
@@ -76,9 +80,11 @@ def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_hom
         faster subsequent invocations
     :return: a Dataset instance
     """
-    assert dataset_name in TWITTER_SENTIMENT_DATASETS, \
-        f'Name {dataset_name} does not match any known dataset for sentiment twitter. ' \
-        f'Valid ones are {TWITTER_SENTIMENT_DATASETS}'
+    assert dataset_name in TWITTER_SENTIMENT_DATASETS_TRAIN + TWITTER_SENTIMENT_DATASETS_TEST, \
+        f'Name {dataset_name} does not match any known dataset for sentiment twitter. ' \
+        f'Valid ones are {TWITTER_SENTIMENT_DATASETS_TRAIN} for model selection and ' \
+        f'{TWITTER_SENTIMENT_DATASETS_TEST} for test (datasets "semeval13", "semeval14", "semeval15" share ' \
+        f'a common training set "semeval")'
     if data_home is None:
         data_home = get_quapy_home()
@@ -97,6 +103,9 @@ def fetch_twitter(dataset_name, for_model_selection=False, min_df=None, data_hom
         print(f"the training and development sets for datasets 'semeval13', 'semeval14', 'semeval15' are common "
               f"(called 'semeval'); returning training-set='{trainset_name}' and test-set={testset_name}")
     else:
+        if dataset_name == 'semeval' and for_model_selection==False:
+            raise ValueError('dataset "semeval" can only be used for model selection. '
+                             'Use "semeval13", "semeval14", or "semeval15" for model evaluation.')
         trainset_name = testset_name = dataset_name

     if for_model_selection:
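To make the new train/test naming explicit, a short usage sketch consistent with the hunks above (the min_df and pickle arguments simply mirror those used in the experiments script earlier in this commit):

import quapy as qp

# model selection: 'semeval' is the training/development collection shared by semeval13/14/15
devel = qp.datasets.fetch_twitter('semeval', for_model_selection=True, min_df=5, pickle=True)

# final evaluation: request one of the concrete test sets that share that training set
test13 = qp.datasets.fetch_twitter('semeval13', for_model_selection=False, min_df=5, pickle=True)

# this call now raises ValueError, since 'semeval' has no test set of its own
# qp.datasets.fetch_twitter('semeval', for_model_selection=False)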


@@ -137,7 +137,7 @@ class IndexTransformer:

     def index(self, documents):
         vocab = self.vocabulary_.copy()
-        return [[vocab.get(word, self.unk) for word in self.analyzer(doc)] for doc in tqdm(documents, 'indexing')]
+        return [[vocab.getscore(word, self.unk) for word in self.analyzer(doc)] for doc in tqdm(documents, 'indexing')]

     def fit_transform(self, X, n_jobs=-1):
         return self.fit(X).transform(X, n_jobs=n_jobs)


@@ -39,17 +39,17 @@ def artificial_sampling_prediction(
     indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))

     if isinstance(model, qp.method.aggregative.AggregativeQuantifier):
-        print('\tinstance of aggregative-quantifier')
+        # print('\tinstance of aggregative-quantifier')
         quantification_func = model.aggregate
         if isinstance(model, qp.method.aggregative.AggregativeProbabilisticQuantifier):
-            print('\t\tinstance of probabilitstic-aggregative-quantifier')
+            # print('\t\tinstance of probabilitstic-aggregative-quantifier')
             preclassified_instances = model.posterior_probabilities(test.instances)
         else:
-            print('\t\tinstance of hard-aggregative-quantifier')
+            # print('\t\tinstance of hard-aggregative-quantifier')
             preclassified_instances = model.classify(test.instances)
         test = LabelledCollection(preclassified_instances, test.labels)
     else:
-        print('\t\tinstance of base-quantifier')
+        # print('\t\tinstance of base-quantifier')
         quantification_func = model.quantify

     def _predict_prevalences(index):


@@ -112,7 +112,7 @@ class GridSearchQ(BaseQuantifier):
             raise ValueError(f'unexpected error type; must either be a callable function or a str representing\n'
                              f'the name of an error function in {qp.error.QUANTIFICATION_ERROR_NAMES}')

-    def fit(self, training: LabelledCollection, validation: Union[LabelledCollection, float]=0.3):
+    def fit(self, training: LabelledCollection, validation: Union[LabelledCollection, float]=0.4):
         """
         :param training: the training set on which to optimize the hyperparameters
         :param validation: either a LabelledCollection on which to test the performance of the different settings, or
@@ -121,6 +121,8 @@ class GridSearchQ(BaseQuantifier):
         training, validation = self.__check_training_validation(training, validation)
         self.__check_num_evals(self.n_prevpoints, self.eval_budget, self.n_repetitions, training.n_classes)
+        print(f'training size={len(training)}')
+        print(f'validation size={len(validation)}')

         params_keys = list(self.param_grid.keys())
         params_values = list(self.param_grid.values())


@ -1,137 +0,0 @@
from sklearn.linear_model import LogisticRegression
import quapy as qp
import quapy.functional as F
import numpy as np
import os
import sys
import pickle
qp.environ['SAMPLE_SIZE'] = 100
sample_size = qp.environ['SAMPLE_SIZE']
def evaluate_experiment(true_prevalences, estim_prevalences, n_repetitions=25):
    #n_classes = true_prevalences.shape[1]
    #true_ave = true_prevalences.reshape(-1, n_repetitions, n_classes).mean(axis=1)
    #estim_ave = estim_prevalences.reshape(-1, n_repetitions, n_classes).mean(axis=1)
    #estim_std = estim_prevalences.reshape(-1, n_repetitions, n_classes).std(axis=1)
    #print('\nTrueP->mean(Phat)(std(Phat))\n'+'='*22)
    #for true, estim, std in zip(true_ave, estim_ave, estim_std):
    #    str_estim = ', '.join([f'{mean:.3f}+-{std:.4f}' for mean, std in zip(estim, std)])
    #    print(f'{F.strprev(true)}->[{str_estim}]')
    print('\nEvaluation Metrics:\n'+'='*22)
    for eval_measure in [qp.error.mae, qp.error.mrae]:
        err = eval_measure(true_prevalences, estim_prevalences)
        print(f'\t{eval_measure.__name__}={err:.4f}')
    print()
def evaluate_method_point_test(method, test):
    estim_prev = method.quantify(test.instances)
    true_prev = F.prevalence_from_labels(test.labels, test.n_classes)
    print('\nPoint-Test evaluation:\n' + '=' * 22)
    print(f'true-prev={F.strprev(true_prev)}, estim-prev={F.strprev(estim_prev)}')
    for eval_measure in [qp.error.mae, qp.error.mrae]:
        err = eval_measure(true_prev, estim_prev)
        print(f'\t{eval_measure.__name__}={err:.4f}')
def quantification_models():
    def newLR():
        return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
    __C_range = np.logspace(-4, 5, 10)
    lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
    #yield 'cc', qp.method.aggregative.CC(newLR()), lr_params
    #yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
    #yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
    yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
def result_path(dataset_name, model_name, optim_metric):
    return f'{dataset_name}-{model_name}-{optim_metric}.pkl'
def check_already_computed(dataset_name, model_name, optim_metric):
    path = result_path(dataset_name, model_name, optim_metric)
    return os.path.exists(path)
def save_results(dataset_name, model_name, optim_metric, *results):
    path = result_path(dataset_name, model_name, optim_metric)
    qp.util.create_parent_dir(path)
    with open(path, 'wb') as foo:
        pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL)
if __name__ == '__main__':
    np.random.seed(0)

    for dataset_name in ['sanders']:  # qp.datasets.TWITTER_SENTIMENT_DATASETS:

        benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
        benchmark_devel.stats()

        for model_name, model, hyperparams in quantification_models():

            model_selection = qp.model_selection.GridSearchQ(
                model,
                param_grid=hyperparams,
                sample_size=sample_size,
                n_prevpoints=21,
                n_repetitions=5,
                error='mae',
                refit=False,
                verbose=True
            )
            model_selection.fit(benchmark_devel.training, benchmark_devel.test)
            model = model_selection.best_model()

            benchmark_eval = qp.datasets.fetch_twitter(dataset_name, for_model_selection=False, min_df=5, pickle=True)
            model.fit(benchmark_eval.training)
            true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
                model,
                test=benchmark_eval.test,
                sample_size=sample_size,
                n_prevpoints=21,
                n_repetitions=25
            )

            evaluate_experiment(true_prevalences, estim_prevalences, n_repetitions=25)
            evaluate_method_point_test(model, benchmark_eval.test)
            #save_arrays(FLAGS.results, true_prevalences, estim_prevalences, test_name)

    sys.exit(0)

    # decide the test to be performed (in the case of 'semeval', tests are 'semeval13', 'semeval14', 'semeval15')
    if FLAGS.dataset == 'semeval':
        test_sets = ['semeval13', 'semeval14', 'semeval15']
    else:
        test_sets = [FLAGS.dataset]

    evaluate_method_point_test(method, benchmark_eval.test, test_name=test_set)
# quantifiers:
# ----------------------------------------
# alias for quantifiers and default configurations
QUANTIFIER_ALIASES = {
    'cc': lambda learner: ClassifyAndCount(learner),
    'acc': lambda learner: AdjustedClassifyAndCount(learner),
    'pcc': lambda learner: ProbabilisticClassifyAndCount(learner),
    'pacc': lambda learner: ProbabilisticAdjustedClassifyAndCount(learner),
    'emq': lambda learner: ExpectationMaximizationQuantifier(learner),
    'svmq': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='q'),
    'svmkld': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='kld'),
    'svmnkld': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='nkld'),
    'svmmae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mae'),
    'svmmrae': lambda learner: OneVsAllELM(settings.SVM_PERF_HOME, loss='mrae'),
    'mlpe': lambda learner: MaximumLikelihoodPrevalenceEstimation(),
}