This commit is contained in:
Alejandro Moreo Fernandez 2021-03-11 09:35:10 +01:00
parent eabfb34626
commit 77fcb708a6
14 changed files with 39 additions and 1085 deletions

View File

@ -1,43 +1,13 @@
from sklearn.calibration import CalibratedClassifierCV from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC from sklearn.svm import LinearSVC
from fgsld.fgsld_quantifiers import FakeFGLSD
from NewMethods.fgsld.fine_grained_sld import FineGrainedSLD
from method.aggregative import EMQ, CC from method.aggregative import EMQ, CC
from quapy.data import LabelledCollection
from quapy.method.base import BaseQuantifier
import quapy as qp import quapy as qp
import quapy.functional as F
from sklearn.linear_model import LogisticRegression
class FakeFGLSD(BaseQuantifier):
    """Quantifier that delegates prevalence estimation to ``FineGrainedSLD``.

    The training instances and labels are kept on the object at ``fit`` time
    because ``FineGrainedSLD`` is built from both the training and the test
    data when ``quantify`` is called.
    """

    def __init__(self, learner, nbins, isomerous):
        # learner: classifier that is fitted in ``fit`` and handed to FineGrainedSLD.
        # nbins: forwarded to FineGrainedSLD as ``n_bins``.
        # isomerous: forwarded as the first argument of ``FineGrainedSLD.run``.
        self.isomerous = isomerous
        self.nbins = nbins
        self.learner = learner

    def fit(self, data: LabelledCollection):
        """Remember the training set and fit the learner on it; returns ``self``."""
        instances, labels = data.Xy
        self.Xtr = instances
        self.ytr = labels
        self.learner.fit(instances, labels)
        return self

    def quantify(self, instances):
        """Return the priors estimated by FineGrainedSLD for ``instances``."""
        # NOTE(review): n_classes is fixed to 2, so this presumably targets
        # binary problems only -- confirm before using on multiclass data.
        training_priors = F.prevalence_from_labels(self.ytr, n_classes=2)
        em = FineGrainedSLD(self.Xtr, instances, self.ytr, training_priors, self.learner, n_bins=self.nbins)
        estimated_priors, _ = em.run(self.isomerous)
        return estimated_priors

    def get_params(self, deep=True):
        """Placeholder: no parameters are exposed; returns ``None``."""
        return None

    def set_params(self, **parameters):
        """Placeholder: the given parameters are ignored; returns ``None``."""
        return None
qp.environ['SAMPLE_SIZE'] = 500 qp.environ['SAMPLE_SIZE'] = 500
dataset = qp.datasets.fetch_reviews('hp') dataset = qp.datasets.fetch_reviews('kindle')
qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True) qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True)
training = dataset.training training = dataset.training
@ -50,8 +20,10 @@ method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
for model, model_name in [ for model, model_name in [
(CC(cls), 'CC'), (CC(cls), 'CC'),
(FakeFGLSD(cls, nbins=1, isomerous=False), 'FGSLD-1'), # (FakeFGLSD(cls, nbins=5, isomerous=False, recompute_bins=False), 'FGSLD-isometric-stat-5'),
(FakeFGLSD(cls, nbins=2, isomerous=False), 'FGSLD-2'), (FakeFGLSD(cls, nbins=5, isomerous=True, recompute_bins=True), 'FGSLD-isometric-dyn-5'),
# (FakeFGLSD(cls, nbins=5, isomerous=True, recompute_bins=False), 'FGSLD-isomerous-stat-5'),
# (FakeFGLSD(cls, nbins=10, isomerous=True, recompute_bins=True), 'FGSLD-isomerous-dyn-10'),
#(FakeFGLSD(cls, nbins=5, isomerous=False), 'FGSLD-5'), #(FakeFGLSD(cls, nbins=5, isomerous=False), 'FGSLD-5'),
#(FakeFGLSD(cls, nbins=10, isomerous=False), 'FGSLD-10'), #(FakeFGLSD(cls, nbins=10, isomerous=False), 'FGSLD-10'),
#(FakeFGLSD(cls, nbins=50, isomerous=False), 'FGSLD-50'), #(FakeFGLSD(cls, nbins=50, isomerous=False), 'FGSLD-50'),
@ -64,7 +36,7 @@ for model, model_name in [
print('running ', model_name) print('running ', model_name)
model.fit(training) model.fit(training)
true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction( true_prev, estim_prev = qp.evaluation.artificial_sampling_prediction(
model, test, qp.environ['SAMPLE_SIZE'], n_repetitions=10, n_prevpoints=21, n_jobs=-1 model, test, qp.environ['SAMPLE_SIZE'], n_repetitions=5, n_prevpoints=11, n_jobs=-1
) )
method_names.append(model_name) method_names.append(model_name)
true_prevs.append(true_prev) true_prevs.append(true_prev)

View File

@ -1,6 +1,8 @@
import numpy as np import numpy as np
from metrics import isomerous_bins, isometric_bins from metrics import isomerous_bins, isometric_bins
from em import History, get_measures_single_history from em import History, get_measures_single_history
from sklearn.model_selection import cross_val_predict
import math
class FineGrainedSLD: class FineGrainedSLD:
@ -8,13 +10,13 @@ class FineGrainedSLD:
self.y_tr = y_tr self.y_tr = y_tr
self.clf = clf self.clf = clf
self.tr_priors = tr_priors self.tr_priors = tr_priors
self.tr_preds = clf.predict_proba(x_tr)
self.te_preds = clf.predict_proba(x_te) self.te_preds = clf.predict_proba(x_te)
self.tr_preds = cross_val_predict(clf, x_tr, y_tr, method='predict_proba', n_jobs=10)
self.n_bins = n_bins self.n_bins = n_bins
self.history: [History] = [] self.history: [History] = []
self.multi_class = False self.multi_class = False
def run(self, isomerous_binning, epsilon=1e-6, compute_bins_at_every_iter=False, return_posteriors_hist=False): def run(self, isomerous_binning, epsilon=1e-6, compute_bins_at_every_iter=True, return_posteriors_hist=False):
""" """
Run the FGSLD algorithm. Run the FGSLD algorithm.
@ -24,8 +26,8 @@ class FineGrainedSLD:
:param return_posteriors_hist: whether to return posteriors at every iteration or not. :param return_posteriors_hist: whether to return posteriors at every iteration or not.
:return: If `return_posteriors_hist` is true, the returned posteriors will be a list of numpy arrays, else a single numpy array with posteriors at last iteration. :return: If `return_posteriors_hist` is true, the returned posteriors will be a list of numpy arrays, else a single numpy array with posteriors at last iteration.
""" """
smoothing_tr = 1 / (2 * self.y_tr.shape[0]) smoothing_tr = 1 / (2 * self.tr_preds.shape[0])
smoothing_te = smoothing_tr smoothing_te = 1 / (2 * self.te_preds.shape[0])
s = 0 s = 0
tr_bin_priors = np.zeros((self.n_bins, self.tr_preds.shape[1]), dtype=np.float) tr_bin_priors = np.zeros((self.n_bins, self.tr_preds.shape[1]), dtype=np.float)
te_bin_priors = np.zeros((self.n_bins, self.te_preds.shape[1]), dtype=np.float) te_bin_priors = np.zeros((self.n_bins, self.te_preds.shape[1]), dtype=np.float)
@ -53,15 +55,22 @@ class FineGrainedSLD:
for i, bin_ in enumerate(bins): for i, bin_ in enumerate(bins):
if bin_.shape[0] == 0: if bin_.shape[0] == 0:
continue continue
te = te_bin_priors[i][label_idx]
tr = tr_bin_priors[i][label_idx]
# local_min = (math.floor(tr * 10) / 10)
# local_max = local_min + .1
# trans = lambda l: min(max((l - local_min) / 1, 0), 1)
trans = lambda l: l
self.te_preds[:, label_idx][bin_] = (te_preds_cp[:, label_idx][bin_]) * \ self.te_preds[:, label_idx][bin_] = (te_preds_cp[:, label_idx][bin_]) * \
(te_bin_priors[i][label_idx] / te_bin_priors_prev[i][label_idx]) (trans(te) / trans(tr))
# Normalization step # Normalization step
self.te_preds = (self.te_preds.T / self.te_preds.sum(axis=1)).T self.te_preds = (self.te_preds / self.te_preds.sum(axis=1, keepdims=True))
val = 0 val = 0
for label_idx in range(te_bin_priors.shape[1]): for label_idx in range(te_bin_priors.shape[1]):
if (temp := max(abs((te_bin_priors[:, label_idx] / te_bin_priors_prev[:, label_idx]) - 1))) > val: temp = max(abs((te_bin_priors[:, label_idx] / te_bin_priors_prev[:, label_idx]) - 1))
if temp > val:
val = temp val = temp
s += 1 s += 1
if return_posteriors_hist: if return_posteriors_hist:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 162 KiB

After

Width:  |  Height:  |  Size: 163 KiB

View File

@ -4,7 +4,6 @@ from os import makedirs
import sys, os import sys, os
import pickle import pickle
from experiments import result_path from experiments import result_path
from gen_tables import save_table, experiment_errors
from tabular import Table from tabular import Table
import argparse import argparse
@ -42,6 +41,20 @@ nice = {
'Average': 'Average' 'Average': 'Average'
} }
def save_table(path, table):
    """Write the text ``table`` to the file at ``path``, announcing it on stdout."""
    print(f'saving results in {path}')
    with open(path, 'wt') as out_file:
        out_file.write(table)
def experiment_errors(path, dataset, method, loss):
    """Load a pickled experiment result and evaluate it with ``qp.error.<loss>``.

    The result file is located with ``result_path``; the loss tag used in the
    file name is ``loss`` prefixed with ``'m'`` unless it already starts with
    ``'m'``.

    :param path: directory containing the result files
    :param dataset: dataset name used in the file name
    :param method: method name used in the file name
    :param loss: name of an attribute of ``qp.error`` (e.g. ``'ae'``)
    :return: whatever the error function returns for the stored true and
        estimated prevalences, or ``None`` if the result file does not exist
    """
    loss_tag = loss if loss.startswith('m') else 'm' + loss
    path = result_path(path, dataset, method, loss_tag)
    if not os.path.exists(path):
        return None
    # Use a context manager so the file handle is closed deterministically
    # (the previous ``pickle.load(open(...))`` left it to the garbage collector).
    # NOTE: pickle must only be used on trusted, locally generated result files.
    with open(path, 'rb') as fin:
        true_prevs, estim_prevs, _, _, _, _ = pickle.load(fin)
    err_fn = getattr(qp.error, loss)
    return err_fn(true_prevs, estim_prevs)
def nicerm(key): def nicerm(key):
return '\mathrm{'+nice[key]+'}' return '\mathrm{'+nice[key]+'}'

View File

@ -1,48 +0,0 @@
from sklearn.linear_model import LogisticRegression
import quapy as qp
from classification.methods import PCALR
from method.meta import QuaNet
from quapy.method.aggregative import *
from NewMethods.methods import *
from experiments import run, SAMPLE_SIZE
import numpy as np
import itertools
from joblib import Parallel, delayed
import settings
import argparse
import torch
parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
parser.add_argument('results', metavar='RESULT_PATH', type=str, help='path to the directory where to store the results')
#parser.add_argument('svmperfpath', metavar='SVMPERF_PATH', type=str, help='path to the directory with svmperf')
args = parser.parse_args()
def quantification_models():
    """Yield ``(name, quantifier, hyperparameter_grid)`` triples to be run.

    Only the ``hdysld`` configuration is currently active; the PACCSLD and
    QuaNet variants that used to be listed here are disabled.
    """
    c_grid = np.logspace(-4, 5, 10)
    lr_params = {'C': c_grid, 'class_weight': [None, 'balanced']}
    svmperf_params = {'C': c_grid}  # kept for the (disabled) SVMperf-based variants

    base_learner = LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
    yield 'hdysld', OneVsAll(HDySLD(base_learner)), lr_params  # <-- promising!
if __name__ == '__main__':
print(f'Result folder: {args.results}')
np.random.seed(0)
optim_losses = ['mae']
datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
models = quantification_models()
results = Parallel(n_jobs=settings.N_JOBS)(
delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models)
)

View File

@ -1,4 +1,5 @@
import multiprocessing import multiprocessing
N_JOBS = -2 #multiprocessing.cpu_count() N_JOBS = -2 #multiprocessing.cpu_count()
SAMPLE_SIZE = 100 ENSEMBLE_N_JOBS=1
SAMPLE_SIZE = 100

View File

@ -1,89 +0,0 @@
AE RAE
SemEval13 SVM-KLD 0.0722 0.1720
SVM-NKLD 0.0714 0.2756
SVM-QBETA2 0.0782 0.2775
LR-CC 0.0996 0.3095
LR-EM 0.1191 0.3923
LR-PCC 0.0344 0.1506
LR-ACC 0.0806 0.2479
LR-PACC 0.0812 0.2626
SemEval14 SVM-KLD 0.0843 0.2268
SVM-NKLD 0.0836 0.3367
SVM-QBETA2 0.1018 0.3680
LR-CC 0.1043 0.3212
LR-EM 0.0807 0.3517
LR-PCC 0.1001 0.4277
LR-ACC 0.0581 0.2360
LR-PACC 0.0533 0.2573
SemEval15 SVM-KLD 0.1185 0.3789
SVM-NKLD 0.1155 0.4720
SVM-QBETA2 0.1263 0.4762
LR-CC 0.1101 0.2879
LR-EM 0.1204 0.2949
LR-PCC 0.0460 0.1973
LR-ACC 0.1064 0.2971
LR-PACC 0.1013 0.2729
SemEval16 SVM-KLD 0.0385 0.1512
SVM-NKLD 0.0830 0.3249
SVM-QBETA2 0.1201 0.5156
LR-CC 0.0500 0.1771
LR-EM 0.0646 0.2126
LR-PCC 0.0379 0.1553
LR-ACC 0.0542 0.2246
LR-PACC 0.0864 0.3504
Sanders SVM-KLD 0.0134 0.0630
SVM-NKLD 0.0950 0.3965
SVM-QBETA2 0.1098 0.4360
LR-CC 0.0671 0.2682
LR-EM 0.0715 0.2849
LR-PCC 0.0150 0.0602
LR-ACC 0.0338 0.1306
LR-PACC 0.0301 0.1173
SST SVM-KLD 0.0413 0.1458
SVM-NKLD 0.0749 0.2497
SVM-QBETA2 0.0671 0.2343
LR-CC 0.0330 0.1239
LR-EM 0.0369 0.1190
LR-PCC 0.0282 0.1068
LR-ACC 0.0492 0.1689
LR-PACC 0.0841 0.2302
OMD SVM-KLD 0.0305 0.0999
SVM-NKLD 0.0437 0.1279
SVM-QBETA2 0.0624 0.1826
LR-CC 0.0524 0.1527
LR-EM 0.0648 0.1886
LR-PCC 0.0046 0.0095
LR-ACC 0.0239 0.0753
LR-PACC 0.0100 0.0293
HCR SVM-KLD 0.0414 0.2191
SVM-NKLD 0.0604 0.2324
SVM-QBETA2 0.1272 0.4600
LR-CC 0.0525 0.1817
LR-EM 0.0895 0.3093
LR-PCC 0.0055 0.0202
LR-ACC 0.0240 0.1026
LR-PACC 0.0329 0.1436
GASP SVM-KLD 0.0171 0.0529
SVM-NKLD 0.0503 0.3416
SVM-QBETA2 0.0640 0.4402
LR-CC 0.0189 0.1297
LR-EM 0.0231 0.1589
LR-PCC 0.0097 0.0682
LR-ACC 0.0150 0.1038
LR-PACC 0.0087 0.0597
WA SVM-KLD 0.0647 0.1957
SVM-NKLD 0.0393 0.1357
SVM-QBETA2 0.0798 0.2332
LR-CC 0.0434 0.1270
LR-EM 0.0391 0.1145
LR-PCC 0.0338 0.0990
LR-ACC 0.0407 0.1197
LR-PACC 0.0277 0.0815
WB SVM-KLD 0.0613 0.1791
SVM-NKLD 0.0534 0.1756
SVM-QBETA2 0.0249 0.0774
LR-CC 0.0132 0.0399
LR-EM 0.0244 0.0773
LR-PCC 0.0123 0.0390
LR-ACC 0.0230 0.0719
LR-PACC 0.0165 0.0515

View File

@ -1,35 +0,0 @@
import numpy as np
import quapy as qp
import settings
import os
import pickle
from glob import glob
import itertools
import pathlib
qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
resultdir = './results'
methods = ['*']
def evaluate_results(methods, datasets, error_name):
    """Print the error of every stored experiment matching the given patterns.

    Result files are searched as ``{resultdir}/{dataset}-{method}-{error_name}.pkl``
    for every combination of ``methods`` and ``datasets`` (both may contain
    glob wildcards). One ``<file name>: <error>`` line is printed per file, in
    sorted order, followed by the average over all files.

    :param methods: iterable of method-name glob patterns
    :param datasets: iterable of dataset-name glob patterns
    :param error_name: name understood by ``qp.error.from_name``
    """
    report_lines = []
    scores = []  # was named ``all``, which shadowed the builtin
    error = qp.error.from_name(error_name)
    for method, dataset in itertools.product(methods, datasets):
        for experiment in glob(f'{resultdir}/{dataset}-{method}-{error_name}.pkl'):
            # Close the file deterministically instead of leaking the handle.
            # NOTE: pickle must only be used on trusted, locally generated files.
            with open(experiment, 'rb') as fin:
                true_prevalences, estim_prevalences, tr_prev, te_prev, te_prev_estim, best_params = \
                    pickle.load(fin)
            result = error(true_prevalences, estim_prevalences)
            report_lines.append(f'{pathlib.Path(experiment).name}: {result:.3f}')
            scores.append(result)

    for line in sorted(report_lines):
        print(line)

    print()
    # With no matching files the average is reported as nan, as before, but
    # without triggering NumPy's "mean of empty slice" warning.
    average = np.mean(scores) if scores else float('nan')
    print(f'Ave: {average:.3f}')
evaluate_results(methods=['epacc*mae1k'], datasets=['*'], error_name='mae')

View File

@ -1,214 +0,0 @@
from sklearn.linear_model import LogisticRegression
import quapy as qp
from classification.methods import PCALR
from method.meta import QuaNet
from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy
from quapy.method.meta import EPACC, EEMQ
import quapy.functional as F
import numpy as np
import os
import pickle
import itertools
from joblib import Parallel, delayed
import settings
import argparse
import torch
import shutil
qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
def newLR():
    """Return a fresh, unfitted LogisticRegression with the shared settings."""
    settings_lr = dict(max_iter=1000, solver='lbfgs', n_jobs=-1)
    return LogisticRegression(**settings_lr)
__C_range = np.logspace(-4, 5, 10)
lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
svmperf_params = {'C': __C_range}
def quantification_models():
    """Yield ``(name, quantifier, hyperparameter_grid)`` for the CPU-based methods.

    Quantifiers are instantiated lazily, one per ``yield``. The SVMperf-based
    ones read the SVMperf location from the module-level ``args.svmperfpath``.
    """
    # methods tested in Gao & Sebastiani 2016 that are built on logistic regression
    lr_based = (('cc', CC), ('acc', ACC), ('pcc', PCC), ('pacc', PACC), ('sld', EMQ))
    for name, quantifier_cls in lr_based:
        yield name, quantifier_cls(newLR()), lr_params

    # SVMperf-based methods: the first three come from Gao & Sebastiani 2016,
    # the last two (svmmae, svmmrae) are the methods added
    svmperf_based = (
        ('svmq', SVMQ),
        ('svmkld', SVMKLD),
        ('svmnkld', SVMNKLD),
        ('svmmae', SVMAE),
        ('svmmrae', SVMRAE),
    )
    for name, quantifier_cls in svmperf_based:
        yield name, OneVsAll(quantifier_cls(args.svmperfpath)), svmperf_params

    # also added
    yield 'hdy', OneVsAll(HDy(newLR())), lr_params
def quantification_cuda_models():
    """Yield the QuaNet configuration, using CUDA when torch reports it available.

    The checkpoint directory is read from the module-level ``args.checkpointdir``.
    """
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print(f'Running QuaNet in {device}')

    # PCALR is configured with the same parameters as the shared LogisticRegression.
    lr_settings = newLR().get_params()
    learner = PCALR(**lr_settings)
    quanet = QuaNet(learner, settings.SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device)
    yield 'quanet', quanet, lr_params
def quantification_ensembles():
    """Yield the ensemble configurations as ``(name, quantifier, None)`` triples.

    Hyperparameters are evaluated within each quantifier of the ensemble, so
    the usual model selection step is skipped by yielding ``None`` as the
    hyperparameter grid. Only ``epaccmaemae1k`` is currently active; the other
    EPACC/EEMQ variants (ptr/mae/mrae policies) are disabled.
    """
    param_mod_sel = dict(
        sample_size=settings.SAMPLE_SIZE,
        n_prevpoints=21,
        n_repetitions=5,
        verbose=False,
    )
    common = dict(
        max_sample_size=1000,
        n_jobs=settings.ENSEMBLE_N_JOBS,
        param_grid=lr_params,
        param_mod_sel=param_mod_sel,
        val_split=0.4,
        min_pos=10,
    )
    ensemble = EPACC(newLR(), optim='mae', policy='mae', **common)
    yield 'epaccmaemae1k', ensemble, None
def evaluate_experiment(true_prevalences, estim_prevalences):
    """Print ``qp.error.mae`` and ``qp.error.mrae`` for the given prevalences."""
    header = '\nEvaluation Metrics:\n' + '=' * 22
    print(header)
    for measure in (qp.error.mae, qp.error.mrae):
        value = measure(true_prevalences, estim_prevalences)
        print(f'\t{measure.__name__}={value:.4f}')
    print()
def evaluate_method_point_test(true_prev, estim_prev):
    """Print both prevalences and their ``mae``/``mrae`` for a single test point."""
    header = '\nPoint-Test evaluation:\n' + '=' * 22
    print(header)
    print(f'true-prev={F.strprev(true_prev)}, estim-prev={F.strprev(estim_prev)}')
    for measure in (qp.error.mae, qp.error.mrae):
        value = measure(true_prev, estim_prev)
        print(f'\t{measure.__name__}={value:.4f}')
def result_path(path, dataset_name, model_name, optim_loss):
    """Return ``<path>/<dataset_name>-<model_name>-<optim_loss>.pkl``."""
    filename = f'{dataset_name}-{model_name}-{optim_loss}.pkl'
    return os.path.join(path, filename)
def is_already_computed(dataset_name, model_name, optim_loss):
    """Tell whether every result file expected for this experiment exists.

    The ``'semeval'`` dataset is checked against its three test sets
    (semeval13/14/15); any other dataset is checked under its own name.
    Result files are looked up in the module-level ``args.results`` directory.
    """
    expected = ['semeval13', 'semeval14', 'semeval15'] if dataset_name == 'semeval' else [dataset_name]
    for name in expected:
        if not os.path.exists(result_path(args.results, name, model_name, optim_loss)):
            return False
    return True
def save_results(dataset_name, model_name, optim_loss, *results):
    """Pickle ``results`` (as a tuple) to the result file of this experiment.

    The destination is built with ``result_path`` under ``args.results``;
    ``qp.util.create_parent_dir`` is called on it before writing.
    """
    destination = result_path(args.results, dataset_name, model_name, optim_loss)
    qp.util.create_parent_dir(destination)
    payload = tuple(results)
    with open(destination, 'wb') as out_file:
        pickle.dump(payload, out_file, pickle.HIGHEST_PROTOCOL)
def run(experiment):
    """Run one experiment: optional model selection, fitting, evaluation, saving.

    :param experiment: tuple ``(optim_loss, dataset_name, (model_name, model, hyperparams))``;
        when ``hyperparams`` is ``None`` the model selection step is skipped.
    :return: ``None``; results are written through ``save_results``.
    """
    optim_loss, dataset_name, (model_name, model, hyperparams) = experiment

    if is_already_computed(dataset_name, model_name, optim_loss=optim_loss):
        print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.')
        return

    # models whose name embeds a loss are only run when optimizing that loss
    loss_mismatch = (optim_loss == 'mae' and 'mrae' in model_name) or \
                    (optim_loss == 'mrae' and 'mae' in model_name)
    if loss_mismatch:
        print(f'skipping model={model_name} for optim_loss={optim_loss}')
        return

    print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')

    benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
    benchmark_devel.stats()

    # model selection (hyperparameter optimization for a quantification-oriented loss)
    best_params = {}
    if hyperparams is not None:
        model_selection = qp.model_selection.GridSearchQ(
            model,
            param_grid=hyperparams,
            sample_size=settings.SAMPLE_SIZE,
            n_prevpoints=21,
            n_repetitions=5,
            error=optim_loss,
            refit=False,
            timeout=60*60,
            verbose=True
        )
        model_selection.fit(benchmark_devel.training, benchmark_devel.test)
        model = model_selection.best_model()
        best_params = model_selection.best_params_

    # model evaluation: 'semeval' is evaluated on its three test sets
    if dataset_name == 'semeval':
        test_names = ['semeval13', 'semeval14', 'semeval15']
    else:
        test_names = [dataset_name]

    # ensembles are evaluated in parallel, any other model sequentially
    eval_jobs = -1 if isinstance(model, qp.method.meta.Ensemble) else 1

    for test_no, test_name in enumerate(test_names):
        benchmark_eval = qp.datasets.fetch_twitter(test_name, for_model_selection=False, min_df=5, pickle=True)
        if test_no == 0:
            # the model is fitted only once, on the training set of the first benchmark
            print('fitting the selected model')
            model.fit(benchmark_eval.training)

        true_prevalences, estim_prevalences = qp.evaluation.artificial_sampling_prediction(
            model,
            test=benchmark_eval.test,
            sample_size=settings.SAMPLE_SIZE,
            n_prevpoints=21,
            n_repetitions=25,
            n_jobs=eval_jobs
        )
        test_estim_prevalence = model.quantify(benchmark_eval.test.instances)
        test_true_prevalence = benchmark_eval.test.prevalence()

        evaluate_experiment(true_prevalences, estim_prevalences)
        evaluate_method_point_test(test_true_prevalence, test_estim_prevalence)
        save_results(
            test_name, model_name, optim_loss,
            true_prevalences, estim_prevalences,
            benchmark_eval.training.prevalence(), test_true_prevalence, test_estim_prevalence,
            best_params
        )

    # QuaNet checkpoint clean-up (model.clean_checkpoint_dir()) is currently disabled
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Run experiments for Tweeter Sentiment Quantification')
parser.add_argument('results', metavar='RESULT_PATH', type=str,
help='path to the directory where to store the results')
parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
help='path to the directory with svmperf')
parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint',
help='path to the directory where to dump QuaNet checkpoints')
args = parser.parse_args()
print(f'Result folder: {args.results}')
np.random.seed(0)
optim_losses = ['mae', 'mrae']
datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
models = quantification_models()
qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)
models = quantification_cuda_models()
qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.CUDA_N_JOBS)
models = quantification_ensembles()
qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
# Parallel(n_jobs=1)(
# delayed(run)(experiment) for experiment in itertools.product(optim_losses, datasets, models)
# )
#shutil.rmtree(args.checkpointdir, ignore_errors=True)

View File

@ -1,95 +0,0 @@
import quapy as qp
import settings
import os
import pathlib
import pickle
from glob import glob
import sys
from TweetSentQuant.util import nicename
from os.path import join
qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
plotext='png'
resultdir = './results'
plotdir = './plots'
os.makedirs(plotdir, exist_ok=True)
def gather_results(methods, error_name):
    """Collect the stored results of ``methods`` for the given error measure.

    Result files are searched as ``{resultdir}/*-{method}-m{error_name}.pkl``.

    :param methods: iterable of method names
    :param error_name: error name without the leading ``'m'`` (e.g. ``'ae'``)
    :return: four parallel lists ``(method_names, true_prevs, estim_prevs, tr_prevs)``
        with one entry per result file found
    """
    method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
    for method in methods:
        for experiment in glob(f'{resultdir}/*-{method}-m{error_name}.pkl'):
            # Close the file deterministically instead of leaking the handle.
            # NOTE: pickle must only be used on trusted, locally generated files.
            with open(experiment, 'rb') as fin:
                true_prevalences, estim_prevalences, tr_prev, te_prev, te_prev_estim, best_params = \
                    pickle.load(fin)
            method_names.append(nicename(method))
            true_prevs.append(true_prevalences)
            estim_prevs.append(estim_prevalences)
            tr_prevs.append(tr_prev)
    return method_names, true_prevs, estim_prevs, tr_prevs
def plot_error_by_drift(methods, error_name, logscale=False, path=None):
    """Plot the error of ``methods`` via ``qp.plot.error_by_drift``.

    :param methods: method names passed to ``gather_results``
    :param error_name: error name, used both to find results and in the file name
    :param logscale: forwarded to ``qp.plot.error_by_drift``
    :param path: output directory; when ``None``, ``savepath=None`` is forwarded
    """
    print('plotting error by drift')
    savepath = path
    if savepath is not None:
        savepath = join(savepath, f'error_by_drift_{error_name}.{plotext}')

    gathered = gather_results(methods, error_name)
    method_names, true_prevs, estim_prevs, tr_prevs = gathered
    qp.plot.error_by_drift(
        method_names,
        true_prevs,
        estim_prevs,
        tr_prevs,
        n_bins=20,
        error_name=error_name,
        show_std=False,
        logscale=logscale,
        title='Quantification error as a function of distribution shift',
        savepath=savepath,
    )
def diagonal_plot(methods, error_name, path=None):
    """Draw one diagonal plot per class (Negative, Neutral, Positive).

    :param methods: method names passed to ``gather_results``
    :param error_name: error name, used both to find results and in the file names
    :param path: output directory. When ``None``, ``savepath=None`` is forwarded
        to ``qp.plot.binary_diagonal`` (as ``plot_error_by_drift`` does) instead
        of the bogus file name ``'None_neg.png'`` the f-string used to produce.
    """
    print('plotting diagonal plots')
    prefix = None if path is None else join(path, f'diag_{error_name}')
    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)

    # (class index, title, file suffix, show legend)
    panels = [(0, 'Negative', 'neg', False), (1, 'Neutral', 'neu', False), (2, 'Positive', 'pos', True)]
    for pos_class, title, suffix, legend in panels:
        savepath = None if prefix is None else f'{prefix}_{suffix}.{plotext}'
        qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=pos_class, title=title,
                                legend=legend, show_std=False, savepath=savepath)
def binary_bias_global(methods, error_name, path=None):
    """Draw one global-bias plot per class (Negative, Neutral, Positive).

    :param methods: method names passed to ``gather_results``
    :param error_name: error name, used both to find results and in the file names
    :param path: output directory. When ``None``, ``savepath=None`` is forwarded
        to ``qp.plot.binary_bias_global`` instead of the bogus file name
        ``'None_neg.png'`` the f-string used to produce.
    """
    print('plotting bias global')
    prefix = None if path is None else join(path, f'globalbias_{error_name}')
    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)

    # (class index, title, file suffix)
    panels = [(0, 'Negative', 'neg'), (1, 'Neutral', 'neu'), (2, 'Positive', 'pos')]
    for pos_class, title, suffix in panels:
        savepath = None if prefix is None else f'{prefix}_{suffix}.{plotext}'
        qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=pos_class, title=title,
                                   savepath=savepath)
def binary_bias_bins(methods, error_name, path=None):
    """Draw one local (binned) bias plot per class (Negative, Neutral, Positive).

    :param methods: method names passed to ``gather_results``
    :param error_name: error name, used both to find results and in the file names
    :param path: output directory. When ``None``, ``savepath=None`` is forwarded
        to ``qp.plot.binary_bias_bins`` instead of the bogus file name
        ``'None_neg.png'`` the f-string used to produce.
    """
    print('plotting bias local')
    prefix = None if path is None else join(path, f'localbias_{error_name}')
    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)

    # (class index, title, file suffix, show legend)
    panels = [(0, 'Negative', 'neg', False), (1, 'Neutral', 'neu', False), (2, 'Positive', 'pos', True)]
    for pos_class, title, suffix, legend in panels:
        savepath = None if prefix is None else f'{prefix}_{suffix}.{plotext}'
        qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=pos_class, title=title,
                                 legend=legend, savepath=savepath)
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
new_methods_ae = ['svmmae' , 'epaccmaeptr', 'epaccmaemae', 'hdy', 'quanet']
new_methods_rae = ['svmmrae' , 'epaccmraeptr', 'epaccmraemrae', 'hdy', 'quanet']
plot_error_by_drift(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir)
diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
#binary_bias_bins(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
#binary_bias_bins(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)

View File

@ -1,145 +0,0 @@
import quapy as qp
import numpy as np
from os import makedirs
import sys, os
import pickle
import argparse
from TweetSentQuant.util import nicename, get_ranks_from_Gao_Sebastiani
import settings
from experiments import result_path
from tabular import Table
tables_path = './tables'
MAXTONE = 50 # sets the intensity of the maximum color reached by the worst (red) and best (green) results
makedirs(tables_path, exist_ok=True)
qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
def save_table(path, table):
    """Write the text ``table`` to the file at ``path``, announcing it on stdout."""
    print(f'saving results in {path}')
    with open(path, 'wt') as out_file:
        out_file.write(table)
def experiment_errors(path, dataset, method, loss):
    """Load a pickled experiment result and evaluate it with ``qp.error.<loss>``.

    The result file is located with ``result_path``; the loss tag used in the
    file name is ``loss`` prefixed with ``'m'`` unless it already starts with
    ``'m'``.

    :param path: directory containing the result files
    :param dataset: dataset name used in the file name
    :param method: method name used in the file name
    :param loss: name of an attribute of ``qp.error`` (e.g. ``'ae'``)
    :return: whatever the error function returns for the stored true and
        estimated prevalences, or ``None`` if the result file does not exist
    """
    loss_tag = loss if loss.startswith('m') else 'm' + loss
    path = result_path(path, dataset, method, loss_tag)
    if not os.path.exists(path):
        return None
    # Use a context manager so the file handle is closed deterministically
    # (the previous ``pickle.load(open(...))`` left it to the garbage collector).
    # NOTE: pickle must only be used on trusted, locally generated result files.
    with open(path, 'rb') as fin:
        true_prevs, estim_prevs, _, _, _, _ = pickle.load(fin)
    err_fn = getattr(qp.error, loss)
    return err_fn(true_prevs, estim_prevs)
if __name__ == '__main__':
    # Command-line driver: for each evaluation measure (AE, RAE) writes a LaTeX
    # table of error scores and a LaTeX table of ranks into ./tables.
    #
    # All LaTeX backslashes are written as explicit '\\' escapes: sequences such
    # as '\h', '\m', '\c' and '\e' are invalid escape sequences in ordinary
    # string literals (a warning today, slated to become an error). The
    # resulting text is unchanged.
    parser = argparse.ArgumentParser(description='Generate tables for Tweeter Sentiment Quantification')
    parser.add_argument('results', metavar='RESULT_PATH', type=str,
                        help='path to the directory where to store the results')
    args = parser.parse_args()

    datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
    evaluation_measures = [qp.error.ae, qp.error.rae]
    gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
    new_methods = ['hdy', 'quanet']

    gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()

    for i, eval_func in enumerate(evaluation_measures):

        # Tables evaluation scores for AE and RAE (two tables)
        # ----------------------------------------------------

        eval_name = eval_func.__name__
        added_methods = ['svmm' + eval_name, f'epaccm{eval_name}ptr', f'epaccm{eval_name}m{eval_name}'] + new_methods
        methods = gao_seb_methods + added_methods
        nold_methods = len(gao_seb_methods)
        nnew_methods = len(added_methods)

        # fill data table
        table = Table(benchmarks=datasets, methods=methods)
        for dataset in datasets:
            for method in methods:
                table.add(dataset, method, experiment_errors(args.results, dataset, method, eval_name))

        # write the latex table
        tabular = (
            '\n'
            '\\resizebox{\\textwidth}{!}{%\n'
            '\\begin{tabular}{|c||' + ('c|' * nold_methods) + '|' + ('c|' * nnew_methods) + '} \\hline\n'
            '& \\multicolumn{' + str(nold_methods) + '}{c||}{Methods tested in~\\cite{Gao:2016uq}} &\n'
            '\\multicolumn{' + str(nnew_methods) + '}{c|}{} \\\\ \\hline\n'
        )
        rowreplace = {dataset: nicename(dataset) for dataset in datasets}
        colreplace = {method: nicename(method, eval_name, side=True) for method in methods}

        tabular += table.latexTabular(benchmark_replace=rowreplace, method_replace=colreplace)
        tabular += '\n\\end{tabular}%\n}\n'

        save_table(f'./tables/tab_results_{eval_name}.new.tex', tabular)

        # Tables ranks for AE and RAE (two tables)
        # ----------------------------------------------------
        methods = gao_seb_methods
        table.dropMethods(added_methods)

        # fill the data table
        ranktable = Table(benchmarks=datasets, methods=methods, missing='--')
        for dataset in datasets:
            for method in methods:
                ranktable.add(dataset, method, values=table.get(dataset, method, 'rank'))

        # write the latex table
        tabular = (
            '\n'
            '\\resizebox{\\textwidth}{!}{%\n'
            '\\begin{tabular}{|c||' + ('c|' * len(gao_seb_methods)) + '} \\hline\n'
            '& \\multicolumn{' + str(nold_methods) + '}{c|}{Methods tested in~\\cite{Gao:2016uq}} \\\\ \\hline\n'
        )

        for method in methods:
            tabular += ' & ' + nicename(method, eval_name, side=True)
        tabular += '\\\\\\hline\n'

        for dataset in datasets:
            tabular += nicename(dataset) + ' '
            for method in methods:
                newrank = ranktable.get(dataset, method)
                oldrank = gao_seb_ranks[f'{dataset}-{method}-{eval_name}']
                if newrank != '--':
                    newrank = f'{int(newrank)}'
                color = ranktable.get_color(dataset, method)
                if color == '--':
                    color = ''
                tabular += ' & ' + f'{newrank}' + f' ({oldrank}) ' + color
            tabular += '\\\\\\hline\n'
        tabular += '\\hline\n'

        tabular += 'Average '
        for method in methods:
            newrank = ranktable.get_average(method)
            oldrank = gao_seb_ranks[f'Average-{method}-{eval_name}']
            if newrank != '--':
                newrank = f'{newrank:.1f}'
            # NOTE(review): assumed to run unconditionally (the old rank is
            # always formatted) -- confirm against the original indentation.
            oldrank = f'{oldrank:.1f}'
            color = ranktable.get_average(method, 'color')
            if color == '--':
                color = ''
            tabular += ' & ' + f'{newrank}' + f' ({oldrank}) ' + color
        tabular += '\\\\\\hline\n'

        tabular += '\n\\end{tabular}%\n}\n'

        save_table(f'./tables/tab_rank_{eval_name}.new.tex', tabular)

    print("[Done]")

View File

@ -1,8 +0,0 @@
import multiprocessing
N_JOBS = -2 #multiprocessing.cpu_count()
CUDA_N_JOBS = 2
ENSEMBLE_N_JOBS = -2
SAMPLE_SIZE = 100

View File

@ -1,318 +0,0 @@
import numpy as np
import itertools
from scipy.stats import ttest_ind_from_stats, wilcoxon
class Table:
VALID_TESTS = [None, "wilcoxon", "ttest"]
def __init__(self, benchmarks, methods, lower_is_better=True, ttest='ttest', prec_mean=3,
clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--', color=True):
assert ttest in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'
self.benchmarks = np.asarray(benchmarks)
self.benchmark_index = {row:i for i, row in enumerate(benchmarks)}
self.methods = np.asarray(methods)
self.method_index = {col:j for j, col in enumerate(methods)}
self.map = {}
# keyed (#rows,#cols)-ndarrays holding computations from self.map['values']
self._addmap('values', dtype=object)
self.lower_is_better = lower_is_better
self.ttest = ttest
self.prec_mean = prec_mean
self.clean_zero = clean_zero
self.show_std = show_std
self.prec_std = prec_std
self.add_average = average
self.missing = missing
self.missing_str = missing_str
self.color = color
self.touch()
@property
def nbenchmarks(self):
return len(self.benchmarks)
@property
def nmethods(self):
return len(self.methods)
def touch(self):
self._modif = True
def update(self):
if self._modif:
self.compute()
def _getfilled(self):
return np.argwhere(self.map['fill'])
@property
def values(self):
return self.map['values']
def _indexes(self):
return itertools.product(range(self.nbenchmarks), range(self.nmethods))
def _addmap(self, map, dtype, func=None):
self.map[map] = np.empty((self.nbenchmarks, self.nmethods), dtype=dtype)
if func is None:
return
m = self.map[map]
f = func
indexes = self._indexes() if map == 'fill' else self._getfilled()
for i, j in indexes:
m[i, j] = f(self.values[i, j])
def _addrank(self):
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
col_means = [self.map['mean'][i,j] for j in filled_cols_idx]
ranked_cols_idx = filled_cols_idx[np.argsort(col_means)]
if not self.lower_is_better:
ranked_cols_idx = ranked_cols_idx[::-1]
self.map['rank'][i, ranked_cols_idx] = np.arange(1, len(filled_cols_idx)+1)
def _addcolor(self):
for i in range(self.nbenchmarks):
filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
if filled_cols_idx.size==0:
continue
col_means = [self.map['mean'][i,j] for j in filled_cols_idx]
minval = min(col_means)
maxval = max(col_means)
for col_idx in filled_cols_idx:
val = self.map['mean'][i,col_idx]
norm = (maxval - minval)
if norm > 0:
normval = (val - minval) / norm
else:
normval = 0.5
if self.lower_is_better:
normval = 1 - normval
self.map['color'][i, col_idx] = color_red2green_01(normval)
def _run_ttest(self, row, col1, col2):
mean1 = self.map['mean'][row, col1]
std1 = self.map['std'][row, col1]
nobs1 = self.map['nobs'][row, col1]
mean2 = self.map['mean'][row, col2]
std2 = self.map['std'][row, col2]
nobs2 = self.map['nobs'][row, col2]
_, p_val = ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
return p_val
def _run_wilcoxon(self, row, col1, col2):
values1 = self.map['values'][row, col1]
values2 = self.map['values'][row, col2]
_, p_val = wilcoxon(values1, values2)
return p_val
def _add_statistical_test(self):
    """Fill the 'ttest' map by testing every filled cell against the best one in its row.

    Does nothing when self.ttest is None. Otherwise, for each row with at least
    two filled cells, the best column is the one with the lowest mean when
    lower_is_better and the one with the highest mean otherwise. Every other
    filled column is compared against it with a t-test (self.ttest == 'ttest')
    or a Wilcoxon test (any other value), and the interpreted outcome is stored
    in the 'ttest' map. self.some_similar[j] is set when column j has at least
    one outcome other than 'Diff'.
    """
    if self.ttest is None:
        return
    self.some_similar = [False] * self.nmethods
    # the reference column must agree with the ranking direction; always taking
    # the minimum would test against the *worst* method when higher is better
    pick_best = np.argmin if self.lower_is_better else np.argmax
    run_test = self._run_ttest if self.ttest == 'ttest' else self._run_wilcoxon
    for i in range(self.nbenchmarks):
        filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
        if len(filled_cols_idx) <= 1:
            continue
        col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
        best_pos = filled_cols_idx[pick_best(col_means)]

        for j in filled_cols_idx:
            if j == best_pos:
                continue
            p_val = run_test(i, best_pos, j)
            pval_outcome = pval_interpretation(p_val)
            self.map['ttest'][i, j] = pval_outcome
            if pval_outcome != 'Diff':
                self.some_similar[j] = True
def compute(self):
    """Rebuild every derived map from the raw values and clear the modified flag.

    'fill' is built first because the other computed maps are only evaluated on
    filled cells; 'rank', 'color', 'ttest' and 'latex' are allocated here and
    populated (where applicable) by the dedicated helpers called afterwards.
    """
    map_specs = (
        ('fill', bool, lambda x: x is not None),
        ('mean', float, np.mean),
        ('std', float, np.std),
        ('nobs', float, len),
        ('rank', int, None),
        ('color', object, None),
        ('ttest', object, None),
        ('latex', object, None),
    )
    for name, dtype, func in map_specs:
        self._addmap(name, dtype=dtype, func=func)
    for step in (self._addrank, self._addcolor, self._add_statistical_test):
        step()
    if self.add_average:
        self._addave()
    self._modif = False
def _is_column_full(self, col):
return all(self.map['fill'][:, self.method_index[col]])
def _addave(self):
    """Build the one-row 'ave' table summarising each method and store it in self.average.

    Only methods whose column is completely filled get values; the others are
    added as None. With the t-test the summary holds the per-benchmark means of
    the column, otherwise (Wilcoxon) it holds all raw values of the column
    concatenated.
    """
    summary = Table(['ave'], self.methods, lower_is_better=self.lower_is_better, ttest=self.ttest,
                    average=False, missing=self.missing, missing_str=self.missing_str)
    use_means = self.ttest == 'ttest'
    for name in self.methods:
        pooled = None
        if self._is_column_full(name):
            col_idx = self.method_index[name]
            if use_means:
                pooled = np.asarray(self.map['mean'][:, col_idx])
            else:  # wilcoxon
                pooled = np.concatenate(self.values[:, col_idx])
        summary.add('ave', name, pooled)
    self.average = summary
def add(self, benchmark, method, values):
    """Store the raw values of one (benchmark, method) cell and mark the table as modified.

    :param benchmark: benchmark (row) name
    :param method: method (column) name
    :param values: array-like of observations, a scalar (stored as a 1-element
        array), or None to leave the cell empty
    """
    if values is not None:
        values = np.asarray(values)
        # promote 0-d arrays so that every stored cell is at least 1-d
        values = values.flatten() if values.ndim == 0 else values
    row, col = self._coordinates(benchmark, method)
    self.map['values'][row, col] = values
    self.touch()
def get(self, benchmark, method, attr='mean'):
    """Return the attr map entry of a cell, or self.missing when unavailable.

    The table is brought up to date first. self.missing is returned when the
    cell is not filled, or when the stored entry is None or a float NaN.
    """
    self.update()
    assert attr in self.map, f'unknwon attribute {attr}'
    row, col = self._coordinates(benchmark, method)
    if not self.map['fill'][row, col]:
        return self.missing
    cell = self.map[attr][row, col]
    is_nan = isinstance(cell, float) and np.isnan(cell)
    if cell is None or is_nan:
        return self.missing
    return cell
def _coordinates(self, benchmark, method):
assert benchmark in self.benchmark_index, f'benchmark {benchmark} out of range'
assert method in self.method_index, f'method {method} out of range'
rid = self.benchmark_index[benchmark]
cid = self.method_index[method]
return rid, cid
def get_average(self, method, attr='mean'):
    """Return attr for method from the average row, or None if the table keeps no average."""
    self.update()
    if not self.add_average:
        return None
    return self.average.get('ave', method, attr=attr)
def get_color(self, benchmark, method):
    """Return the 'color' attribute of a cell, with None replaced by an empty string."""
    cell_color = self.get(benchmark, method, attr='color')
    return '' if cell_color is None else cell_color
def latex(self, benchmark, method):
    r"""Render one cell as a LaTeX string.

    Returns self.missing_str for an unfilled cell. Otherwise the mean is
    formatted with prec_mean decimals (dropping the leading zero when
    clean_zero is set) and wrapped in \textbf when the cell has rank 1.
    When a statistical test is configured and the column has some non-'Diff'
    outcome, a dagger / double-dagger / phantom superscript is added. The
    standard deviation is appended when show_std is set, and the cell colour
    command when color is set.
    """
    self.update()
    i, j = self._coordinates(benchmark, method)
    if not self.map['fill'][i, j]:
        return self.missing_str

    mean = self.map['mean'][i, j]
    l = f" {mean:.{self.prec_mean}f}"
    if self.clean_zero:
        l = l.replace(' 0.', '.')

    isbest = self.map['rank'][i, j] == 1
    if isbest:
        l = "\\textbf{" + l.strip() + "}"

    stat = ''
    if self.ttest is not None and self.some_similar[j]:
        test_label = self.map['ttest'][i, j]
        if test_label == 'Sim':
            stat = r'^{\dag\phantom{\dag}}'
        elif test_label == 'Same':
            stat = r'^{\ddag}'
        elif isbest or test_label == 'Diff':
            # phantom keeps the column width aligned with the marked cells
            stat = r'^{\phantom{\ddag}}'

    std = ''
    if self.show_std:
        std = self.map['std'][i, j]
        std = f" {std:.{self.prec_std}f}"
        if self.clean_zero:
            std = std.replace(' 0.', '.')
        std = rf" \pm {std:{self.prec_std}}"

    if stat != '' or std != '':
        l = f'{l}${stat}{std}$'

    if self.color:
        # the colour is None when the normalised mean was NaN; emit nothing then
        cell_color = self.map['color'][i, j]
        if cell_color is not None:
            l += ' ' + cell_color

    return l
def latexTabular(self, benchmark_replace=None, method_replace=None, average=True):
    """Render the body of a LaTeX tabular: header, one line per benchmark, optional average.

    :param benchmark_replace: optional dict mapping benchmark names to display names
    :param method_replace: optional dict mapping method names to display names
    :param average: whether to append the 'Average' row; it is only emitted
        when the table actually keeps an average (self.add_average), since
        latexAverage() returns None otherwise
    :return: the tabular body as a single string
    """
    benchmark_replace = {} if benchmark_replace is None else benchmark_replace
    method_replace = {} if method_replace is None else method_replace

    header = ' & ' + ' & '.join([method_replace.get(col, col) for col in self.methods])
    pieces = [header + ' \\\\\\hline\n']
    for row in self.benchmarks:
        rowname = benchmark_replace.get(row, row)
        pieces.append(rowname + ' & ' + self.latexRow(row))

    if average and self.add_average:
        pieces.append('\\hline\n' + 'Average & ' + self.latexAverage())
    return ''.join(pieces)
def latexRow(self, benchmark, endl='\\\\\\hline\n'):
    """Render one benchmark row: the LaTeX cells of all methods joined by ' & '.

    :param benchmark: benchmark (row) name
    :param endl: row terminator appended after a single space
    :return: the row as a string
    """
    cells = [self.latex(benchmark, col) for col in self.methods]
    return ' & '.join(cells) + ' ' + endl
def latexAverage(self, endl='\\\\\\hline\n'):
    """Render the average row, or return None when the table keeps no average.

    :param endl: row terminator forwarded to the average table's latexRow
    """
    if not self.add_average:
        return None
    return self.average.latexRow('ave', endl=endl)
def getRankTable(self):
    """Return a new Table whose cell values are the ranks stored in this table.

    The new table shares benchmarks and methods with this one, shows means
    with zero decimals and keeps an average row.
    """
    # the 'fill' map read by _getfilled() only exists (and is only current)
    # after compute(); make sure of that before iterating over it
    self.update()
    t = Table(benchmarks=self.benchmarks, methods=self.methods, prec_mean=0, average=True)
    for rid, cid in self._getfilled():
        row = self.benchmarks[rid]
        col = self.methods[cid]
        t.add(row, col, self.get(row, col, 'rank'))
    t.compute()
    return t
def dropMethods(self, methods):
    """Remove the given methods (columns) from the table and mark it as modified.

    :param methods: iterable of method names to drop; each must be a known method
    """
    to_remove = [self.method_index[name] for name in methods]
    kept = np.delete(self.methods, to_remove)
    # old column positions of the surviving methods, in their new order
    kept_cols = np.asarray([self.method_index[name] for name in kept], dtype=int)
    self.map['values'] = self.values[:, kept_cols]
    self.methods = kept
    self.method_index = {name: pos for pos, name in enumerate(kept)}
    self.touch()
def pval_interpretation(p_val):
    """Map a p-value to a label.

    'Diff' for p <= 0.005, 'Sim' for 0.005 < p <= 0.05, 'Same' for p > 0.05.
    A value that satisfies none of the comparisons (e.g. NaN) yields None.
    """
    if p_val <= 0.005:
        return 'Diff'
    if p_val <= 0.05:
        return 'Sim'
    if p_val > 0.05:
        return 'Same'
    return None
def color_red2green_01(val, maxtone=50):
    r"""Convert a score in [0, 1] into a LaTeX \cellcolor command.

    The score is rescaled to [-1, 1]; negative values give red and the rest
    green, with an intensity of maxtone times the absolute rescaled value,
    truncated to an integer.

    :param val: score in [0, 1]; NaN yields None
    :param maxtone: colour intensity used at the extremes of the scale
    :return: a string such as \cellcolor{green!25}, or None for NaN
    :raises AssertionError: if val lies outside [0, 1]
    """
    if np.isnan(val):
        return None
    assert 0 <= val <= 1, f'val {val} out of range [0,1]'

    # rescale to [-1,1]
    val = val * 2 - 1
    if val < 0:
        color = 'red'
        tone = maxtone * (-val)
    else:
        color = 'green'
        tone = maxtone * val
    return r'\cellcolor{' + f'{color}!{int(tone)}' + '}'

View File

@ -1,89 +0,0 @@
import numpy as np
# Display names for evaluation measures, methods and datasets, keyed by their
# internal identifiers. Entries containing LaTeX commands are raw strings so
# that the backslashes are not parsed as (invalid) escape sequences.
nice = {
    'mae': 'AE',
    'mrae': 'RAE',
    'ae': 'AE',
    'rae': 'RAE',
    'svmkld': 'SVM(KLD)',
    'svmnkld': 'SVM(NKLD)',
    'svmq': 'SVM(Q)',
    'svmae': 'SVM(AE)',
    'svmnae': 'SVM(NAE)',
    'svmmae': 'SVM(AE)',
    'svmmrae': 'SVM(RAE)',
    'quanet': 'QuaNet',
    'hdy': 'HDy',
    'dys': 'DyS',
    'epaccmaeptr': r'E(PACC)$_\mathrm{Ptr}$',
    'epaccmaemae': r'E(PACC)$_\mathrm{AE}$',
    'epaccmraeptr': r'E(PACC)$_\mathrm{Ptr}$',
    'epaccmraemrae': r'E(PACC)$_\mathrm{RAE}$',
    'svmperf': '',
    'sanders': 'Sanders',
    'semeval13': 'SemEval13',
    'semeval14': 'SemEval14',
    'semeval15': 'SemEval15',
    'semeval16': 'SemEval16',
    'Average': 'Average',
}
def nicerm(key):
    r"""Return the display name of key wrapped in a LaTeX \mathrm{...} command.

    :raises KeyError: if key has no entry in the nice dictionary
    """
    return r'\mathrm{' + nice[key] + '}'
def nicename(method, eval_name=None, side=False):
    r"""Build the display name of a method for a LaTeX table.

    :param method: method identifier; falls back to its upper-cased form when
        it has no entry in the nice dictionary
    :param eval_name: optional evaluation-measure identifier, appended as a
        math-mode superscript
    :param side: whether to wrap the result in a \side{...} command
    :return: the display name
    """
    m = nice.get(method, method.upper())
    if eval_name is not None:
        o = '$^{' + nicerm(eval_name) + '}$'
        # a name ending in math mode followed by the superscript would yield
        # '$$'; dropping it merges the two math segments into one
        m = (m + o).replace('$$', '')
    if side:
        m = r'\side{' + m + '}'
    return m
def load_Gao_Sebastiani_previous_results():
    """Parse './Gao_Sebastiani_results.txt' into a dict of AE / RAE scores.

    The first line is skipped as a header and blank lines are ignored. Each
    remaining line is lower-cased and split on whitespace into either
    'dataset learner-method ae rae' or 'learner-method ae rae'; in the latter
    case the dataset of the most recent 4-field line is reused. Method names
    are mapped to the identifiers used in this module (e.g. 'kld' -> 'svmkld').

    :return: dict with keys '<dataset>-<method>-ae' and '<dataset>-<method>-rae'
        mapping to float scores
    :raises ValueError: if a line without dataset appears before any line that
        names one, or if a line does not have 3 or 4 fields
    """
    def rename(method):
        old2new = {
            'kld': 'svmkld',
            'nkld': 'svmnkld',
            'qbeta2': 'svmq',
            'em': 'sld'
        }
        return old2new.get(method, method)

    gao_seb_results = {}
    dataset = None
    with open('./Gao_Sebastiani_results.txt', 'rt') as fin:
        next(fin, None)  # skip the header line
        for line in fin:
            parts = line.lower().split()
            if not parts:
                continue  # tolerate blank lines
            if len(parts) == 4:
                dataset, method, ae, rae = parts
            else:
                method, ae, rae = parts
            if dataset is None:
                raise ValueError(f'no dataset specified before line: {line.strip()!r}')
            _learner, method = method.split('-')
            method = rename(method)
            gao_seb_results[f'{dataset}-{method}-ae'] = float(ae)
            gao_seb_results[f'{dataset}-{method}-rae'] = float(rae)
    return gao_seb_results
def get_ranks_from_Gao_Sebastiani():
    """Rank the previously published methods on every dataset, per metric.

    For each metric ('ae', 'rae') and dataset, methods are ranked by ascending
    score (rank 1 = lowest). The mean rank of each method across datasets is
    stored under the 'Average-<method>-<metric>' key.

    :return: tuple (ranks, gao_seb_results) where ranks maps
        '<dataset>-<method>-<metric>' to the rank and gao_seb_results is the
        dict returned by load_Gao_Sebastiani_previous_results
    """
    gao_seb_results = load_Gao_Sebastiani_previous_results()
    datasets = set([key.split('-')[0] for key in gao_seb_results.keys()])
    methods = np.sort(np.unique([key.split('-')[1] for key in gao_seb_results.keys()]))

    ranks = {}
    for metric in ('ae', 'rae'):
        for dataset in datasets:
            scores = [gao_seb_results[f'{dataset}-{method}-{metric}'] for method in methods]
            best_first = methods[np.argsort(scores)]
            for position, method in enumerate(best_first, start=1):
                ranks[f'{dataset}-{method}-{metric}'] = position
        for method in methods:
            per_dataset = [ranks[f'{dataset}-{method}-{metric}'] for dataset in datasets]
            ranks[f'Average-{method}-{metric}'] = np.mean(per_dataset)
    return ranks, gao_seb_results