From 977599b9b1b293f2b2b4f1ccf333e13a80e2e2e5 Mon Sep 17 00:00:00 2001
From: Alex Moreo
Date: Mon, 5 Jul 2021 09:15:36 +0200
Subject: [PATCH] cleaning branch

---
 TweetSentQuant/Gao_Sebastiani_results.txt |  89 ------
 TweetSentQuant/evaluate_results.py        |  35 ---
 TweetSentQuant/experiments_NPP.py         | 214 ---------------
 TweetSentQuant/gen_plots.py               |  95 -------
 TweetSentQuant/gen_tables.py              | 145 ----------
 TweetSentQuant/settings.py                |   8 -
 TweetSentQuant/tabular.py                 | 318 ----------------------
 TweetSentQuant/util.py                    |  89 ------
 8 files changed, 993 deletions(-)
 delete mode 100644 TweetSentQuant/Gao_Sebastiani_results.txt
 delete mode 100644 TweetSentQuant/evaluate_results.py
 delete mode 100644 TweetSentQuant/experiments_NPP.py
 delete mode 100644 TweetSentQuant/gen_plots.py
 delete mode 100644 TweetSentQuant/gen_tables.py
 delete mode 100644 TweetSentQuant/settings.py
 delete mode 100644 TweetSentQuant/tabular.py
 delete mode 100644 TweetSentQuant/util.py

diff --git a/TweetSentQuant/Gao_Sebastiani_results.txt b/TweetSentQuant/Gao_Sebastiani_results.txt
deleted file mode 100644
index de0e6dd..0000000
--- a/TweetSentQuant/Gao_Sebastiani_results.txt
+++ /dev/null
@@ -1,89 +0,0 @@
-                        AE      RAE
-SemEval13   SVM-KLD     0.0722  0.1720
-            SVM-NKLD    0.0714  0.2756
-            SVM-QBETA2  0.0782  0.2775
-            LR-CC       0.0996  0.3095
-            LR-EM       0.1191  0.3923
-            LR-PCC      0.0344  0.1506
-            LR-ACC      0.0806  0.2479
-            LR-PACC     0.0812  0.2626
-SemEval14   SVM-KLD     0.0843  0.2268
-            SVM-NKLD    0.0836  0.3367
-            SVM-QBETA2  0.1018  0.3680
-            LR-CC       0.1043  0.3212
-            LR-EM       0.0807  0.3517
-            LR-PCC      0.1001  0.4277
-            LR-ACC      0.0581  0.2360
-            LR-PACC     0.0533  0.2573
-SemEval15   SVM-KLD     0.1185  0.3789
-            SVM-NKLD    0.1155  0.4720
-            SVM-QBETA2  0.1263  0.4762
-            LR-CC       0.1101  0.2879
-            LR-EM       0.1204  0.2949
-            LR-PCC      0.0460  0.1973
-            LR-ACC      0.1064  0.2971
-            LR-PACC     0.1013  0.2729
-SemEval16   SVM-KLD     0.0385  0.1512
-            SVM-NKLD    0.0830  0.3249
-            SVM-QBETA2  0.1201  0.5156
-            LR-CC       0.0500  0.1771
-            LR-EM       0.0646  0.2126
-            LR-PCC      0.0379  0.1553
-            LR-ACC      0.0542  0.2246
-            LR-PACC     0.0864  0.3504
-Sanders     SVM-KLD     0.0134  0.0630
-            SVM-NKLD    0.0950  0.3965
-            SVM-QBETA2  0.1098  0.4360
-            LR-CC       0.0671  0.2682
-            LR-EM       0.0715  0.2849
-            LR-PCC      0.0150  0.0602
-            LR-ACC      0.0338  0.1306
-            LR-PACC     0.0301  0.1173
-SST         SVM-KLD     0.0413  0.1458
-            SVM-NKLD    0.0749  0.2497
-            SVM-QBETA2  0.0671  0.2343
-            LR-CC       0.0330  0.1239
-            LR-EM       0.0369  0.1190
-            LR-PCC      0.0282  0.1068
-            LR-ACC      0.0492  0.1689
-            LR-PACC     0.0841  0.2302
-OMD         SVM-KLD     0.0305  0.0999
-            SVM-NKLD    0.0437  0.1279
-            SVM-QBETA2  0.0624  0.1826
-            LR-CC       0.0524  0.1527
-            LR-EM       0.0648  0.1886
-            LR-PCC      0.0046  0.0095
-            LR-ACC      0.0239  0.0753
-            LR-PACC     0.0100  0.0293
-HCR         SVM-KLD     0.0414  0.2191
-            SVM-NKLD    0.0604  0.2324
-            SVM-QBETA2  0.1272  0.4600
-            LR-CC       0.0525  0.1817
-            LR-EM       0.0895  0.3093
-            LR-PCC      0.0055  0.0202
-            LR-ACC      0.0240  0.1026
-            LR-PACC     0.0329  0.1436
-GASP        SVM-KLD     0.0171  0.0529
-            SVM-NKLD    0.0503  0.3416
-            SVM-QBETA2  0.0640  0.4402
-            LR-CC       0.0189  0.1297
-            LR-EM       0.0231  0.1589
-            LR-PCC      0.0097  0.0682
-            LR-ACC      0.0150  0.1038
-            LR-PACC     0.0087  0.0597
-WA          SVM-KLD     0.0647  0.1957
-            SVM-NKLD    0.0393  0.1357
-            SVM-QBETA2  0.0798  0.2332
-            LR-CC       0.0434  0.1270
-            LR-EM       0.0391  0.1145
-            LR-PCC      0.0338  0.0990
-            LR-ACC      0.0407  0.1197
-            LR-PACC     0.0277  0.0815
-WB          SVM-KLD     0.0613  0.1791
-            SVM-NKLD    0.0534  0.1756
-            SVM-QBETA2   0.0249  0.0774
-            LR-CC       0.0132  0.0399
-            LR-EM       0.0244  0.0773
-            LR-PCC      0.0123  0.0390
-            LR-ACC      0.0230  0.0719
-            LR-PACC     0.0165  0.0515
\ No newline at end of file
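Editor's note: the deleted results file above is whitespace-delimited, with the dataset name appearing only on the first row of each block. A minimal parsing sketch (the function name is hypothetical; the same logic lives in util.py's load_Gao_Sebastiani_previous_results, further down in this patch):

    def parse_gao_seb_results(path='Gao_Sebastiani_results.txt'):
        # hypothetical helper; returns {(dataset, method): (ae, rae)}
        results = {}
        dataset = None
        with open(path, 'rt') as fin:
            for line in fin.readlines()[1:]:  # skip the 'AE RAE' header row
                parts = line.split()
                if len(parts) == 4:            # first row of a dataset block
                    dataset, method, ae, rae = parts
                else:                          # continuation row: dataset carries over
                    method, ae, rae = parts
                results[(dataset, method)] = (float(ae), float(rae))
        return results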
diff --git a/TweetSentQuant/evaluate_results.py b/TweetSentQuant/evaluate_results.py
deleted file mode 100644
index 2b8a4d0..0000000
--- a/TweetSentQuant/evaluate_results.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import numpy as np
-import quapy as qp
-import settings
-import os
-import pickle
-from glob import glob
-import itertools
-import pathlib
-
-qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
-
-resultdir = './results'
-methods = ['*']
-
-
-def evaluate_results(methods, datasets, error_name):
-    results_str = []
-    all = []
-    error = qp.error.from_name(error_name)
-    for method, dataset in itertools.product(methods, datasets):
-        for experiment in glob(f'{resultdir}/{dataset}-{method}-{error_name}.pkl'):
-            true_prevalences, estim_prevalences, tr_prev, te_prev, te_prev_estim, best_params = \
-                pickle.load(open(experiment, 'rb'))
-            result = error(true_prevalences, estim_prevalences)
-            string = f'{pathlib.Path(experiment).name}: {result:.3f}'
-            results_str.append(string)
-            all.append(result)
-    results_str = sorted(results_str)
-    for r in results_str:
-        print(r)
-    print()
-    print(f'Ave: {np.mean(all):.3f}')
-
-
-evaluate_results(methods=['epacc*mae1k'], datasets=['*'], error_name='mae')
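Editor's note: each .pkl consumed above is a 6-tuple written by save_results in experiments_NPP.py (next section). A minimal sketch for inspecting one result file (the path is illustrative):

    import pickle

    # illustrative path; actual names follow {dataset}-{method}-{optim_loss}.pkl
    with open('./results/semeval13-cc-mae.pkl', 'rb') as fin:
        (true_prevalences, estim_prevalences,
         tr_prev, te_prev, te_prev_estim, best_params) = pickle.load(fin)

    print(true_prevalences.shape)  # one row per sample: (n_samples, n_classes)
    print(best_params)             # {} when model selection was skipped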
diff --git a/TweetSentQuant/experiments_NPP.py b/TweetSentQuant/experiments_NPP.py
deleted file mode 100644
index 51048e5..0000000
--- a/TweetSentQuant/experiments_NPP.py
+++ /dev/null
@@ -1,214 +0,0 @@
-from sklearn.linear_model import LogisticRegression
-import quapy as qp
-from quapy.classification.methods import PCALR
-from quapy.method.meta import QuaNet
-from quapy.method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
-from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy
-from quapy.method.meta import EPACC, EEMQ
-import quapy.functional as F
-import numpy as np
-import os
-import pickle
-import itertools
-from joblib import Parallel, delayed
-import settings
-import argparse
-import torch
-import shutil
-
-
-qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
-
-
-__C_range = np.logspace(-4, 5, 10)
-
-lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
-svmperf_params = {'C': __C_range}
-
-
-def newLR():
-    return LogisticRegression(max_iter=1000, solver='lbfgs', n_jobs=-1)
-
-
-def quantification_models():
-    # methods tested in Gao & Sebastiani 2016
-    yield 'cc', CC(newLR()), lr_params
-    yield 'acc', ACC(newLR()), lr_params
-    yield 'pcc', PCC(newLR()), lr_params
-    yield 'pacc', PACC(newLR()), lr_params
-    yield 'sld', EMQ(newLR()), lr_params
-    yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
-    yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
-    yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
-
-    # methods added
-    yield 'svmmae', OneVsAll(SVMAE(args.svmperfpath)), svmperf_params
-    yield 'svmmrae', OneVsAll(SVMRAE(args.svmperfpath)), svmperf_params
-    yield 'hdy', OneVsAll(HDy(newLR())), lr_params
-
-
-def quantification_cuda_models():
-    device = 'cuda' if torch.cuda.is_available() else 'cpu'
-    print(f'Running QuaNet in {device}')
-    learner = PCALR(**newLR().get_params())
-    yield 'quanet', QuaNet(learner, settings.SAMPLE_SIZE, checkpointdir=args.checkpointdir, device=device), lr_params
-
-
-def quantification_ensembles():
-    param_mod_sel = {
-        'sample_size': settings.SAMPLE_SIZE,
-        'n_repetitions': 1000,
-        'protocol': 'npp',
-        'verbose': False
-    }
-    common = {
-        'max_sample_size': 1000,
-        'n_jobs': settings.ENSEMBLE_N_JOBS,
-        'param_grid': lr_params,
-        'param_mod_sel': param_mod_sel,
-        'val_split': 0.4,
-        'min_pos': 10
-    }
-
-    # hyperparameters will be evaluated within each quantifier of the ensemble, and so the typical model selection
-    # will be skipped (by setting hyperparameters to None)
-    hyper_none = None
-    #yield 'epaccmaeptr', EPACC(newLR(), optim='mae', policy='ptr', **common), hyper_none
-    yield 'epaccmaemae1k', EPACC(newLR(), optim='mae', policy='mae', **common), hyper_none
-    # yield 'esldmaeptr', EEMQ(newLR(), optim='mae', policy='ptr', **common), hyper_none
-    # yield 'esldmaemae', EEMQ(newLR(), optim='mae', policy='mae', **common), hyper_none
-
-    #yield 'epaccmraeptr', EPACC(newLR(), optim='mrae', policy='ptr', **common), hyper_none
-    #yield 'epaccmraemrae', EPACC(newLR(), optim='mrae', policy='mrae', **common), hyper_none
-    #yield 'esldmraeptr', EEMQ(newLR(), optim='mrae', policy='ptr', **common), hyper_none
-    #yield 'esldmraemrae', EEMQ(newLR(), optim='mrae', policy='mrae', **common), hyper_none
-
-
-def evaluate_experiment(true_prevalences, estim_prevalences):
-    print('\nEvaluation Metrics:\n'+'='*22)
-    for eval_measure in [qp.error.mae, qp.error.mrae]:
-        err = eval_measure(true_prevalences, estim_prevalences)
-        print(f'\t{eval_measure.__name__}={err:.4f}')
-    print()
-
-
-def evaluate_method_point_test(true_prev, estim_prev):
-    print('\nPoint-Test evaluation:\n' + '=' * 22)
-    print(f'true-prev={F.strprev(true_prev)}, estim-prev={F.strprev(estim_prev)}')
-    for eval_measure in [qp.error.mae, qp.error.mrae]:
-        err = eval_measure(true_prev, estim_prev)
-        print(f'\t{eval_measure.__name__}={err:.4f}')
-
-
-def result_path(path, dataset_name, model_name, optim_loss):
-    return os.path.join(path, f'{dataset_name}-{model_name}-{optim_loss}.pkl')
-
-
-def is_already_computed(dataset_name, model_name, optim_loss):
-    if dataset_name=='semeval':
-        check_datasets = ['semeval13', 'semeval14', 'semeval15']
-    else:
-        check_datasets = [dataset_name]
-    return all(os.path.exists(result_path(args.results, name, model_name, optim_loss)) for name in check_datasets)
-
-
-def save_results(dataset_name, model_name, optim_loss, *results):
-    rpath = result_path(args.results, dataset_name, model_name, optim_loss)
-    qp.util.create_parent_dir(rpath)
-    with open(rpath, 'wb') as foo:
-        pickle.dump(tuple(results), foo, pickle.HIGHEST_PROTOCOL)
-
-
-def run(experiment):
-
-    optim_loss, dataset_name, (model_name, model, hyperparams) = experiment
-
-    if is_already_computed(dataset_name, model_name, optim_loss=optim_loss):
-        print(f'result for dataset={dataset_name} model={model_name} loss={optim_loss} already computed.')
-        return
-    elif (optim_loss == 'mae' and 'mrae' in model_name) or (optim_loss=='mrae' and 'mae' in model_name):
-        print(f'skipping model={model_name} for optim_loss={optim_loss}')
-        return
-    else:
-        print(f'running dataset={dataset_name} model={model_name} loss={optim_loss}')
-
-    benchmark_devel = qp.datasets.fetch_twitter(dataset_name, for_model_selection=True, min_df=5, pickle=True)
-    benchmark_devel.stats()
-
-    # model selection (hyperparameter optimization for a quantification-oriented loss)
-    if hyperparams is not None:
-        model_selection = qp.model_selection.GridSearchQ(
-            model,
-            param_grid=hyperparams,
-            sample_size=settings.SAMPLE_SIZE,
-            protocol='npp',
-            n_repetitions=1000,
-            error=optim_loss,
-            refit=False,
-            timeout=60*60,
-            verbose=True
-        )
-        model_selection.fit(benchmark_devel.training, benchmark_devel.test)
-        model = model_selection.best_model()
-        best_params = model_selection.best_params_
-    else:
-        best_params = {}
-
-    # model evaluation
-    test_names = [dataset_name] if dataset_name != 'semeval' else ['semeval13', 'semeval14', 'semeval15']
-    for test_no, test_name in enumerate(test_names):
-        benchmark_eval = qp.datasets.fetch_twitter(test_name, for_model_selection=False, min_df=5, pickle=True)
-        if test_no == 0:
-            print('fitting the selected model')
-            # fits the model only the first time
-            model.fit(benchmark_eval.training)
-
-        true_prevalences, estim_prevalences = qp.evaluation.natural_prevalence_prediction(
-            model,
-            test=benchmark_eval.test,
-            sample_size=settings.SAMPLE_SIZE,
-            n_repetitions=5000,
-            n_jobs=-1 if isinstance(model, qp.method.meta.Ensemble) else 1
-        )
-        test_estim_prevalence = model.quantify(benchmark_eval.test.instances)
-        test_true_prevalence = benchmark_eval.test.prevalence()
-
-        evaluate_experiment(true_prevalences, estim_prevalences)
-        evaluate_method_point_test(test_true_prevalence, test_estim_prevalence)
-        save_results(test_name, model_name, optim_loss,
-                     true_prevalences, estim_prevalences,
-                     benchmark_eval.training.prevalence(), test_true_prevalence, test_estim_prevalence,
-                     best_params)
-
-    #if isinstance(model, QuaNet):
-        #model.clean_checkpoint_dir()
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Run experiments for Twitter Sentiment Quantification using NPP')
-    parser.add_argument('results', metavar='RESULT_PATH', type=str,
-                        help='path to the directory where to store the results')
-    parser.add_argument('--svmperfpath', metavar='SVMPERF_PATH', type=str, default='./svm_perf_quantification',
-                        help='path to the directory with svmperf')
-    parser.add_argument('--checkpointdir', metavar='PATH', type=str, default='./checkpoint',
-                        help='path to the directory where to dump QuaNet checkpoints')
-    args = parser.parse_args()
-
-    print(f'Result folder: {args.results}')
-    np.random.seed(0)
-
-    optim_losses = ['mae', 'mrae']
-    datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN
-
-    models = quantification_models()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.N_JOBS)
-
-    models = quantification_cuda_models()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=settings.CUDA_N_JOBS)
-
-    models = quantification_ensembles()
-    qp.util.parallel(run, itertools.product(optim_losses, datasets, models), n_jobs=1)
-
-    #shutil.rmtree(args.checkpointdir, ignore_errors=True)
-
-
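Editor's note: the evaluations above use QuaPy's natural prevalence protocol (NPP), which repeatedly draws fixed-size test samples uniformly at random, so each sample roughly inherits the test set's natural class prevalence (as opposed to the artificial prevalence protocol, which forces prevalences over a grid). A conceptual sketch of the idea, not QuaPy's actual implementation; it assumes a LabelledCollection-like object exposing sampling_from_index and prevalence:

    import numpy as np

    def npp_eval_sketch(model, test, sample_size=100, n_repetitions=5000, seed=0):
        # draw indices uniformly at random, so each sample approximates
        # the natural prevalence of the test collection
        rng = np.random.default_rng(seed)
        true_prevs, estim_prevs = [], []
        for _ in range(n_repetitions):
            idx = rng.choice(len(test), size=sample_size, replace=False)
            sample = test.sampling_from_index(idx)
            true_prevs.append(sample.prevalence())
            estim_prevs.append(model.quantify(sample.instances))
        return np.asarray(true_prevs), np.asarray(estim_prevs)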
diff --git a/TweetSentQuant/gen_plots.py b/TweetSentQuant/gen_plots.py
deleted file mode 100644
index 360a96b..0000000
--- a/TweetSentQuant/gen_plots.py
+++ /dev/null
@@ -1,95 +0,0 @@
-import quapy as qp
-import settings
-import os
-import pathlib
-import pickle
-from glob import glob
-import sys
-from TweetSentQuant.util import nicename
-from os.path import join
-
-
-qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
-plotext='png'
-
-resultdir = './results_npp'
-plotdir = './plots_npp'
-os.makedirs(plotdir, exist_ok=True)
-
-def gather_results(methods, error_name):
-    method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []
-    for method in methods:
-        for experiment in glob(f'{resultdir}/*-{method}-m{error_name}.pkl'):
-            true_prevalences, estim_prevalences, tr_prev, te_prev, te_prev_estim, best_params = pickle.load(open(experiment, 'rb'))
-            method_names.append(nicename(method))
-            true_prevs.append(true_prevalences)
-            estim_prevs.append(estim_prevalences)
-            tr_prevs.append(tr_prev)
-    return method_names, true_prevs, estim_prevs, tr_prevs
-
-
-def plot_error_by_drift(methods, error_name, logscale=False, path=None):
-    print('plotting error by drift')
-    if path is not None:
-        path = join(path, f'error_by_drift_{error_name}.{plotext}')
-    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
-    qp.plot.error_by_drift(
-        method_names,
-        true_prevs,
-        estim_prevs,
-        tr_prevs,
-        n_bins=20,
-        error_name=error_name,
-        show_std=False,
-        logscale=logscale,
-        title=f'Quantification error as a function of distribution shift',
-        savepath=path
-    )
-
-
-def diagonal_plot(methods, error_name, path=None):
-    print('plotting diagonal plots')
-    if path is not None:
-        path = join(path, f'diag_{error_name}')
-    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
-    qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', legend=False, show_std=False, savepath=f'{path}_neg.{plotext}')
-    qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', legend=False, show_std=False, savepath=f'{path}_neu.{plotext}')
-    qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', legend=True, show_std=False, savepath=f'{path}_pos.{plotext}')
-
-
-def binary_bias_global(methods, error_name, path=None):
-    print('plotting bias global')
-    if path is not None:
-        path = join(path, f'globalbias_{error_name}')
-    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
-    qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', savepath=f'{path}_neg.{plotext}')
-    qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', savepath=f'{path}_neu.{plotext}')
-    qp.plot.binary_bias_global(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', savepath=f'{path}_pos.{plotext}')
-
-
-def binary_bias_bins(methods, error_name, path=None):
-    print('plotting bias local')
-    if path is not None:
-        path = join(path, f'localbias_{error_name}')
-    method_names, true_prevs, estim_prevs, tr_prevs = gather_results(methods, error_name)
-    qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=0, title='Negative', legend=False, savepath=f'{path}_neg.{plotext}')
-    qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=1, title='Neutral', legend=False, savepath=f'{path}_neu.{plotext}')
-    qp.plot.binary_bias_bins(method_names, true_prevs, estim_prevs, pos_class=2, title='Positive', legend=True, savepath=f'{path}_pos.{plotext}')
-
-
-gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
-new_methods_ae = ['svmmae' , 'epaccmaeptr', 'epaccmaemae', 'hdy', 'quanet']
-new_methods_rae = ['svmmrae' , 'epaccmraeptr', 'epaccmraemrae', 'hdy', 'quanet']
-
-plot_error_by_drift(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
-plot_error_by_drift(gao_seb_methods+new_methods_rae, error_name='rae', logscale=True, path=plotdir)
-
-diagonal_plot(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
-diagonal_plot(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
-
-binary_bias_global(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
-binary_bias_global(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
-
-#binary_bias_bins(gao_seb_methods+new_methods_ae, error_name='ae', path=plotdir)
-#binary_bias_bins(gao_seb_methods+new_methods_rae, error_name='rae', path=plotdir)
-
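Editor's note: the qp.plot helpers take parallel lists, one entry per method, each entry an (n_samples, n_classes) matrix of prevalences. A toy call with fabricated data, useful only for checking that the plotting backend works (all values here are synthetic):

    import numpy as np
    import quapy as qp

    rng = np.random.default_rng(0)
    true = rng.dirichlet([1, 1, 1], size=500)               # 500 samples, 3 classes
    noisy = np.abs(true + rng.normal(0, .05, true.shape))   # perturbed estimates
    estim = noisy / noisy.sum(axis=1, keepdims=True)        # back onto the simplex
    qp.plot.binary_diagonal(['toy'], [true], [estim], pos_class=1, title='Neutral',
                            savepath='toy_diag.png')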
diff --git a/TweetSentQuant/gen_tables.py b/TweetSentQuant/gen_tables.py
deleted file mode 100644
index 233443d..0000000
--- a/TweetSentQuant/gen_tables.py
+++ /dev/null
@@ -1,145 +0,0 @@
-import quapy as qp
-import numpy as np
-from os import makedirs
-import sys, os
-import pickle
-import argparse
-from TweetSentQuant.util import nicename, get_ranks_from_Gao_Sebastiani
-import settings
-from experiments_NPP import result_path
-from tabular import Table
-
-tables_path = './tables_npp'
-MAXTONE = 50  # sets the intensity of the maximum color reached by the worst (red) and best (green) results
-
-makedirs(tables_path, exist_ok=True)
-
-qp.environ['SAMPLE_SIZE'] = settings.SAMPLE_SIZE
-
-
-def save_table(path, table):
-    print(f'saving results in {path}')
-    with open(path, 'wt') as foo:
-        foo.write(table)
-
-
-def experiment_errors(path, dataset, method, loss):
-    path = result_path(path, dataset, method, 'm'+loss if not loss.startswith('m') else loss)
-    if os.path.exists(path):
-        true_prevs, estim_prevs, _, _, _, _ = pickle.load(open(path, 'rb'))
-        err_fn = getattr(qp.error, loss)
-        errors = err_fn(true_prevs, estim_prevs)
-        return errors
-    return None
-
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Generate tables for Twitter Sentiment Quantification')
-    parser.add_argument('results', metavar='RESULT_PATH', type=str,
-                        help='path to the directory where to store the results')
-    args = parser.parse_args()
-
-    datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
-    evaluation_measures = [qp.error.ae, qp.error.rae]
-    gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
-    new_methods = ['hdy', 'quanet']
-
-    gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
-
-    for i, eval_func in enumerate(evaluation_measures):
-
-        # Tables evaluation scores for AE and RAE (two tables)
-        # ----------------------------------------------------
-
-        eval_name = eval_func.__name__
-        added_methods = ['svmm' + eval_name, f'epaccm{eval_name}ptr', f'epaccm{eval_name}m{eval_name}'] + new_methods
-        methods = gao_seb_methods + added_methods
-        nold_methods = len(gao_seb_methods)
-        nnew_methods = len(added_methods)
-
-        # fill data table
-        table = Table(benchmarks=datasets, methods=methods)
-        for dataset in datasets:
-            for method in methods:
-                table.add(dataset, method, experiment_errors(args.results, dataset, method, eval_name))
-
-        # write the latex table
-        # tabular = """
-        # \\begin{tabularx}{\\textwidth}{|c||""" + ('Y|'*nold_methods)+ '|' + ('Y|'*nnew_methods) + """} \hline
-        #       & \multicolumn{"""+str(nold_methods)+"""}{c||}{Methods tested in~\cite{Gao:2016uq}} &
-        #       \multicolumn{"""+str(nnew_methods)+"""}{c|}{} \\\\ \hline
-        # """
-        tabular = """
-        \\resizebox{\\textwidth}{!}{%
-        \\begin{tabular}{|c||""" + ('c|' * nold_methods) + '|' + ('c|' * nnew_methods) + """} \hline
-              & \multicolumn{""" + str(nold_methods) + """}{c||}{Methods tested in~\cite{Gao:2016uq}} &
-              \multicolumn{""" + str(nnew_methods) + """}{c|}{} \\\\ \hline
-        """
-        rowreplace={dataset: nicename(dataset) for dataset in datasets}
-        colreplace={method: nicename(method, eval_name, side=True) for method in methods}
-
-        tabular += table.latexTabular(benchmark_replace=rowreplace, method_replace=colreplace)
-        tabular += """
-        \end{tabular}%
-        }
-        """
-
-        save_table(f'{tables_path}/tab_results_{eval_name}.npp.tex', tabular)
-
-        # Tables ranks for AE and RAE (two tables)
-        # ----------------------------------------------------
-        methods = gao_seb_methods
-
-        table.dropMethods(added_methods)
-
-        # fill the data table
-        ranktable = Table(benchmarks=datasets, methods=methods, missing='--')
-        for dataset in datasets:
-            for method in methods:
-                ranktable.add(dataset, method, values=table.get(dataset, method, 'rank'))
-
-        # write the latex table
-        tabular = """
-        \\resizebox{\\textwidth}{!}{%
-        \\begin{tabular}{|c||""" + ('c|' * len(gao_seb_methods)) + """} \hline
-              & \multicolumn{""" + str(nold_methods) + """}{c|}{Methods tested in~\cite{Gao:2016uq}} \\\\ \hline
-        """
-        for method in methods:
-            tabular += ' & ' + nicename(method, eval_name, side=True)
-        tabular += "\\\\\hline\n"
-
-        for dataset in datasets:
-            tabular += nicename(dataset) + ' '
-            for method in methods:
-                newrank = ranktable.get(dataset, method)
-                oldrank = gao_seb_ranks[f'{dataset}-{method}-{eval_name}']
-                if newrank != '--':
-                    newrank = f'{int(newrank)}'
-                color = ranktable.get_color(dataset, method)
-                if color == '--':
-                    color = ''
-                tabular += ' & ' + f'{newrank}' + f' ({oldrank}) ' + color
-            tabular += '\\\\\hline\n'
-        tabular += '\hline\n'
-
-        tabular += 'Average '
-        for method in methods:
-            newrank = ranktable.get_average(method)
-            oldrank = gao_seb_ranks[f'Average-{method}-{eval_name}']
-            if newrank != '--':
-                newrank = f'{newrank:.1f}'
-                oldrank = f'{oldrank:.1f}'
-            color = ranktable.get_average(method, 'color')
-            if color == '--':
-                color = ''
-            tabular += ' & ' + f'{newrank}' + f' ({oldrank}) ' + color
-        tabular += '\\\\\hline\n'
-        tabular += """
-        \end{tabular}%
-        }
-        """
-
-        save_table(f'{tables_path}/tab_rank_{eval_name}.npp.tex', tabular)
-
-    print("[Done]")
diff --git a/TweetSentQuant/settings.py b/TweetSentQuant/settings.py
deleted file mode 100644
index 8064fa8..0000000
--- a/TweetSentQuant/settings.py
+++ /dev/null
@@ -1,8 +0,0 @@
-import multiprocessing
-
-N_JOBS = -2  #multiprocessing.cpu_count()
-CUDA_N_JOBS = 2
-ENSEMBLE_N_JOBS = -2
-
-SAMPLE_SIZE = 100
-
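Editor's note: N_JOBS = -2 and ENSEMBLE_N_JOBS = -2 follow joblib's convention for negative job counts (they wrap around the CPU count, so -1 means all CPUs and -2 all but one). A sketch of that rule, for reference:

    import multiprocessing

    def effective_n_jobs(n_jobs):
        # joblib convention: negative values wrap around the CPU count,
        # so -1 -> n_cpus, -2 -> n_cpus - 1, etc.
        n_cpus = multiprocessing.cpu_count()
        return n_cpus + 1 + n_jobs if n_jobs < 0 else n_jobs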
diff --git a/TweetSentQuant/tabular.py b/TweetSentQuant/tabular.py
deleted file mode 100644
index cb90f3f..0000000
--- a/TweetSentQuant/tabular.py
+++ /dev/null
@@ -1,318 +0,0 @@
-import numpy as np
-import itertools
-from scipy.stats import ttest_ind_from_stats, wilcoxon
-
-
-class Table:
-    VALID_TESTS = [None, "wilcoxon", "ttest"]
-
-    def __init__(self, benchmarks, methods, lower_is_better=True, ttest='ttest', prec_mean=3,
-                 clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--', color=True):
-        assert ttest in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'
-
-        self.benchmarks = np.asarray(benchmarks)
-        self.benchmark_index = {row: i for i, row in enumerate(benchmarks)}
-
-        self.methods = np.asarray(methods)
-        self.method_index = {col: j for j, col in enumerate(methods)}
-
-        self.map = {}
-        # keyed (#rows,#cols)-ndarrays holding computations from self.map['values']
-        self._addmap('values', dtype=object)
-        self.lower_is_better = lower_is_better
-        self.ttest = ttest
-        self.prec_mean = prec_mean
-        self.clean_zero = clean_zero
-        self.show_std = show_std
-        self.prec_std = prec_std
-        self.add_average = average
-        self.missing = missing
-        self.missing_str = missing_str
-        self.color = color
-
-        self.touch()
-
-    @property
-    def nbenchmarks(self):
-        return len(self.benchmarks)
-
-    @property
-    def nmethods(self):
-        return len(self.methods)
-
-    def touch(self):
-        self._modif = True
-
-    def update(self):
-        if self._modif:
-            self.compute()
-
-    def _getfilled(self):
-        return np.argwhere(self.map['fill'])
-
-    @property
-    def values(self):
-        return self.map['values']
-
-    def _indexes(self):
-        return itertools.product(range(self.nbenchmarks), range(self.nmethods))
-
-    def _addmap(self, map, dtype, func=None):
-        self.map[map] = np.empty((self.nbenchmarks, self.nmethods), dtype=dtype)
-        if func is None:
-            return
-        m = self.map[map]
-        f = func
-        indexes = self._indexes() if map == 'fill' else self._getfilled()
-        for i, j in indexes:
-            m[i, j] = f(self.values[i, j])
-
-    def _addrank(self):
-        for i in range(self.nbenchmarks):
-            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
-            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
-            ranked_cols_idx = filled_cols_idx[np.argsort(col_means)]
-            if not self.lower_is_better:
-                ranked_cols_idx = ranked_cols_idx[::-1]
-            self.map['rank'][i, ranked_cols_idx] = np.arange(1, len(filled_cols_idx)+1)
-
-    def _addcolor(self):
-        for i in range(self.nbenchmarks):
-            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
-            if filled_cols_idx.size == 0:
-                continue
-            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
-            minval = min(col_means)
-            maxval = max(col_means)
-            for col_idx in filled_cols_idx:
-                val = self.map['mean'][i, col_idx]
-                norm = (maxval - minval)
-                if norm > 0:
-                    normval = (val - minval) / norm
-                else:
-                    normval = 0.5
-                if self.lower_is_better:
-                    normval = 1 - normval
-                self.map['color'][i, col_idx] = color_red2green_01(normval)
-
-    def _run_ttest(self, row, col1, col2):
-        mean1 = self.map['mean'][row, col1]
-        std1 = self.map['std'][row, col1]
-        nobs1 = self.map['nobs'][row, col1]
-        mean2 = self.map['mean'][row, col2]
-        std2 = self.map['std'][row, col2]
-        nobs2 = self.map['nobs'][row, col2]
-        _, p_val = ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
-        return p_val
-
-    def _run_wilcoxon(self, row, col1, col2):
-        values1 = self.map['values'][row, col1]
-        values2 = self.map['values'][row, col2]
-        _, p_val = wilcoxon(values1, values2)
-        return p_val
-
-    def _add_statistical_test(self):
-        if self.ttest is None:
-            return
-        self.some_similar = [False]*self.nmethods
-        for i in range(self.nbenchmarks):
-            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
-            if len(filled_cols_idx) <= 1:
-                continue
-            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
-            best_pos = filled_cols_idx[np.argmin(col_means)]
-
-            for j in filled_cols_idx:
-                if j == best_pos:
-                    continue
-                if self.ttest == 'ttest':
-                    p_val = self._run_ttest(i, best_pos, j)
-                else:
-                    p_val = self._run_wilcoxon(i, best_pos, j)
-
-                pval_outcome = pval_interpretation(p_val)
-                self.map['ttest'][i, j] = pval_outcome
-                if pval_outcome != 'Diff':
-                    self.some_similar[j] = True
-
-    def compute(self):
-        self._addmap('fill', dtype=bool, func=lambda x: x is not None)
-        self._addmap('mean', dtype=float, func=np.mean)
-        self._addmap('std', dtype=float, func=np.std)
-        self._addmap('nobs', dtype=float, func=len)
-        self._addmap('rank', dtype=int, func=None)
-        self._addmap('color', dtype=object, func=None)
-        self._addmap('ttest', dtype=object, func=None)
-        self._addmap('latex', dtype=object, func=None)
-        self._addrank()
-        self._addcolor()
-        self._add_statistical_test()
-        if self.add_average:
-            self._addave()
-        self._modif = False
-
-    def _is_column_full(self, col):
-        return all(self.map['fill'][:, self.method_index[col]])
-
-    def _addave(self):
-        ave = Table(['ave'], self.methods, lower_is_better=self.lower_is_better, ttest=self.ttest, average=False,
-                    missing=self.missing, missing_str=self.missing_str)
-        for col in self.methods:
-            values = None
-            if self._is_column_full(col):
-                if self.ttest == 'ttest':
-                    values = np.asarray(self.map['mean'][:, self.method_index[col]])
-                else:  # wilcoxon
-                    values = np.concatenate(self.values[:, self.method_index[col]])
-            ave.add('ave', col, values)
-        self.average = ave
-
-    def add(self, benchmark, method, values):
-        if values is not None:
-            values = np.asarray(values)
-            if values.ndim == 0:
-                values = values.flatten()
-        rid, cid = self._coordinates(benchmark, method)
-        self.map['values'][rid, cid] = values
-        self.touch()
-
-    def get(self, benchmark, method, attr='mean'):
-        self.update()
-        assert attr in self.map, f'unknown attribute {attr}'
-        rid, cid = self._coordinates(benchmark, method)
-        if self.map['fill'][rid, cid]:
-            v = self.map[attr][rid, cid]
-            if v is None or (isinstance(v, float) and np.isnan(v)):
-                return self.missing
-            return v
-        else:
-            return self.missing
-
-    def _coordinates(self, benchmark, method):
-        assert benchmark in self.benchmark_index, f'benchmark {benchmark} out of range'
-        assert method in self.method_index, f'method {method} out of range'
-        rid = self.benchmark_index[benchmark]
-        cid = self.method_index[method]
-        return rid, cid
-
-    def get_average(self, method, attr='mean'):
-        self.update()
-        if self.add_average:
-            return self.average.get('ave', method, attr=attr)
-        return None
-
-    def get_color(self, benchmark, method):
-        color = self.get(benchmark, method, attr='color')
-        if color is None:
-            return ''
-        return color
-
-    def latex(self, benchmark, method):
-        self.update()
-        i, j = self._coordinates(benchmark, method)
-        if self.map['fill'][i, j] == False:
-            return self.missing_str
-
-        mean = self.map['mean'][i, j]
-        l = f" {mean:.{self.prec_mean}f}"
-        if self.clean_zero:
-            l = l.replace(' 0.', '.')
-
-        isbest = self.map['rank'][i, j] == 1
-        if isbest:
-            l = "\\textbf{"+l.strip()+"}"
-
-        stat = ''
-        if self.ttest is not None and self.some_similar[j]:
-            test_label = self.map['ttest'][i, j]
-            if test_label == 'Sim':
-                stat = '^{\dag\phantom{\dag}}'
-            elif test_label == 'Same':
-                stat = '^{\ddag}'
-            elif isbest or test_label == 'Diff':
-                stat = '^{\phantom{\ddag}}'
-
-        std = ''
-        if self.show_std:
-            std = self.map['std'][i, j]
-            std = f" {std:.{self.prec_std}f}"
-            if self.clean_zero:
-                std = std.replace(' 0.', '.')
-            std = f" \pm {std:{self.prec_std}}"
-
-        if stat != '' or std != '':
-            l = f'{l}${stat}{std}$'
-
-        if self.color:
-            l += ' ' + self.map['color'][i, j]
-
-        return l
-
-    def latexTabular(self, benchmark_replace={}, method_replace={}, average=True):
-        tab = ' & '
-        tab += ' & '.join([method_replace.get(col, col) for col in self.methods])
-        tab += ' \\\\\hline\n'
-        for row in self.benchmarks:
-            rowname = benchmark_replace.get(row, row)
-            tab += rowname + ' & '
-            tab += self.latexRow(row)
-
-        if average:
-            tab += '\hline\n'
-            tab += 'Average & '
-            tab += self.latexAverage()
-        return tab
-
-    def latexRow(self, benchmark, endl='\\\\\hline\n'):
-        s = [self.latex(benchmark, col) for col in self.methods]
-        s = ' & '.join(s)
-        s += ' ' + endl
-        return s
-
-    def latexAverage(self, endl='\\\\\hline\n'):
-        if self.add_average:
-            return self.average.latexRow('ave', endl=endl)
-
-    def getRankTable(self):
-        t = Table(benchmarks=self.benchmarks, methods=self.methods, prec_mean=0, average=True)
-        for rid, cid in self._getfilled():
-            row = self.benchmarks[rid]
-            col = self.methods[cid]
-            t.add(row, col, self.get(row, col, 'rank'))
-        t.compute()
-        return t
-
-    def dropMethods(self, methods):
-        drop_index = [self.method_index[m] for m in methods]
-        new_methods = np.delete(self.methods, drop_index)
-        new_index = {col: j for j, col in enumerate(new_methods)}
-
-        self.map['values'] = self.values[:, np.asarray([self.method_index[m] for m in new_methods], dtype=int)]
-        self.methods = new_methods
-        self.method_index = new_index
-        self.touch()
-
-
-def pval_interpretation(p_val):
-    if 0.005 >= p_val:
-        return 'Diff'
-    elif 0.05 >= p_val > 0.005:
-        return 'Sim'
-    elif p_val > 0.05:
-        return 'Same'
-
-
-def color_red2green_01(val, maxtone=50):
-    if np.isnan(val): return None
-    assert 0 <= val <= 1, f'val {val} out of range [0,1]'
-
-    # rescale to [-1,1]
-    val = val * 2 - 1
-    if val < 0:
-        color = 'red'
-        tone = maxtone * (-val)
-    else:
-        color = 'green'
-        tone = maxtone * val
-    return '\cellcolor{' + color + f'!{int(tone)}' + '}'
-
diff --git a/TweetSentQuant/util.py b/TweetSentQuant/util.py
deleted file mode 100644
index fef866e..0000000
--- a/TweetSentQuant/util.py
+++ /dev/null
@@ -1,89 +0,0 @@
-import numpy as np
-
-
-nice = {
-    'mae': 'AE',
-    'mrae': 'RAE',
-    'ae': 'AE',
-    'rae': 'RAE',
-    'svmkld': 'SVM(KLD)',
-    'svmnkld': 'SVM(NKLD)',
-    'svmq': 'SVM(Q)',
-    'svmae': 'SVM(AE)',
-    'svmnae': 'SVM(NAE)',
-    'svmmae': 'SVM(AE)',
-    'svmmrae': 'SVM(RAE)',
-    'quanet': 'QuaNet',
-    'hdy': 'HDy',
-    'dys': 'DyS',
-    'epaccmaeptr': 'E(PACC)$_\mathrm{Ptr}$',
-    'epaccmaemae': 'E(PACC)$_\mathrm{AE}$',
-    'epaccmraeptr': 'E(PACC)$_\mathrm{Ptr}$',
-    'epaccmraemrae': 'E(PACC)$_\mathrm{RAE}$',
-    'svmperf': '',
-    'sanders': 'Sanders',
-    'semeval13': 'SemEval13',
-    'semeval14': 'SemEval14',
-    'semeval15': 'SemEval15',
-    'semeval16': 'SemEval16',
-    'Average': 'Average'
-}
-
-
-def nicerm(key):
-    return '\mathrm{'+nice[key]+'}'
-
-
-def nicename(method, eval_name=None, side=False):
-    m = nice.get(method, method.upper())
-    if eval_name is not None:
-        o = '$^{' + nicerm(eval_name) + '}$'
-        m = (m+o).replace('$$', '')
-    if side:
-        m = '\side{'+m+'}'
-    return m
-
-
-def load_Gao_Sebastiani_previous_results():
-    def rename(method):
-        old2new = {
-            'kld': 'svmkld',
-            'nkld': 'svmnkld',
-            'qbeta2': 'svmq',
-            'em': 'sld'
-        }
-        return old2new.get(method, method)
-
-    gao_seb_results = {}
-    with open('./Gao_Sebastiani_results.txt', 'rt') as fin:
-        lines = fin.readlines()
-        for line in lines[1:]:
-            line = line.strip()
-            parts = line.lower().split()
-            if len(parts) == 4:
-                dataset, method, ae, rae = parts
-            else:
-                method, ae, rae = parts
-            learner, method = method.split('-')
-            method = rename(method)
-            gao_seb_results[f'{dataset}-{method}-ae'] = float(ae)
-            gao_seb_results[f'{dataset}-{method}-rae'] = float(rae)
-    return gao_seb_results
-
-
-def get_ranks_from_Gao_Sebastiani():
-    gao_seb_results = load_Gao_Sebastiani_previous_results()
-    datasets = set([key.split('-')[0] for key in gao_seb_results.keys()])
-    methods = np.sort(np.unique([key.split('-')[1] for key in gao_seb_results.keys()]))
-    ranks = {}
-    for metric in ['ae', 'rae']:
-        for dataset in datasets:
-            scores = [gao_seb_results[f'{dataset}-{method}-{metric}'] for method in methods]
-            order = np.argsort(scores)
-            sorted_methods = methods[order]
-            for i, method in enumerate(sorted_methods):
-                ranks[f'{dataset}-{method}-{metric}'] = i+1
-        for method in methods:
-            rankave = np.mean([ranks[f'{dataset}-{method}-{metric}'] for dataset in datasets])
-            ranks[f'Average-{method}-{metric}'] = rankave
-    return ranks, gao_seb_results
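Editor's note: for reference, a minimal end-to-end sketch of the Table class removed above, with synthetic error scores (it assumes the deleted tabular.py and util.py modules are still importable, e.g., from a checkout prior to this commit):

    import numpy as np
    from tabular import Table
    from util import nicename

    datasets = ['semeval13', 'sanders']
    methods = ['cc', 'acc']

    table = Table(benchmarks=datasets, methods=methods)
    rng = np.random.default_rng(0)
    for d in datasets:
        for m in methods:
            table.add(d, m, rng.uniform(0, .2, size=100))  # 100 synthetic MAE scores

    # means, ranks, colors, and significance tests are computed lazily on access
    print(table.latexTabular(
        benchmark_replace={d: nicename(d) for d in datasets},
        method_replace={m: nicename(m) for m in methods}))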