From d5b8d68f036797851ac9b74e0d04636016a55a81 Mon Sep 17 00:00:00 2001
From: Alejandro Moreo
Date: Thu, 11 Apr 2024 11:33:48 +0200
Subject: [PATCH] switching

---
 .gitmodules                        |   3 +
 ClassifierAccuracy/experiments.py  |   5 +-
 ClassifierAccuracy/gen_tables.py   |  78 ++++++-
 ClassifierAccuracy/util/commons.py | 246 +-------------------
 ClassifierAccuracy/util/tabular.py | 349 -----------------------------
 result_table                       |   1 +
 6 files changed, 91 insertions(+), 591 deletions(-)
 create mode 100644 .gitmodules
 delete mode 100644 ClassifierAccuracy/util/tabular.py
 create mode 160000 result_table

diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..89cf11c
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "result_table"]
+	path = result_table
+	url = gitea@gitea-s2i2s.isti.cnr.it:moreo/result_table.git
diff --git a/ClassifierAccuracy/experiments.py b/ClassifierAccuracy/experiments.py
index 0807e82..eb5fa4d 100644
--- a/ClassifierAccuracy/experiments.py
+++ b/ClassifierAccuracy/experiments.py
@@ -1,5 +1,8 @@
+from ClassifierAccuracy.gen_tables import gen_tables
 from ClassifierAccuracy.util.commons import *
+from ClassifierAccuracy.util.generators import *
 from ClassifierAccuracy.util.plotting import plot_diagonal
+from quapy.protocol import UPP
 
 PROBLEM = 'multiclass'
 ORACLE = False
@@ -53,7 +56,7 @@ for (cls_name, h), (dataset_name, (L, V, U)) in itertools.product(gen_classifier
     # instances of CAPContingencyTable instead are generic, and the evaluation measure can
     # be nested to the predictions to speed up things
     for (method_name, method) in gen_CAP_cont_table(h):
-        if not any_missing(basedir, cls_name, dataset_name, method_name):
+        if not any_missing(basedir, cls_name, dataset_name, method_name, acc_measures):
             print(f'\t\tmethod {method_name} has all results already computed. Skipping.')
             continue
 
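Note on the call-site change above: any_missing now receives an extra acc_measures argument, and the updated helper in commons.py (further down in this patch) iterates it as a callable that yields accuracy-measure names (for acc_name in acc_measures()). The patch does not show where experiments.py defines acc_measures; a minimal sketch of a definition that would satisfy the new signature, built on the existing gen_acc_measure generator (the definition below is an assumption, not part of the diff):

    # hypothetical glue code, not in the patch: expose only the measure names
    def acc_measures():
        return (acc_name for acc_name, _ in gen_acc_measure())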
diff --git a/ClassifierAccuracy/gen_tables.py b/ClassifierAccuracy/gen_tables.py
index fc8d389..66aaa08 100644
--- a/ClassifierAccuracy/gen_tables.py
+++ b/ClassifierAccuracy/gen_tables.py
@@ -1,3 +1,77 @@
-from ClassifierAccuracy.util.commons import gen_tables
-gen_tables()
\ No newline at end of file
+def gen_tables(basedir, datasets):
+
+
+    mock_h = LogisticRegression()
+    methods = [method for method, _ in gen_CAP(mock_h, None)] + [method for method, _ in gen_CAP_cont_table(mock_h)]
+    classifiers = [classifier for classifier, _ in gen_classifiers()]
+
+    os.makedirs('./tables', exist_ok=True)
+
+    with_oracle = 'oracle' in basedir
+
+    tex_doc = """
+    \\documentclass[10pt,a4paper]{article}
+    \\usepackage[utf8]{inputenc}
+    \\usepackage{amsmath}
+    \\usepackage{amsfonts}
+    \\usepackage{amssymb}
+    \\usepackage{graphicx}
+    \\usepackage{tabularx}
+    \\usepackage{color}
+    \\usepackage{colortbl}
+    \\usepackage{xcolor}
+    \\begin{document}
+    """
+
+    for classifier in classifiers:
+        for metric in [measure for measure, _ in gen_acc_measure()]:
+
+            table = Table(datasets, methods, prec_mean=5, clean_zero=True)
+            for method, dataset in itertools.product(methods, datasets):
+                path = getpath(basedir, classifier, metric, dataset, method)
+                if not os.path.exists(path):
+                    print('missing ', path)
+                    continue
+                results = json.load(open(path, 'r'))
+                true_acc = results['true_acc']
+                estim_acc = np.asarray(results['estim_acc'])
+                if any(np.isnan(estim_acc)):
+                    print(f'nan values found in {method=} {dataset=}')
+                    continue
+                if any(estim_acc>1.00001):
+                    print(f'values >1 found in {method=} {dataset=} [max={estim_acc.max()}]')
+                    continue
+                if any(estim_acc<-0.00001):
+                    print(f'values <0 found in {method=} {dataset=} [min={estim_acc.min()}]')
+                    continue
+                errors = cap_errors(true_acc, estim_acc)
+                table.add(dataset, method, errors)
+
+            tex = table.latexTabular()
+            table_name = f'{basedir}_{classifier}_{metric}.tex'
+            table_name = table_name.replace('/', '_')
+            with open(f'./tables/{table_name}', 'wt') as foo:
+                foo.write('\\begin{table}[h]\n')
+                foo.write('\\centering\n')
+                foo.write('\\resizebox{\\textwidth}{!}{%\n')
+                foo.write('\\begin{tabular}{c|'+('c'*len(methods))+'}\n')
+                foo.write(tex)
+                foo.write('\\end{tabular}%\n')
+                foo.write('}\n')
+                foo.write('\\caption{Classifier ' + classifier.replace('_', ' ') + ('(oracle)' if with_oracle else '') +
+                          ' evaluated in terms of ' + metric.replace('_', ' ') + '}\n')
+                foo.write('\\end{table}\n')
+
+            tex_doc += "\\input{" + table_name + "}\n\n"
+
+    tex_doc += """
+    \\end{document}
+    """
+    with open(f'./tables/main.tex', 'wt') as foo:
+        foo.write(tex_doc)
+
+    print("[Tables Done] running latex")
+    os.chdir('./tables/')
+    os.system('pdflatex main.tex')
+    os.system('rm main.aux main.log')
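The new gen_tables.py shown above references several names (os, json, itertools, np, LogisticRegression, Table, and the gen_*/getpath/cap_errors helpers) without importing them in the hunk. The block below is a plausible completion, not part of the patch: the module paths are assumptions inferred from the import block of experiments.py (ClassifierAccuracy.util.commons and the new ClassifierAccuracy.util.generators), and the location of Table inside the new result_table submodule is not shown anywhere in the diff.

    # hypothetical import header for gen_tables.py -- module paths are assumptions
    import itertools
    import json
    import os

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    from ClassifierAccuracy.util.commons import getpath, cap_errors                            # assumed location
    from ClassifierAccuracy.util.generators import gen_classifiers, gen_acc_measure, gen_CAP, gen_CAP_cont_table  # assumed location
    # Table used to come from ClassifierAccuracy.util.tabular (deleted by this patch);
    # it presumably now lives in the result_table submodule, whose import path is not shown.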
diff --git a/ClassifierAccuracy/util/commons.py b/ClassifierAccuracy/util/commons.py
index ac01374..d7542fc 100644
--- a/ClassifierAccuracy/util/commons.py
+++ b/ClassifierAccuracy/util/commons.py
@@ -1,3 +1,6 @@
+from sklearn.base import BaseEstimator
+
+import quapy as qp
 import itertools
 import json
 import os
@@ -6,139 +9,13 @@
 from glob import glob
 from pathlib import Path
 from time import time
 import numpy as np
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics import accuracy_score, f1_score
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
 
-from sklearn.datasets import fetch_rcv1, fetch_20newsgroups
-from sklearn.model_selection import GridSearchCV
-
-from ClassifierAccuracy.models_multiclass import *
 from ClassifierAccuracy.util.tabular import Table
 from quapy.protocol import OnLabelledCollectionProtocol, AbstractStochasticSeededProtocol
-from quapy.method.aggregative import EMQ, ACC, KDEyML
 from quapy.data import LabelledCollection
-from quapy.data.datasets import fetch_UCIMulticlassLabelledCollection, UCI_MULTICLASS_DATASETS, fetch_lequa2022, TWITTER_SENTIMENT_DATASETS_TEST
-from quapy.data.datasets import fetch_reviews
-
-
-def gen_classifiers():
-    param_grid = {
-        'C': np.logspace(-4, -4, 9),
-        'class_weight': ['balanced', None]
-    }
-
-    yield 'LR', LogisticRegression()
-    #yield 'LR-opt', GridSearchCV(LogisticRegression(), param_grid, cv=5, n_jobs=-1)
-    #yield 'NB', GaussianNB()
-    #yield 'SVM(rbf)', SVC()
-    #yield 'SVM(linear)', LinearSVC()
-
-
-def gen_multi_datasets(only_names=False)-> [str,[LabelledCollection,LabelledCollection,LabelledCollection]]:
-    for dataset_name in UCI_MULTICLASS_DATASETS:
-        if dataset_name == 'wine-quality':
-            continue
-        if only_names:
-            yield dataset_name, None
-        else:
-            dataset = fetch_UCIMulticlassLabelledCollection(dataset_name)
-            yield dataset_name, split(dataset)
-
-    # yields the 20 newsgroups dataset
-    if only_names:
-        yield "20news", None
-    else:
-        train = fetch_20newsgroups(subset='train', remove=('headers', 'footers', 'quotes'))
-        test = fetch_20newsgroups(subset='test', remove=('headers', 'footers', 'quotes'))
-        tfidf = TfidfVectorizer(min_df=5, sublinear_tf=True)
-        Xtr = tfidf.fit_transform(train.data)
-        Xte = tfidf.transform((test.data))
-        train = LabelledCollection(instances=Xtr, labels=train.target)
-        U = LabelledCollection(instances=Xte, labels=test.target)
-        T, V = train.split_stratified(train_prop=0.5, random_state=0)
-        yield "20news", (T, V, U)
-
-    # yields the T1B@LeQua2022 (training) dataset
-    if only_names:
-        yield "T1B-LeQua2022", None
-    else:
-        train, _, _ = fetch_lequa2022(task='T1B')
-        yield "T1B-LeQua2022", split(train)
-
-
-def gen_tweet_datasets(only_names=False)-> [str,[LabelledCollection,LabelledCollection,LabelledCollection]]:
-    for dataset_name in TWITTER_SENTIMENT_DATASETS_TEST:
-        if only_names:
-            yield dataset_name, None
-        else:
-            data = qp.datasets.fetch_twitter(dataset_name, min_df=3, pickle=True)
-            T, V = data.training.split_stratified(0.5, random_state=0)
-            U = data.test
-            yield dataset_name, (T, V, U)
-
-
-def gen_bin_datasets(only_names=False) -> [str,[LabelledCollection,LabelledCollection,LabelledCollection]]:
-    if only_names:
-        for dataset_name in ['imdb', 'CCAT', 'GCAT', 'MCAT']:
-            yield dataset_name, None
-    else:
-        train, U = fetch_reviews('imdb', tfidf=True, min_df=10, pickle=True).train_test
-        L, V = train.split_stratified(0.5, random_state=0)
-        yield 'imdb', (L, V, U)
-
-        training = fetch_rcv1(subset='train')
-        test = fetch_rcv1(subset='test')
-        class_names = training.target_names.tolist()
-        for cat in ['CCAT', 'GCAT', 'MCAT']:
-            class_idx = class_names.index(cat)
-            tr_labels = training.target[:,class_idx].toarray().flatten()
-            te_labels = test.target[:,class_idx].toarray().flatten()
-            tr = LabelledCollection(training.data, tr_labels)
-            U = LabelledCollection(test.data, te_labels)
-            L, V = tr.split_stratified(train_prop=0.5, random_state=0)
-            yield cat, (L, V, U)
-
-
-def gen_CAP(h, acc_fn, with_oracle=False)->[str, ClassifierAccuracyPrediction]:
-    #yield 'SebCAP', SebastianiCAP(h, acc_fn, ACC)
-    # yield 'SebCAP-SLD', SebastianiCAP(h, acc_fn, EMQ, predict_train_prev=not with_oracle)
-    #yield 'SebCAP-KDE', SebastianiCAP(h, acc_fn, KDEyML)
-    #yield 'SebCAPweight', SebastianiCAP(h, acc_fn, ACC, alpha=0)
-    #yield 'PabCAP', PabloCAP(h, acc_fn, ACC)
-    # yield 'PabCAP-SLD-median', PabloCAP(h, acc_fn, EMQ, aggr='median')
-    yield 'ATC-MC', ATC(h, acc_fn, scoring_fn='maxconf')
-    # yield 'ATC-NE', ATC(h, acc_fn, scoring_fn='neg_entropy')
-    yield 'DoC', DoC(h, acc_fn, sample_size=qp.environ['SAMPLE_SIZE'])
-
-
-def gen_CAP_cont_table(h)->[str,CAPContingencyTable]:
-    acc_fn = None
-    yield 'Naive', NaiveCAP(h, acc_fn)
-    yield 'CT-PPS-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()))
-    # yield 'CT-PPS-KDE', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.01))
-    # yield 'CT-PPS-KDE05', ContTableTransferCAP(h, acc_fn, KDEyML(LogisticRegression(class_weight='balanced'), bandwidth=0.05))
-    #yield 'QuAcc(EMQ)nxn-noX', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_posteriors=True, add_X=False)
-    #yield 'QuAcc(EMQ)nxn', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()))
-    #yield 'QuAcc(EMQ)nxn-MC', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxconf=True)
-    # yield 'QuAcc(EMQ)nxn-NE', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_negentropy=True)
-    #yield 'QuAcc(EMQ)nxn-MIS', QuAccNxN(h, acc_fn, EMQ(LogisticRegression()), add_maxinfsoft=True)
-    #yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
-    #yield 'QuAcc(EMQ)1xn2', QuAcc1xN2(h, acc_fn, EMQ(LogisticRegression()))
-    #yield 'CT-PPSh-EMQ', ContTableTransferCAP(h, acc_fn, EMQ(LogisticRegression()), reuse_h=True)
-    #yield 'Equations-ACCh', NsquaredEquationsCAP(h, acc_fn, ACC, reuse_h=True)
-    # yield 'Equations-ACC', NsquaredEquationsCAP(h, acc_fn, ACC)
-    #yield 'Equations-SLD', NsquaredEquationsCAP(h, acc_fn, EMQ)
-
-
-def get_method_names():
-    mock_h = LogisticRegression()
-    return [m for m, _ in gen_CAP(mock_h, None)] + [m for m, _ in gen_CAP_cont_table(mock_h)]
-
-
-def gen_acc_measure():
-    yield 'vanilla_accuracy', vanilla_acc_fn
-    yield 'macro-F1', macrof1_fn
 
 
 def split(data: LabelledCollection):
@@ -177,8 +54,8 @@ def predictionsCAPcont_table(method, test_prot, gen_acc_measure, oracle=False):
     return estim_accs_dict, t_test_ave
 
 
-def any_missing(basedir, cls_name, dataset_name, method_name):
-    for acc_name, _ in gen_acc_measure():
+def any_missing(basedir, cls_name, dataset_name, method_name, acc_measures):
+    for acc_name in acc_measures():
         if not os.path.exists(getpath(basedir, cls_name, acc_name, dataset_name, method_name)):
             return True
     return False
@@ -322,114 +199,5 @@ def get_dataset_stats(path, test_prot, L, V):
     save_json_file(path, info)
 
-
-def gen_tables(basedir, datasets):
-    mock_h = LogisticRegression(),
-    methods = [method for method, _ in gen_CAP(mock_h, None)] + [method for method, _ in gen_CAP_cont_table(mock_h)]
-    classifiers = [classifier for classifier, _ in gen_classifiers()]
-
-    os.makedirs('./tables', exist_ok=True)
-
-    with_oracle = 'oracle' in basedir
-
-    tex_doc = """
-    \\documentclass[10pt,a4paper]{article}
-    \\usepackage[utf8]{inputenc}
-    \\usepackage{amsmath}
-    \\usepackage{amsfonts}
-    \\usepackage{amssymb}
-    \\usepackage{graphicx}
-    \\usepackage{tabularx}
-    \\usepackage{color}
-    \\usepackage{colortbl}
-    \\usepackage{xcolor}
-    \\begin{document}
-    """
-
-    for classifier in classifiers:
-        for metric in [measure for measure, _ in gen_acc_measure()]:
-
-            table = Table(datasets, methods, prec_mean=5, clean_zero=True)
-            for method, dataset in itertools.product(methods, datasets):
-                path = getpath(basedir, classifier, metric, dataset, method)
-                if not os.path.exists(path):
-                    print('missing ', path)
-                    continue
-                results = json.load(open(path, 'r'))
-                true_acc = results['true_acc']
-                estim_acc = np.asarray(results['estim_acc'])
-                if any(np.isnan(estim_acc)):
-                    print(f'nan values found in {method=} {dataset=}')
-                    continue
-                if any(estim_acc>1.00001):
-                    print(f'values >1 found in {method=} {dataset=} [max={estim_acc.max()}]')
-                    continue
-                if any(estim_acc<-0.00001):
-                    print(f'values <0 found in {method=} {dataset=} [min={estim_acc.min()}]')
-                    continue
-                errors = cap_errors(true_acc, estim_acc)
-                table.add(dataset, method, errors)
-
-            tex = table.latexTabular()
-            table_name = f'{basedir}_{classifier}_{metric}.tex'
-            table_name = table_name.replace('/', '_')
-            with open(f'./tables/{table_name}', 'wt') as foo:
-                foo.write('\\begin{table}[h]\n')
-                foo.write('\\centering\n')
-                foo.write('\\resizebox{\\textwidth}{!}{%\n')
-                foo.write('\\begin{tabular}{c|'+('c'*len(methods))+'}\n')
-                foo.write(tex)
-                foo.write('\\end{tabular}%\n')
-                foo.write('}\n')
-                foo.write('\\caption{Classifier ' + classifier.replace('_', ' ') + ('(oracle)' if with_oracle else '') +
-                          ' evaluated in terms of ' + metric.replace('_', ' ') + '}\n')
-                foo.write('\\end{table}\n')
-
-            tex_doc += "\input{" + table_name + "}\n\n"
-
-    tex_doc += """
-    \\end{document}
-    """
-    with open(f'./tables/main.tex', 'wt') as foo:
-        foo.write(tex_doc)
-
-    print("[Tables Done] runing latex")
-    os.chdir('./tables/')
-    os.system('pdflatex main.tex')
-    os.system('rm main.aux main.log')
-
-
-class ArtificialAccuracyProtocol(AbstractStochasticSeededProtocol, OnLabelledCollectionProtocol):
-
-    def __init__(self, data: LabelledCollection, h: BaseEstimator, sample_size=None, n_prevalences=101, repeats=10, random_state=0):
-        super(ArtificialAccuracyProtocol, self).__init__(random_state)
-        self.data = data
-        self.h = h
-        self.sample_size = qp._get_sample_size(sample_size)
-        self.n_prevalences = n_prevalences
-        self.repeats = repeats
-        self.collator = OnLabelledCollectionProtocol.get_collator('labelled_collection')
-
-    def accuracy_grid(self):
-        grid = np.linspace(0, 1, self.n_prevalences)
-        grid = np.repeat(grid, self.repeats, axis=0)
-        return grid
-
-    def samples_parameters(self):
-        # issue predictions
-        label_predictions = self.h.predict(self.data.X)
-        correct = label_predictions == self.data.y
-        self.data_evaluated = LabelledCollection(self.data.X, labels=correct, classes=[0,1])
-        indexes = []
-        for acc_value in self.accuracy_grid():
-            index = self.data_evaluated.sampling_index(self.sample_size, acc_value)
-            indexes.append(index)
-        return indexes
-
-    def sample(self, index):
-        return self.data.sampling_from_index(index)
-
-    def total(self):
-        return self.n_prevalences * self.repeats
-
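For context on what commons.py loses here: ArtificialAccuracyProtocol drew repeated validation samples whose classifier accuracy is forced onto an even grid of n_prevalences points (it relabels each instance as "correctly classified or not" and then samples that binary prevalence). A minimal usage sketch, assuming a fitted classifier h and a labelled validation set val (both names are placeholders), and referring to the class as it read before this removal:

    # illustrative only: drive the (now removed) protocol and check accuracy per sample
    prot = ArtificialAccuracyProtocol(val, h, sample_size=250, n_prevalences=21, repeats=10)
    for sample in prot():                                   # quapy protocols are callable iterators over samples
        acc = (h.predict(sample.X) == sample.y).mean()      # should roughly follow the grid of target accuracies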
diff --git a/ClassifierAccuracy/util/tabular.py b/ClassifierAccuracy/util/tabular.py
deleted file mode 100644
index 8cca81f..0000000
--- a/ClassifierAccuracy/util/tabular.py
+++ /dev/null
@@ -1,349 +0,0 @@
-import numpy as np
-import itertools
-from scipy.stats import ttest_ind_from_stats, wilcoxon
-
-
-class Table:
-    VALID_TESTS = [None, "wilcoxon", "ttest"]
-
-    def __init__(self, benchmarks, methods, lower_is_better=True, ttest='ttest', prec_mean=3,
-                 clean_zero=False, show_std=False, prec_std=3, average=True, missing=None, missing_str='--', color=True,
-                 maxtone=50):
-        assert ttest in self.VALID_TESTS, f'unknown test, valid are {self.VALID_TESTS}'
-
-        self.benchmarks = np.asarray(benchmarks)
-        self.benchmark_index = {row: i for i, row in enumerate(benchmarks)}
-
-        self.methods = np.asarray(methods)
-        self.method_index = {col: j for j, col in enumerate(methods)}
-
-        self.map = {}
-        # keyed (#rows,#cols)-ndarrays holding computations from self.map['values']
-        self._addmap('values', dtype=object)
-        self.lower_is_better = lower_is_better
-        self.ttest = ttest
-        self.prec_mean = prec_mean
-        self.clean_zero = clean_zero
-        self.show_std = show_std
-        self.prec_std = prec_std
-        self.add_average = average
-        self.missing = missing
-        self.missing_str = missing_str
-        self.color = color
-        self.maxtone = maxtone
-
-        self.touch()
-
-    @property
-    def nbenchmarks(self):
-        return len(self.benchmarks)
-
-    @property
-    def nmethods(self):
-        return len(self.methods)
-
-    def touch(self):
-        self._modif = True
-
-    def update(self):
-        if self._modif:
-            self.compute()
-
-    def _getfilled(self):
-        return np.argwhere(self.map['fill'])
-
-    @property
-    def values(self):
-        return self.map['values']
-
-    def _indexes(self):
-        return itertools.product(range(self.nbenchmarks), range(self.nmethods))
-
-    def _addmap(self, map, dtype, func=None):
-        self.map[map] = np.empty((self.nbenchmarks, self.nmethods), dtype=dtype)
-        if func is None:
-            return
-        m = self.map[map]
-        f = func
-        indexes = self._indexes() if map == 'fill' else self._getfilled()
-        for i, j in indexes:
-            m[i, j] = f(self.values[i, j])
-
-    def _addrank(self):
-        for i in range(self.nbenchmarks):
-            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
-            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
-            ranked_cols_idx = filled_cols_idx[np.argsort(col_means)]
-            if not self.lower_is_better:
-                ranked_cols_idx = ranked_cols_idx[::-1]
-            self.map['rank'][i, ranked_cols_idx] = np.arange(1, len(filled_cols_idx) + 1)
-
-    def _addcolor(self):
-        for i in range(self.nbenchmarks):
-            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
-            if filled_cols_idx.size == 0:
-                continue
-            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
-            # col_means = [self.map['rank'][i, j] for j in filled_cols_idx]
-
-            minval = min(col_means)
-            maxval = max(col_means)
-
-            for col_idx in filled_cols_idx:
-                val = self.map['mean'][i, col_idx]
-                norm = (maxval - minval)
-                if norm > 0:
-                    normval = (val - minval) / norm
-                else:
-                    normval = 0.5
-
-                if self.lower_is_better:
-                    normval = 1 - normval
-
-                normval = np.clip(normval, 0, 1)
-
-                self.map['color'][i, col_idx] = color_red2green_01(normval, self.maxtone)
-
-    def _run_ttest(self, row, col1, col2):
-        mean1 = self.map['mean'][row, col1]
-        std1 = self.map['std'][row, col1]
-        nobs1 = self.map['nobs'][row, col1]
-        mean2 = self.map['mean'][row, col2]
-        std2 = self.map['std'][row, col2]
-        nobs2 = self.map['nobs'][row, col2]
-        _, p_val = ttest_ind_from_stats(mean1, std1, nobs1, mean2, std2, nobs2)
-        return p_val
-
-    def _run_wilcoxon(self, row, col1, col2):
-        values1 = self.map['values'][row, col1]
-        values2 = self.map['values'][row, col2]
-        try:
-            _, p_val = wilcoxon(values1, values2)
-        except ValueError:
-            p_val = 0
-        return p_val
-
-    def _add_statistical_test(self):
-        if self.ttest is None:
-            return
-        self.some_similar = [False] * self.nmethods
-        for i in range(self.nbenchmarks):
-            filled_cols_idx = np.argwhere(self.map['fill'][i]).flatten()
-            if len(filled_cols_idx) <= 1:
-                continue
-            col_means = [self.map['mean'][i, j] for j in filled_cols_idx]
-            best_pos = filled_cols_idx[np.argmin(col_means)]
-
-            for j in filled_cols_idx:
-                if j == best_pos:
-                    continue
-                if self.ttest == 'ttest':
-                    p_val = self._run_ttest(i, best_pos, j)
-                else:
-                    p_val = self._run_wilcoxon(i, best_pos, j)
-
-                pval_outcome = pval_interpretation(p_val)
-                self.map['ttest'][i, j] = pval_outcome
-                if pval_outcome != 'Diff':
-                    self.some_similar[j] = True
-
-    def compute(self):
-        self._addmap('fill', dtype=bool, func=lambda x: x is not None)
-        self._addmap('mean', dtype=float, func=np.mean)
-        self._addmap('std', dtype=float, func=np.std)
-        self._addmap('nobs', dtype=float, func=len)
-        self._addmap('rank', dtype=int, func=None)
-        self._addmap('color', dtype=object, func=None)
-        self._addmap('ttest', dtype=object, func=None)
-        self._addmap('latex', dtype=object, func=None)
-        self._addrank()
-        self._addcolor()
-        self._add_statistical_test()
-        if self.add_average:
-            self._addave()
-        self._modif = False
-
-    def _is_column_full(self, col):
-        return all(self.map['fill'][:, self.method_index[col]])
-
-    def _addave(self):
-        ave = Table(['ave'], self.methods,
-                    lower_is_better=self.lower_is_better,
-                    ttest=self.ttest,
-                    average=False,
-                    missing=self.missing,
-                    missing_str=self.missing_str,
-                    prec_mean=self.prec_mean,
-                    prec_std=self.prec_std,
-                    clean_zero=self.clean_zero,
-                    show_std=self.show_std,
-                    color=self.color,
-                    maxtone=self.maxtone)
-        for col in self.methods:
-            values = None
-            if self._is_column_full(col):
-                if self.ttest == 'ttest':
-                    # values = np.asarray(self.map['mean'][:, self.method_index[col]])
-                    values = np.concatenate(self.values[:, self.method_index[col]])
-                else: # wilcoxon
-                    # values = np.asarray(self.map['mean'][:, self.method_index[col]])
-                    values = np.concatenate(self.values[:, self.method_index[col]])
-            ave.add('ave', col, values)
-        self.average = ave
-
-    def add(self, benchmark, method, values):
-        if values is not None:
-            values = np.asarray(values)
-            if values.ndim == 0:
-                values = values.flatten()
-        rid, cid = self._coordinates(benchmark, method)
-        self.map['values'][rid, cid] = values
-        self.touch()
-
-    def get(self, benchmark, method, attr='mean'):
-        self.update()
-        assert attr in self.map, f'unknwon attribute {attr}'
-        rid, cid = self._coordinates(benchmark, method)
-        if self.map['fill'][rid, cid]:
-            v = self.map[attr][rid, cid]
-            if v is None or (isinstance(v, float) and np.isnan(v)):
-                return self.missing
-            return v
-        else:
-            return self.missing
-
-    def _coordinates(self, benchmark, method):
-        assert benchmark in self.benchmark_index, f'benchmark {benchmark} out of range'
-        assert method in self.method_index, f'method {method} out of range'
-        rid = self.benchmark_index[benchmark]
-        cid = self.method_index[method]
-        return rid, cid
-
-    def get_average(self, method, attr='mean'):
-        self.update()
-        if self.add_average:
-            return self.average.get('ave', method, attr=attr)
-        return None
-
-    def get_color(self, benchmark, method):
-        color = self.get(benchmark, method, attr='color')
-        if color is None:
-            return ''
-        return color
-
-    def latex(self, benchmark, method):
-        self.update()
-        i, j = self._coordinates(benchmark, method)
-        if self.map['fill'][i, j] == False:
-            return self.missing_str
-
-        mean = self.map['mean'][i, j]
-        l = f" {mean:.{self.prec_mean}f}"
-        if self.clean_zero:
-            l = l.replace(' 0.', '.')
-
-        isbest = self.map['rank'][i, j] == 1
-        if isbest:
-            l = "\\textbf{" + l.strip() + "}"
-
-        stat = '' if self.ttest is None else '^{\phantom{\ddag}}'
-        if self.ttest is not None and self.some_similar[j]:
-            test_label = self.map['ttest'][i, j]
-            if test_label == 'Sim':
-                stat = '^{\dag}'
-            elif test_label == 'Same':
-                stat = '^{\ddag}'
-            elif isbest or test_label == 'Diff':
-                stat = '^{\phantom{\ddag}}'
-
-        std = ''
-        if self.show_std:
-            std = self.map['std'][i, j]
-            std = f" {std:.{self.prec_std}f}"
-            if self.clean_zero:
-                std = std.replace(' 0.', '.')
-            std = f"\pm {std:{self.prec_std}}"
-
-        if stat != '' or std != '':
-            l = f'{l}${stat}{std}$'
-
-        if self.color:
-            l += ' ' + self.map['color'][i, j]
-
-        return l
-
-    def latexTabular(self, benchmark_replace={}, method_replace={}, aslines=False, endl='\\\\\hline'):
-        lines = []
-        l = '\multicolumn{1}{c|}{} & '
-        l += ' & '.join([method_replace.get(col, col) for col in self.methods])
-        l += ' \\\\\hline'
-        lines.append(l)
-
-        for row in self.benchmarks:
-            rowname = benchmark_replace.get(row, row)
-            l = rowname + ' & '
-            l += self.latexRow(row, endl=endl)
-            lines.append(l)
-
-        if self.add_average:
-            # l += '\hline\n'
-            l = '\hline \n \\textit{Average} & '
-            l += self.latexAverage(endl=endl)
-            lines.append(l)
-        if not aslines:
-            lines = '\n'.join(lines)
-        return lines
-
-    def latexRow(self, benchmark, endl='\\\\\hline\n'):
-        s = [self.latex(benchmark, col) for col in self.methods]
-        s = ' & '.join(s)
-        s += ' ' + endl
-        return s
-
-    def latexAverage(self, endl='\\\\\hline\n'):
-        if self.add_average:
-            return self.average.latexRow('ave', endl=endl)
-
-    def getRankTable(self, prec_mean=0):
-        t = Table(benchmarks=self.benchmarks, methods=self.methods, prec_mean=prec_mean, average=True,
-                  maxtone=self.maxtone, ttest=None)
-        for rid, cid in self._getfilled():
-            row = self.benchmarks[rid]
-            col = self.methods[cid]
-            t.add(row, col, self.get(row, col, 'rank'))
-        t.compute()
-        return t
-
-    def dropMethods(self, methods):
-        drop_index = [self.method_index[m] for m in methods]
-        new_methods = np.delete(self.methods, drop_index)
-        new_index = {col: j for j, col in enumerate(new_methods)}
-
-        self.map['values'] = self.values[:, np.asarray([self.method_index[m] for m in new_methods], dtype=int)]
-        self.methods = new_methods
-        self.method_index = new_index
-        self.touch()
-
-
-def pval_interpretation(p_val):
-    if 0.005 >= p_val:
-        return 'Diff'
-    elif 0.05 >= p_val > 0.005:
-        return 'Sim'
-    elif p_val > 0.05:
-        return 'Same'
-
-
-def color_red2green_01(val, maxtone=50):
-    if np.isnan(val): return None
-    assert 0 <= val <= 1, f'val {val} out of range [0,1]'
-
-    # rescale to [-1,1]
-    val = val * 2 - 1
-    if val < 0:
-        color = 'red'
-        tone = maxtone * (-val)
-    else:
-        color = 'green'
-        tone = maxtone * val
-    return '\cellcolor{' + color + f'!{int(tone)}' + '}'
diff --git a/result_table b/result_table
new file mode 160000
index 0000000..2e0e3d7
--- /dev/null
+++ b/result_table
@@ -0,0 +1 @@
+Subproject commit 2e0e3d7fc0464f9c9b50ace3c7785dd8d97710a6
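A practical note on the switch: with tabular.py deleted, the tables code presumably takes its Table implementation from the result_table subproject registered in .gitmodules, which points at an internal gitea SSH remote (gitea@gitea-s2i2s.isti.cnr.it) and therefore requires access to that server. On a fresh checkout the submodule is not populated automatically; the standard workflow is `git submodule update --init result_table` (or cloning with `git clone --recurse-submodules`), after which the pinned commit 2e0e3d7 is checked out inside result_table/.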