"""
This script takes all results from the book domain that correspond to the APP
protocol, filters by smoothness so that only the 50% smoothest examples are
considered, and recomputes the averages of the NMD, thus effectively reporting
the results for the APP-OQ protocol.
"""
import numpy as np
from scipy.stats import wilcoxon
import quapy as qp
import os
from os.path import join
from Ordinal.tabular import Table
from utils import load_samples_folder, load_single_sample_pkl, jaggedness
from Ordinal.evaluation import nmd, mnmd
from tqdm import tqdm
import pandas as pd
from glob import glob
from pathlib import Path


def parse_str_prev(df_col):
    """
    Parse a DataFrame column of stringified prevalence vectors (cells look
    like "[0.1 0.2 0.7]") into a 2D float array of shape (n_rows, n_classes).
    """
    values = df_col.values
    # strip the surrounding brackets, then parse the space-separated floats
    array_list = [np.fromstring(cell[1:-1], sep=' ') for cell in values]
    return np.asarray(array_list)


def parse_result_file(path):
    """
    Read a result CSV and return (true_prev, estim_prev, nmds), where the
    prevalence columns are parsed into 2D arrays and nmds is a 1D array of
    per-sample NMD error values.
    """
    df = pd.read_csv(path)
    true_prev = parse_str_prev(df['true-prev'])
    estim_prev = parse_str_prev(df['estim-prev'])
    # NOTE: named `nmds` (not `nmd`) to avoid shadowing the imported function
    nmds = df['nmd'].values
    return true_prev, estim_prev, nmds


def ave_jaggedness(prevs, less_percentile=1):
    """
    Mean jaggedness of the smoothest fraction of `prevs`.

    :param prevs: iterable of prevalence vectors
    :param less_percentile: fraction (0-1] of the smoothest vectors to keep
    :return: mean jaggedness over the retained (smoothest) vectors
    """
    jag = np.sort([jaggedness(p) for p in prevs])
    up_to = int(less_percentile * len(jag))
    return np.mean(jag[:up_to])


def retain_half_smoothest(true_prev, estim_prev, nmd):
    """
    Keep only the 50% of samples whose true prevalence is smoothest (lowest
    jaggedness); returns the filtered (true_prev, estim_prev, nmd) triplet.
    """
    jag = [jaggedness(p) for p in true_prev]
    order = np.argsort(jag)
    up_to = len(order) // 2
    order = order[:up_to]
    return true_prev[order], estim_prev[order], nmd[order]


def compute_half_smoothest_nmd(true_prev, estim_prev, nmd):
    """Return the NMD values restricted to the 50% smoothest samples."""
    _, _, nmd_smooth = retain_half_smoothest(true_prev, estim_prev, nmd)
    return nmd_smooth


if __name__ == '__main__':
    domain = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-average'
    datapath = './data'
    in_protocol = 'app'
    out_protocol = 'app-oq'
    in_result_path = join('./results', domain, in_protocol)
    out_result_path = join('./results', domain, out_protocol)
    os.makedirs(out_result_path, exist_ok=True)

    # recompute the results in terms of APP-OQ: one entry per
    # "<classifier>-<quantifier>" combination, holding the filtered NMDs
    result_dict = {}
    for filepath in glob(f'{in_result_path}/*).all.csv'):
        # file names look like "<Quantifier>(<classifier>).all.csv"
        name = Path(filepath).name
        quantifier = name[:name.index('(')]
classifier = name[name.index('(')+1:name.index(')')] true_prev, estim_prev, nmds = parse_result_file(filepath) nmds = compute_half_smoothest_nmd(true_prev, estim_prev, nmds) result_dict[classifier + '-' + quantifier] = nmds # convert to numbers and search for the best in each quantifier best_keys = {} best_nmds = {} for quantifier in ['CC', 'PCC', 'ACC', 'PACC', 'SLD']: best_ave, best_key, best_nmd = None, None, None for classifier in ['LR', 'OLR-AT', 'OLR-IT', 'ORidge', 'LAD']: key = classifier + '-' + quantifier if key in result_dict: nmds = result_dict[key] mean_val = np.mean(nmds) if best_ave is None or mean_val < best_ave: best_ave = mean_val best_key = key best_nmd = nmds best_keys[quantifier] = best_key best_nmds[quantifier] = best_nmd # print(best_keys) # write a latex table for q in ['CC', 'PCC', 'ACC', 'PACC', 'SLD']: print('& \multicolumn{2}{c}{'+q+'} ', end='') print('\\\\') print('\\midrule') for classifier in ['LR', 'OLR-AT', 'OLR-IT', 'ORidge', 'LAD']: print(classifier + '\t', end='') for quantifier in ['CC', 'PCC', 'ACC', 'PACC', 'SLD']: key = classifier + '-' + quantifier the_best_nmds = best_nmds[quantifier] if key in result_dict: nmds = result_dict[key] mean_val = np.mean(nmds) bold = False if best_keys[quantifier] == key: bold = True else: _, pval = wilcoxon(nmds, the_best_nmds) if pval > 0.01: bold = True str_mean = f'{mean_val:.4f}' if bold: str_mean = '\\textbf{' + str_mean + '}' if classifier == 'LR': std_val = np.std(nmds) str_val = f'{str_mean} & $\pm {std_val:.4f}$' else: rel_increment = 100 * (mean_val-np.mean(the_best_nmds)) / np.mean(the_best_nmds) sign = '+' if rel_increment>0 else '' str_val = f'{str_mean} & ({sign}{rel_increment:.1f}\\%)' else: str_val = '\multicolumn{2}{c}{---}' str_val = ' & ' + str_val print(str_val, end='') print('\\\\')