# QuaPy/Ordinal/experiments_lr_vs_ordlr_APP — recompute APP results under the APP-OQ protocol
import numpy as np
from scipy.stats import wilcoxon
import quapy as qp
import os
from os.path import join
from Ordinal.tabular import Table
from utils import load_samples_folder, load_single_sample_pkl, jaggedness
from Ordinal.evaluation import nmd, mnmd
from tqdm import tqdm
import pandas as pd
from glob import glob
from pathlib import Path
"""
This script takes all results from the book domain, that correspond to the APP protocol, and filters by
smoothness so that only the 50% smoothest examples are considered, and recomputes the averages of the nmd
thus effectively reporting the results for the APP-OQ protocol
"""
def parse_str_prev(df_col):
    """Parse a DataFrame column of bracketed prevalence strings.

    Each cell looks like '[0.1 0.2 0.7]'; the surrounding brackets are
    stripped and the space-separated numbers parsed into a float vector.

    :param df_col: pandas Series of prevalence strings
    :return: 2D numpy array, one row per cell
    """
    parsed = []
    for text in df_col.values:
        # drop the leading '[' and trailing ']' before numeric parsing
        parsed.append(np.fromstring(text[1:-1], sep=' '))
    return np.asarray(parsed)
def parse_result_file(path):
    """Load a result CSV and return (true_prev, estim_prev, nmd_values).

    The CSV is expected to contain the columns 'true-prev' and 'estim-prev'
    (stringified prevalence vectors) and 'nmd' (one float per sample).

    :param path: path to a '*.all.csv' result file
    :return: tuple of (2D array of true prevalences, 2D array of estimated
        prevalences, 1D array of nmd scores)
    """
    df = pd.read_csv(path)
    true_prev = parse_str_prev(df['true-prev'])
    estim_prev = parse_str_prev(df['estim-prev'])
    # renamed from 'nmd' to avoid shadowing the nmd function imported
    # from Ordinal.evaluation
    nmd_values = df['nmd'].values
    return true_prev, estim_prev, nmd_values
def ave_jaggedness(prevs, less_percentile=1):
    """Mean jaggedness of the smoothest fraction of prevalence vectors.

    :param prevs: iterable of prevalence vectors
    :param less_percentile: fraction (0-1] of the smoothest vectors to keep;
        1 averages over all of them
    :return: mean jaggedness over the retained (lowest-jaggedness) vectors
    """
    scores = sorted(jaggedness(p) for p in prevs)
    cutoff = int(less_percentile * len(scores))
    return np.mean(scores[:cutoff])
def retain_half_smoothest(true_prev, estim_prev, nmd):
    """Keep only the 50% of samples whose true prevalence is smoothest.

    Samples are ranked by the jaggedness of their true prevalence vector and
    the lower-jaggedness half is retained.

    :param true_prev: 2D array of true prevalences
    :param estim_prev: 2D array of estimated prevalences
    :param nmd: 1D array of nmd scores, aligned with the prevalences
    :return: the three inputs restricted to the smoothest half
    """
    smoothness = np.asarray([jaggedness(p) for p in true_prev])
    half = smoothness.argsort()[:len(smoothness) // 2]
    return true_prev[half], estim_prev[half], nmd[half]
def compute_half_smoothest_nmd(true_prev, estim_prev, nmd):
    """Return the nmd scores restricted to the 50% smoothest samples."""
    return retain_half_smoothest(true_prev, estim_prev, nmd)[2]
if __name__ == '__main__':
    # Configuration: read per-sample results produced under the artificial
    # prevalence protocol (APP) for the book domain, and re-evaluate them
    # under APP-OQ (i.e., keeping only the 50% smoothest samples).
    domain = 'Books-roberta-base-finetuned-pkl/checkpoint-1188-average'
    datapath = './data'  # NOTE(review): appears unused in this script — confirm before removing
    in_protocol = 'app'
    out_protocol = 'app-oq'
    in_result_path = join('./results', domain, in_protocol)
    out_result_path = join('./results', domain, out_protocol)
    # NOTE(review): the output directory is created but nothing is written to
    # it below; results are only printed — confirm whether writing was intended.
    os.makedirs(out_result_path, exist_ok=True)

    # recompute the results in terms of APP-OQ: result files are named
    # '<quantifier>(<classifier>).all.csv'; collect the APP-OQ nmd scores
    # keyed by '<classifier>-<quantifier>'
    result_dict = {}
    for filepath in glob(f'{in_result_path}/*).all.csv'):
        name = Path(filepath).name
        quantifier = name[:name.index('(')]
        classifier = name[name.index('(')+1:name.index(')')]

        true_prev, estim_prev, nmds = parse_result_file(filepath)
        nmds = compute_half_smoothest_nmd(true_prev, estim_prev, nmds)

        result_dict[classifier + '-' + quantifier] = nmds

    # for each quantifier, find the classifier with the lowest mean nmd
    best_keys = {}
    best_nmds = {}
    for quantifier in ['CC', 'PCC', 'ACC', 'PACC', 'SLD']:
        best_ave, best_key, best_nmd = None, None, None
        for classifier in ['LR', 'OLR-AT', 'OLR-IT', 'ORidge', 'LAD']:
            key = classifier + '-' + quantifier
            if key in result_dict:
                nmds = result_dict[key]
                mean_val = np.mean(nmds)
                if best_ave is None or mean_val < best_ave:
                    best_ave = mean_val
                    best_key = key
                    best_nmd = nmds
        best_keys[quantifier] = best_key
        best_nmds[quantifier] = best_nmd

    # write a latex table: one column pair per quantifier, one row per classifier
    for q in ['CC', 'PCC', 'ACC', 'PACC', 'SLD']:
        print('& \multicolumn{2}{c}{'+q+'} ', end='')
    print('\\\\')
    print('\\midrule')
    for classifier in ['LR', 'OLR-AT', 'OLR-IT', 'ORidge', 'LAD']:
        print(classifier + '\t', end='')
        for quantifier in ['CC', 'PCC', 'ACC', 'PACC', 'SLD']:
            key = classifier + '-' + quantifier
            the_best_nmds = best_nmds[quantifier]
            if key in result_dict:
                nmds = result_dict[key]
                mean_val = np.mean(nmds)
                # boldface the best method per quantifier, plus any method not
                # statistically distinguishable from it (Wilcoxon, p > 0.01)
                bold = False
                if best_keys[quantifier] == key:
                    bold = True
                else:
                    _, pval = wilcoxon(nmds, the_best_nmds)
                    if pval > 0.01:
                        bold = True
                str_mean = f'{mean_val:.4f}'
                if bold:
                    str_mean = '\\textbf{' + str_mean + '}'
                if classifier == 'LR':
                    # LR row (the baseline) reports mean +- std ...
                    std_val = np.std(nmds)
                    str_val = f'{str_mean} & $\pm {std_val:.4f}$'
                else:
                    # ... other rows report the relative increment w.r.t. the best
                    rel_increment = 100 * (mean_val-np.mean(the_best_nmds)) / np.mean(the_best_nmds)
                    sign = '+' if rel_increment>0 else ''
                    str_val = f'{str_mean} & ({sign}{rel_increment:.1f}\\%)'
            else:
                # missing combination: print a dash spanning both sub-columns
                str_val = '\multicolumn{2}{c}{---}'
            str_val = ' & ' + str_val
            print(str_val, end='')
        print('\\\\')