# (page-scrape metadata removed: "122 lines, 3.4 KiB, Python")
import os
|
|
from os.path import join
|
|
import pandas as pd
|
|
|
|
from scripts.data import load_vector_documents
|
|
from quapy.data.base import LabelledCollection
|
|
import sys
|
|
# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../')))
|
|
# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
|
|
# sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), './')))
|
|
#from LeQua2024.scripts import constants
|
|
#from LeQua2024._lequa2024 import fetch_lequa2024
|
|
import quapy as qp
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
# import seaborn as sns
|
|
from pathlib import Path
|
|
import glob
|
|
from scripts.constants import SAMPLE_SIZE
|
|
|
|
|
|
# os.chdir('/home/moreo/QuaPy/LeQua2024')
|
|
# print(os.getcwd())
|
|
|
|
# Task selector; TASK==1 is the binary task (the only one for which the
# diagonal plot below is produced), TASK==2 is the other LeQua task
TASK = 2

# configure quapy's evaluation sample size for the selected task
qp.environ['SAMPLE_SIZE'] = SAMPLE_SIZE[f'T{TASK}']

# location of the ground-truth prevalences and of the submitted results
true_prevs_path = f'../TruePrevalences/T{TASK}.test_prevalences/T{TASK}/public/test_prevalences.txt'
folder = f'../Results_CODALAB_2024/extracted/TASK_{TASK}'
|
|
|
|
def load_result_file(path):
    """Load a result (or ground-truth) file of prevalence estimates.

    The file is a CSV whose first column is the sample id (used as the
    index) and whose remaining columns hold the class prevalence values
    for each sample.

    :param path: path to the CSV file
    :return: a tuple ``(ids, prevs)`` where ``ids`` is a 1D ndarray with
        the sample ids and ``prevs`` is a 2D ndarray with one row of
        prevalence values per sample
    """
    df = pd.read_csv(path, index_col=0)
    # `ids` rather than `id`: do not shadow the builtin
    ids = df.index.to_numpy()
    prevs = df.values
    return ids, prevs
|
|
|
|
|
|
# every submitted result file for this task, one CSV per method/team
method_files = glob.glob(f"{folder}/*.csv")

# maps raw file/team names to the nicer names used in the plots;
# names not listed here are shown as-is
method_names_nice = dict([
    ('DistMatching-y', 'DM'),
    ('TeamGMNet', 'UniOviedo(Team1)'),
    ('tobiaslotz', 'Lamarr'),
])

# methods that must be skipped when building the plots
exclude_methods = [
    'TeamCUFE',
    'hustav',
    'PCC',
    'CC',
]
|
|
|
|
|
|
# desired_order=[
#     'Lamarr',
#     'SLD',
#     'DM',
#     'KDEy',
#     'UniOviedo(Team1)'
# ]
# desired_order=[
#     'PCC', 'Lamarr'
# ]
|
|
|
# load the true values (sample ids + true prevalence vector per sample)
true_id, true_prevs = load_result_file(true_prevs_path)

# define the loss for evaluation: 'RAE' (relative absolute error) or 'AE'
# (absolute error); error_log=True reports the error in log scale
error_name = 'RAE'
error_log = False

# map the configured error name to quapy's error function
_error_functions = {
    'RAE': qp.error.rae,
    'AE': qp.error.ae,
}
if error_name not in _error_functions:
    # fail with a diagnostic message instead of a bare ValueError()
    raise ValueError(f'unexpected error name {error_name!r}; valid values are {list(_error_functions)}')
err_function_ = _error_functions[error_name]

if error_log:
    # wrap the error in a log transform and update the label so that
    # plot/file names reflect the transform
    error_name = f'log({error_name})'

    def err_function(x, y):
        return np.log(err_function_(x, y))
else:
    err_function = err_function_
|
|
|
|
|
|
# load the training collection to obtain the training prevalence, which is
# used below to mark the training point in the plots
#train_prevalence = fetch_lequa2024(task=f'T{TASK}', data_home='./data')
train = LabelledCollection.load(f'../data/lequa2024/T{TASK}/public/training_data.txt', loader_func=load_vector_documents)
train_prev = train.prevalence()
#train_prev = np.tile(train_prev, (len(true_id),1))
|
|
|
|
from quapy.plot import error_by_drift, binary_diagonal

# load the participant and baseline results
method_names, estim_prevs = [], []
for method_file in method_files:
    method_name = Path(method_file).name.replace('.csv', '')
    if method_name in exclude_methods:
        continue
    # `method_file` is already the full path produced by glob
    # (identical to join(folder, method_name + '.csv'))
    ids, method_prevs = load_result_file(method_file)
    # every result file must report exactly the same sample ids, in the
    # same order, as the ground-truth file
    assert (true_id == ids).all(), f'unmatched files for {method_file}'
    # replace the raw name with its display name, when one is defined
    method_name = method_names_nice.get(method_name, method_name)
    method_names.append(method_name)
    estim_prevs.append(method_prevs)

# the plotting functions expect one entry of true prevalences per method
true_prevs = [true_prevs]*len(method_names)
|
|
# diagonal plot (true vs. estimated prevalence); only produced for TASK==1,
# since binary_diagonal is defined for the binary setting
savepath = f'./t{TASK}_diagonal.png'
if TASK==1:
    binary_diagonal(method_names, true_prevs, estim_prevs, pos_class=1, title=None, show_std=True, legend=True,
                    train_prev=train.prevalence(), savepath=savepath, method_order=None)
|
|
|
|
|
|
# plot error as a function of prior probability shift; the plotting function
# expects one copy of the training prevalence per method
tr_prevs =[train.prevalence()]*len(method_names)
savepath = f'./t{TASK}_{error_name}_pps.png'
error_by_drift(method_names,
               true_prevs,
               estim_prevs,
               tr_prevs, title=None,
               # NOTE(review): the error is hard-coded to 'rae' here, ignoring
               # the `error_name`/`err_function` configured above -- confirm
               # whether this should instead follow the configured error
               error_name='rae', show_std=True, n_bins=1000,
               # vlines marks the training prevalence of class 1 on the x-axis
               show_density=True, vlines=[tr_prevs[0][1]], savepath=savepath)
|