83 lines
2.3 KiB
Python
83 lines
2.3 KiB
Python
import os
|
|
from pathlib import Path
|
|
|
|
from sklearn.base import BaseEstimator
|
|
|
|
import quapy as qp
|
|
import numpy as np
|
|
|
|
from method.aggregative import KDEyML
|
|
from quapy.functional import l1_norm, ILRtransformation
|
|
from scipy.stats import entropy
|
|
|
|
|
|
|
|
def fetch_UCI_multiclass(data_name):
    """
    Load a UCI multiclass dataset through quapy.

    Forwards `data_name` to `qp.datasets.fetch_UCIMulticlassDataset` with
    `min_class_support` fixed to 0.01.

    data_name: dataset identifier, passed through unchanged
    returns: whatever `qp.datasets.fetch_UCIMulticlassDataset` returns
    """
    loader = qp.datasets.fetch_UCIMulticlassDataset
    return loader(data_name, min_class_support=0.01)
|
|
|
|
|
|
def fetch_UCI_binary(data_name):
    """
    Load a UCI binary dataset through quapy.

    data_name: dataset identifier, passed through unchanged
    returns: whatever `qp.datasets.fetch_UCIBinaryDataset` returns
    """
    loader = qp.datasets.fetch_UCIBinaryDataset
    return loader(data_name)
|
|
|
|
# global configurations
|
|
|
|
# Settings for the binary experiments: the dataset names (a private copy of
# quapy's list, so that edits here do not touch the library's own list), the
# function used to load each dataset, and the sample size.
binary = dict(
    datasets=qp.datasets.UCI_BINARY_DATASETS.copy(),
    fetch_fn=fetch_UCI_binary,
    sample_size=500,
)
|
|
|
|
# Settings for the multiclass experiments: the dataset names (a private copy
# of quapy's list, so that edits here do not touch the library's own list),
# the function used to load each dataset, and the sample size.
multiclass = dict(
    datasets=qp.datasets.UCI_MULTICLASS_DATASETS.copy(),
    fetch_fn=fetch_UCI_multiclass,
    sample_size=1000,
)
|
|
# Drop the datasets that are excluded from the multiclass experiments.
# Each name is removed on its own: with a single shared `try`, the first name
# missing from the list would raise ValueError and silently skip the removal
# of all the names that follow it.
for _excluded in (
    'poker_hand',  # random performance
    'hcv',  # random performance
    'letter',  # many classes
    'isolet',  # many classes
):
    try:
        multiclass['datasets'].remove(_excluded)
    except ValueError:
        # not present in this list of datasets; nothing to drop
        pass
del _excluded  # do not leave the loop variable behind in the module namespace
|
|
|
|
|
|
# utils
|
|
def experiment_path(dir:Path, dataset_name:str, method_name:str):
    """
    Return the pickle path for a (dataset, method) pair, creating the
    directory (and any missing parents) if it does not exist yet.

    dir: directory holding the result files; a `Path` or a plain string
    dataset_name: first component of the file name
    method_name: second component of the file name
    returns: `Path` of the form `<dir>/<dataset_name>__<method_name>.pkl`
    """
    # Coerce first: a plain string is accepted by os.makedirs but does not
    # support the `/` operator used to build the file path.
    dir = Path(dir)
    os.makedirs(dir, exist_ok=True)
    return dir / f'{dataset_name}__{method_name}.pkl'
|
|
|
|
|
|
def normalized_entropy(p):
    """
    Normalized Shannon entropy in [0, 1]

    The entropy of `p` is divided by log(n), with n = len(p), and the result
    is clipped to [0, 1].

    p: array-like, prevalence vector (sums to 1)
    returns: the normalized entropy; 0 when `p` has fewer than two entries
    """
    p = np.asarray(p)
    H = entropy(p)  # Shannon entropy
    if len(p) < 2:
        # log(1) == 0 would turn the ratio below into 0/0 (NaN plus a runtime
        # warning); with fewer than two entries the value is defined as 0
        return np.zeros_like(H, dtype=float)[()]
    H_max = np.log(len(p))  # entropy of the uniform distribution over len(p) entries
    return np.clip(H / H_max, 0, 1)
|
|
|
|
|
|
def antagonistic_prevalence(p, strength=1):
    """
    Map `p` through an `ILRtransformation`, multiply the transformed
    coordinates by `-strength`, and map the result back with the
    transformation's `inverse`.

    p: value accepted by `ILRtransformation` (presumably a prevalence vector;
       confirm against callers)
    strength: factor applied, with flipped sign, to the transformed coordinates
    returns: the output of `ILRtransformation.inverse` on the scaled coordinates
    """
    transform = ILRtransformation()
    flipped = -strength * transform(p)
    return transform.inverse(flipped)
|
|
|
|
|
|
class KDEyCLR(KDEyML):
    """
    `KDEyML` with the kernel fixed to ``'aitchison'``.

    Every other constructor argument is forwarded unchanged to `KDEyML`.
    """

    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
        super().__init__(
            classifier=classifier,
            fit_classifier=fit_classifier,
            val_split=val_split,
            bandwidth=bandwidth,
            random_state=random_state,
            kernel='aitchison',  # the only setting this subclass pins
        )
|
|
|
|
|
|
class KDEyILR(KDEyML):
    """
    `KDEyML` with the kernel fixed to ``'ilr'``.

    Every other constructor argument is forwarded unchanged to `KDEyML`.
    """

    def __init__(self, classifier: BaseEstimator=None, fit_classifier=True, val_split=5, bandwidth=1., random_state=None):
        super().__init__(
            classifier=classifier,
            fit_classifier=fit_classifier,
            val_split=val_split,
            bandwidth=bandwidth,
            random_state=random_state,
            kernel='ilr',  # the only setting this subclass pins
        )
|