import os
from pathlib import Path

import numpy as np
from scipy.stats import entropy
from sklearn.base import BaseEstimator

import quapy as qp
from quapy.functional import l1_norm, ILRtransformation

from method.aggregative import KDEyML


def fetch_UCI_multiclass(data_name):
    """Fetch a UCI multiclass dataset through QuaPy with ``min_class_support=0.01``."""
    return qp.datasets.fetch_UCIMulticlassDataset(data_name, min_class_support=0.01)


def fetch_UCI_binary(data_name):
    """Fetch a UCI binary dataset through QuaPy."""
    return qp.datasets.fetch_UCIBinaryDataset(data_name)


# global configurations
binary = {
    'datasets': qp.datasets.UCI_BINARY_DATASETS,
    'fetch_fn': fetch_UCI_binary,
    'sample_size': 500
}

# Multiclass datasets left out of the experiments, with the reason for each.
_EXCLUDED_MULTICLASS_DATASETS = (
    'poker_hand',  # random performance
    'hcv',         # random performance
    'letter',      # many classes
    'isolet',      # many classes
)

multiclass = {
    # Build a filtered copy rather than calling ``.remove()`` on QuaPy's own
    # module-level list: in-place removal would alter the list for every other
    # importer of ``qp.datasets`` and raises ValueError if a name is absent.
    'datasets': [
        name for name in qp.datasets.UCI_MULTICLASS_DATASETS
        if name not in _EXCLUDED_MULTICLASS_DATASETS
    ],
    'fetch_fn': fetch_UCI_multiclass,
    'sample_size': 1000
}


# utils
def experiment_path(dir: Path, dataset_name: str, method_name: str) -> Path:
    """
    Return the pickle path for a (dataset, method) result, creating ``dir`` if needed.

    dir: directory holding the results (``Path`` or plain string)
    dataset_name: dataset identifier, first part of the file name
    method_name: method identifier, second part of the file name
    """
    # Coerce so that a plain string works with the ``/`` operator below.
    dir = Path(dir)
    os.makedirs(dir, exist_ok=True)
    return dir / f'{dataset_name}__{method_name}.pkl'


def normalized_entropy(p):
    """
    Normalized Shannon entropy in [0, 1]

    p: array-like, prevalence vector (sums to 1)

    Returns 0.0 when ``p`` has fewer than two entries, since the normalizer
    ``log(len(p))`` is zero (or undefined) in that case.
    """
    p = np.asarray(p)
    n_classes = len(p)
    if n_classes < 2:
        # log(1) == 0 would turn H / H_max into 0/0 (nan); a distribution over
        # a single class carries no uncertainty.
        return 0.0
    H = entropy(p)  # Shannon entropy
    H_max = np.log(n_classes)
    return np.clip(H / H_max, 0, 1)


def antagonistic_prevalence(p, strength=1):
    """
    Map a prevalence vector to its "antagonistic" counterpart.

    The vector is sent through ``ILRtransformation``, its coordinates are
    multiplied by ``-strength``, and the result is mapped back with the
    inverse transformation.

    p: prevalence vector accepted by ``ILRtransformation``
    strength: scale applied to the negated coordinates (1 is a plain sign flip)
    """
    ilr = ILRtransformation()
    z = ilr(p)
    z_ant = -strength * z
    p_ant = ilr.inverse(z_ant)
    return p_ant


class KDEyCLR(KDEyML):
    """``KDEyML`` with the kernel fixed to ``'aitchison'``; other arguments are forwarded unchanged."""

    def __init__(self, classifier: BaseEstimator = None, fit_classifier=True, val_split=5,
                 bandwidth=1., random_state=None):
        super().__init__(
            classifier=classifier,
            fit_classifier=fit_classifier,
            val_split=val_split,
            bandwidth=bandwidth,
            random_state=random_state,
            kernel='aitchison'
        )


class KDEyILR(KDEyML):
    """``KDEyML`` with the kernel fixed to ``'ilr'``; other arguments are forwarded unchanged."""

    def __init__(self, classifier: BaseEstimator = None, fit_classifier=True, val_split=5,
                 bandwidth=1., random_state=None):
        super().__init__(
            classifier=classifier,
            fit_classifier=fit_classifier,
            val_split=val_split,
            bandwidth=bandwidth,
            random_state=random_state,
            kernel='ilr'
        )