# Forked from moreo/QuaPy (113 lines, 5.4 KiB, Python).
import math

import numpy as np
from scipy.special import softmax
from sklearn.model_selection import cross_val_predict

from NewMethods.fgsld.em import History, get_measures_single_history
from NewMethods.fgsld.metrics import isomerous_bins, isometric_bins


class FineGrainedSLD:
    """Fine-grained SLD (FGSLD): adjusts test posteriors bin by bin.

    Posterior probabilities are grouped into ``n_bins`` bins per label; the
    test posteriors of each bin are then iteratively shifted by the difference
    between the test-side and training-side prior estimates for that bin.
    """

    def __init__(self, x_tr, x_te, y_tr, tr_priors, clf, n_bins=10):
        """Compute the training and test posteriors used by :meth:`run`.

        :param x_tr: training instances, passed to ``cross_val_predict``.
        :param x_te: test instances, passed to ``clf.predict_proba``.
        :param y_tr: training labels.
        :param tr_priors: training priors, indexed by label index.
        :param clf: classifier exposing ``predict_proba``; it is used as-is on
            the test set, so presumably it is already fitted -- TODO confirm
            against callers.
        :param n_bins: number of bins per label.
        """
        self.y_tr = y_tr
        self.clf = clf
        self.tr_priors = tr_priors
        self.te_preds = clf.predict_proba(x_te)
        # Training posteriors are obtained out-of-fold via cross-validation.
        self.tr_preds = cross_val_predict(clf, x_tr, y_tr, method='predict_proba', n_jobs=10)
        self.n_bins = n_bins
        self.history: "list[History]" = []
        self.multi_class = False

    def run(self, isomerous_binning, epsilon=1e-6, compute_bins_at_every_iter=True):
        """
        Run the FGSLD algorithm.

        The test posteriors stored in ``self.te_preds`` are updated (the first
        iteration writes into the stored array in place, later iterations
        rebind it), and the number of iterations performed is stored in
        ``self.iterations``.

        :param isomerous_binning: whether to use isomerous or isometric binning.
            Only isometric binning (a false value) is currently accepted; a
            true value trips an assertion.
        :param epsilon: stopping condition; iteration stops once the largest
            relative change of the per-bin test priors between two consecutive
            iterations falls below this value (or after 1000 iterations).
        :param compute_bins_at_every_iter: whether FGSLD should recompute the posterior bins at every iteration or not.
        :return: a tuple ``(priors, posteriors)`` where ``posteriors`` is the
            adjusted test posterior matrix after the last iteration and
            ``priors`` is its column-wise mean.
        """
        # Fail fast: the update rule below has only been used with isometric bins.
        assert not isomerous_binning, 'not tested'

        smoothing_tr = 1e-9  # 1 / (2 * self.tr_preds.shape[0])
        smoothing_te = 1e-9  # 1 / (2 * self.te_preds.shape[0])
        max_iterations = 1000
        alpha = 1  # weight of the per-bin (local) test prior vs. the global one
        beta = 0.1  # weight of the per-bin (local) training prior vs. the global one
        lr = 1  # step size applied to the (te - tr) shift

        # `np.float` was removed from NumPy; the builtin `float` is the same dtype.
        tr_bin_priors = np.zeros((self.n_bins, self.tr_preds.shape[1]), dtype=float)
        te_bin_priors = np.zeros((self.n_bins, self.te_preds.shape[1]), dtype=float)
        tr_bins = self.__create_bins(training=True, isomerous_binning=isomerous_binning)
        self.__compute_bins_priors(tr_bin_priors, self.tr_preds, tr_bins, smoothing_tr)

        # Every update starts from the original (unadjusted) test posteriors.
        te_preds_cp = self.te_preds.copy()
        te_bins = None
        s = 0
        val = 2 * epsilon
        # `not val < epsilon` (rather than `val >= epsilon`) keeps iterating on NaN.
        while not val < epsilon and s < max_iterations:
            if compute_bins_at_every_iter or s == 0:
                te_bins = self.__create_bins(training=False, isomerous_binning=isomerous_binning)

            # The first iteration measures the change against the training priors.
            if s == 0:
                te_bin_priors_prev = tr_bin_priors.copy()
            else:
                te_bin_priors_prev = te_bin_priors.copy()
            self.__compute_bins_priors(te_bin_priors, self.te_preds, te_bins, smoothing_te)

            for label_idx, label_bins in te_bins.items():
                for i, bin_ in enumerate(label_bins):
                    if bin_.shape[0] == 0:
                        continue
                    # Blend local (per-bin) and global estimates on each side.
                    local_te = te_bin_priors[i, label_idx]
                    global_te = self.te_preds[:, label_idx].mean()
                    te = local_te * alpha + global_te * (1 - alpha)
                    local_tr = tr_bin_priors[i, label_idx]
                    global_tr = self.tr_priors[label_idx]
                    tr = local_tr * beta + global_tr * (1 - beta)

                    # Additive shift of the original posteriors, clipped at zero.
                    old_posterior = te_preds_cp[bin_, label_idx]
                    self.te_preds[bin_, label_idx] = np.clip(old_posterior + (te - tr) * lr, 0, None)

            # Normalization step: make every row sum to one again.
            self.te_preds = self.te_preds / self.te_preds.sum(axis=1, keepdims=True)

            val = self._max_relative_change(te_bin_priors, te_bin_priors_prev)
            s += 1

        self.iterations = s

        priors = self.te_preds.mean(axis=0)
        posteriors = self.te_preds

        return priors, posteriors

    @staticmethod
    def _max_relative_change(current, previous):
        """Return the largest absolute relative change ``|current / previous - 1|``.

        The absolute value is taken after subtracting one, so decreases count
        as much as increases.
        """
        return np.max(np.abs(current / previous - 1))

    def __compute_bins_priors(self, bin_priors_placeholder, posteriors, bins, smoothing):
        """Write the smoothed mean posterior of every bin into ``bin_priors_placeholder``.

        :param bin_priors_placeholder: array indexed as ``[bin, label]``; filled in place.
        :param posteriors: posterior matrix indexed as ``[sample, label]``.
        :param bins: mapping from label index to a sequence of per-bin index arrays.
        :param smoothing: additive smoothing; empty bins are set to exactly this value.
        """
        denominator = 1 + self.n_bins * smoothing  # normalize priors
        for label_idx, label_bins in bins.items():
            for i, bin_ in enumerate(label_bins):
                if bin_.shape[0] == 0:
                    bin_priors_placeholder[i, label_idx] = smoothing
                else:
                    mean_posterior = posteriors[bin_, label_idx].mean()
                    bin_priors_placeholder[i, label_idx] = (mean_posterior + smoothing) / denominator

    def __create_bins(self, training: bool, isomerous_binning: bool):
        """Bin the training or test posteriors for every label.

        :param training: use ``self.tr_preds`` if true, else ``self.te_preds``.
        :param isomerous_binning: delegate to ``isomerous_bins`` if true, else
            to ``isometric_bins`` over ``n_bins`` equal-width intervals of [0, 1).
        :return: dict mapping each label index to its bins.
        """
        preds = self.tr_preds if training else self.te_preds
        n_labels = preds.shape[1]
        if isomerous_binning:
            return {
                label_idx: isomerous_bins(label_idx, preds, self.n_bins)
                for label_idx in range(n_labels)
            }

        # Left edges of the equal-width intervals; presumably `isometric_bins`
        # returns a mapping keyed by these edges -- it is indexed that way below.
        intervals = np.linspace(0., 1., num=self.n_bins, endpoint=False)
        bins = {}
        for label_idx in range(n_labels):
            bins_by_interval = isometric_bins(label_idx, preds, intervals)
            bins[label_idx] = [bins_by_interval[start] for start in intervals]
        return bins