forked from moreo/QuaPy

commit 8381bce3a8 ("more fgsld")
parent 0d8c6aeba6
@@ -32,4 +32,4 @@ def evaluate_results(methods, datasets, error_name):
     print(f'Ave: {np.mean(all):.3f}')


-evaluate_results(methods=['epacc*mae1k'], datasets=['*'], error_name='mae')
+evaluate_results(methods=['*'], datasets=['*'], error_name='mae')
@@ -1,5 +1,6 @@
 from sklearn.linear_model import LogisticRegression
 import quapy as qp
+from NewMethods.fgsld.fgsld_quantifiers import FakeFGLSD
 from classification.methods import PCALR
 from method.meta import QuaNet
 from method.non_aggregative import MaximumLikelihoodPrevalenceEstimation
@@ -36,8 +37,10 @@ def experimental_models():
     svmperf_params = {'C': __C_range}
     #yield 'paccsld', PACCSLD(newLR()), lr_params
     # yield 'hdysld', OneVsAll(HDySLD(newLR())), lr_params  # <-- promising!
-    yield 'PACC(5)', PACC(newLR(), val_split=5), {}
-    yield 'PACC(10)', PACC(newLR(), val_split=10), {}
+    #yield 'PACC(5)', PACC(newLR(), val_split=5), {}
+    #yield 'PACC(10)', PACC(newLR(), val_split=10), {}
+    yield 'FGSLD(3)', FakeFGLSD(newLR(), nbins=3, isomerous=False, recompute_bins=True), {}
+    yield 'FGSLD(5)', FakeFGLSD(newLR(), nbins=5, isomerous=False, recompute_bins=True), {}



@@ -209,7 +212,7 @@ if __name__ == '__main__':
     print(f'Result folder: {args.results}')
     np.random.seed(0)

-    optim_losses = ['mae', 'mrae']
+    optim_losses = ['mae']
     datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TRAIN

     qp.util.parallel(run, itertools.product(optim_losses, datasets, experimental_models()), n_jobs=settings.N_JOBS)
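Note on the driver above: experimental_models() yields (name, model, hyperparameter-grid) triples, and itertools.product crosses them with every loss and dataset before qp.util.parallel fans the combinations out over N_JOBS workers. A minimal sketch of that fan-out, with a hypothetical run() body standing in for the one defined in this script:

import itertools

# hypothetical stand-in for this script's run(): it receives one combination
def run(combination):
    optim_loss, dataset_name, (method_name, model, hyperparams) = combination
    print(f'{method_name} on {dataset_name}, optimising {optim_loss}, grid={hyperparams}')

losses = ['mae']
datasets = ['hcr', 'semeval16']
models = [('FGSLD(3)', None, {}), ('FGSLD(5)', None, {})]  # placeholders for the real quantifiers

for combination in itertools.product(losses, datasets, models):
    run(combination)  # qp.util.parallel dispatches exactly these calls across N_JOBS workers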
@@ -5,7 +5,7 @@ from collections import namedtuple
 from sklearn.metrics import brier_score_loss
 from sklearn.preprocessing import MultiLabelBinarizer

-from metrics import smoothmacroF1, isometric_brier_decomposition, isomerous_brier_decomposition
+from NewMethods.fgsld.metrics import smoothmacroF1, isometric_brier_decomposition, isomerous_brier_decomposition

 History = namedtuple('History', ('posteriors', 'priors', 'y', 'iteration', 'stopping_criterium'))
 MeasureSingleHistory = namedtuple('MeasureSingleHistory', (
@@ -1,13 +1,15 @@
 from sklearn.calibration import CalibratedClassifierCV
+from sklearn.linear_model import LogisticRegression
 from sklearn.svm import LinearSVC
-from fgsld.fgsld_quantifiers import FakeFGLSD
+from fgsld_quantifiers import FakeFGLSD
 from method.aggregative import EMQ, CC
 import quapy as qp
+import numpy as np


 qp.environ['SAMPLE_SIZE'] = 500

-dataset = qp.datasets.fetch_reviews('kindle')
+dataset = qp.datasets.fetch_reviews('hp')
 qp.data.preprocessing.text2tfidf(dataset, min_df=5, inplace=True)

 training = dataset.training
@@ -15,22 +17,22 @@ test = dataset.test

 cls = CalibratedClassifierCV(LinearSVC())

+#cls = LogisticRegression()

 method_names, true_prevs, estim_prevs, tr_prevs = [], [], [], []

 for model, model_name in [
     (CC(cls), 'CC'),
-    # (FakeFGLSD(cls, nbins=5, isomerous=False, recompute_bins=False), 'FGSLD-isometric-stat-5'),
-    (FakeFGLSD(cls, nbins=5, isomerous=True, recompute_bins=True), 'FGSLD-isometric-dyn-5'),
-    # (FakeFGLSD(cls, nbins=5, isomerous=True, recompute_bins=False), 'FGSLD-isomerous-stat-5'),
-    # (FakeFGLSD(cls, nbins=10, isomerous=True, recompute_bins=True), 'FGSLD-isomerous-dyn-10'),
-    #(FakeFGLSD(cls, nbins=5, isomerous=False), 'FGSLD-5'),
-    #(FakeFGLSD(cls, nbins=10, isomerous=False), 'FGSLD-10'),
-    #(FakeFGLSD(cls, nbins=50, isomerous=False), 'FGSLD-50'),
-    #(FakeFGLSD(cls, nbins=100, isomerous=False), 'FGSLD-100'),
-    # (FakeFGLSD(cls, nbins=1, isomerous=False), 'FGSLD-1'),
-    #(FakeFGLSD(cls, nbins=10, isomerous=True), 'FGSLD-10-ISO'),
-    # (FakeFGLSD(cls, nbins=50, isomerous=False), 'FGSLD-50'),
+    # (FakeFGLSD(cls, nbins=20, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-20'),
+    (FakeFGLSD(cls, nbins=11, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-11'),
+    #(FakeFGLSD(cls, nbins=8, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-8'),
+    #(FakeFGLSD(cls, nbins=6, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-6'),
+    (FakeFGLSD(cls, nbins=5, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-5'),
+    #(FakeFGLSD(cls, nbins=4, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-4'),
+    (FakeFGLSD(cls, nbins=3, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-3'),
+    # (FakeFGLSD(cls, nbins=1, isomerous=False, recompute_bins=True), 'FGSLD-isometric-dyn-1'),
+    # (FakeFGLSD(cls, nbins=3, isomerous=False, recompute_bins=False), 'FGSLD-isometric-sta-3'),
     (EMQ(cls), 'SLD'),
 ]:
     print('running ', model_name)
@@ -42,6 +44,8 @@ for model, model_name in [
     true_prevs.append(true_prev)
     estim_prevs.append(estim_prev)
     tr_prevs.append(training.prevalence())
+    #if hasattr(model, 'iterations'):
+    #    print(f'iterations ave={np.mean(model.iterations):.3f}, min={np.min(model.iterations):.3f}, max={np.max(model.iterations):.3f}')


 qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs, train_prev=tr_prevs[0], savepath='./plot_fglsd.png')
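The loop body that produces true_prev and estim_prev sits outside this hunk; what the appended lists must contain for binary_diagonal to work is, per method, one (n_samples, 2) array of true prevalences and one of estimates. A self-contained sketch with synthetic values (no classifier involved; the bias numbers are invented purely for illustration):

import numpy as np
import quapy as qp

grid = np.linspace(0., 1., 21)                   # swept true prevalences for the positive class
method_names, true_prevs, estim_prevs = ['CC', 'SLD'], [], []
for bias in (0.10, 0.02):                        # pretend CC drifts more than SLD
    est = np.clip(grid + bias, 0., 1.)
    true_prevs.append(np.column_stack([1 - grid, grid]))
    estim_prevs.append(np.column_stack([1 - est, est]))

qp.plot.binary_diagonal(method_names, true_prevs, estim_prevs,
                        train_prev=np.array([0.5, 0.5]), savepath='./demo_diagonal.png')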
@@ -14,6 +14,7 @@ class FakeFGLSD(BaseQuantifier):
         self.nbins = nbins
         self.isomerous = isomerous
         self.recompute_bins = recompute_bins
+        self.iterations = []

     def fit(self, data: LabelledCollection):
         self.Xtr, self.ytr = data.Xy
@@ -24,6 +25,7 @@ class FakeFGLSD(BaseQuantifier):
         tr_priors = F.prevalence_from_labels(self.ytr, n_classes=2)
         fgsld = FineGrainedSLD(self.Xtr, instances, self.ytr, tr_priors, self.learner, n_bins=self.nbins)
         priors, posteriors = fgsld.run(self.isomerous, compute_bins_at_every_iter=self.recompute_bins)
+        self.iterations.append(fgsld.iterations)
         return priors

     def get_params(self, deep=True):
@@ -1,9 +1,9 @@
 import numpy as np
-from metrics import isomerous_bins, isometric_bins
-from em import History, get_measures_single_history
+from NewMethods.fgsld.metrics import isomerous_bins, isometric_bins
+from NewMethods.fgsld.em import History, get_measures_single_history
 from sklearn.model_selection import cross_val_predict
 import math
+from scipy.special import softmax


 class FineGrainedSLD:
     def __init__(self, x_tr, x_te, y_tr, tr_priors, clf, n_bins=10):
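For orientation, the tr_preds/te_preds the class works with are classifier posteriors for the training and test sets; given the cross_val_predict import above, the training posteriors are presumably produced out-of-fold, while the test posteriors come straight from the fitted classifier. A minimal sketch of that setup (synthetic data; the variable names mirror, but are not, the class's fields):

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_predict

rng = np.random.RandomState(0)
x_tr, y_tr = rng.randn(200, 5), rng.randint(0, 2, 200)
x_te = rng.randn(100, 5)

clf = LogisticRegression()
# out-of-fold posteriors avoid the optimistic probabilities a model assigns to its own training data
tr_preds = cross_val_predict(clf, x_tr, y_tr, method='predict_proba', cv=10)
te_preds = clf.fit(x_tr, y_tr).predict_proba(x_te)
tr_priors = np.bincount(y_tr, minlength=2) / len(y_tr)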
@@ -16,7 +16,7 @@ class FineGrainedSLD:
         self.history: [History] = []
         self.multi_class = False

-    def run(self, isomerous_binning, epsilon=1e-6, compute_bins_at_every_iter=True, return_posteriors_hist=False):
+    def run(self, isomerous_binning, epsilon=1e-6, compute_bins_at_every_iter=True):
         """
         Run the FGSLD algorithm.

@@ -26,22 +26,18 @@ class FineGrainedSLD:
         :param return_posteriors_hist: whether to return posteriors at every iteration or not.
         :return: If `return_posteriors_hist` is true, the returned posteriors will be a list of numpy arrays, else a single numpy array with posteriors at last iteration.
         """
-        smoothing_tr = 1 / (2 * self.tr_preds.shape[0])
-        smoothing_te = 1 / (2 * self.te_preds.shape[0])
+        smoothing_tr = 1e-9  # 1 / (2 * self.tr_preds.shape[0])
+        smoothing_te = 1e-9  # 1 / (2 * self.te_preds.shape[0])
         s = 0
         tr_bin_priors = np.zeros((self.n_bins, self.tr_preds.shape[1]), dtype=np.float)
         te_bin_priors = np.zeros((self.n_bins, self.te_preds.shape[1]), dtype=np.float)
         tr_bins = self.__create_bins(training=True, isomerous_binning=isomerous_binning)
-        te_bins = self.__create_bins(training=False, isomerous_binning=isomerous_binning)
         self.__compute_bins_priors(tr_bin_priors, self.tr_preds, tr_bins, smoothing_tr)

+        te_preds_cp = self.te_preds.copy()
         val = 2 * epsilon
-        if return_posteriors_hist:
-            posteriors_hist = [self.te_preds.copy()]
         while not val < epsilon and s < 1000:
-            assert np.all(np.around(self.te_preds.sum(axis=1), 4) == 1), f"Probabilities do not sum to 1:\ns={s}, " \
-                                                                         f"probs={self.te_preds.sum(axis=1)}"
-            if compute_bins_at_every_iter:
+            if compute_bins_at_every_iter or s==0:
                 te_bins = self.__create_bins(training=False, isomerous_binning=isomerous_binning)

             if s == 0:
@@ -50,34 +46,47 @@ class FineGrainedSLD:
                 te_bin_priors_prev = te_bin_priors.copy()
             self.__compute_bins_priors(te_bin_priors, self.te_preds, te_bins, smoothing_te)

-            te_preds_cp = self.te_preds.copy()
             for label_idx, bins in te_bins.items():
                 for i, bin_ in enumerate(bins):
                     if bin_.shape[0] == 0:
                         continue
-                    te = te_bin_priors[i][label_idx]
-                    tr = tr_bin_priors[i][label_idx]
-                    # local_min = (math.floor(tr * 10) / 10)
+                    alpha = 1
+                    beta = 0.1
+                    local_te = te_bin_priors[i][label_idx]
+                    global_te = self.te_preds[:, label_idx].mean()
+                    te = local_te*alpha + global_te*(1-alpha)
+                    local_tr = tr_bin_priors[i][label_idx]
+                    global_tr = self.tr_priors[label_idx]
+                    tr = local_tr*beta + global_tr*(1-beta)
+                    #local_min = (math.floor(tr * self.n_bins) / self.n_bins)
                     # local_max = local_min + .1
                     # trans = lambda l: min(max((l - local_min) / 1, 0), 1)
-                    trans = lambda l: l
-                    self.te_preds[:, label_idx][bin_] = (te_preds_cp[:, label_idx][bin_]) * \
-                                                        (trans(te) / trans(tr))
+                    assert not isomerous_binning, 'not tested'
+                    #trans = lambda l: l - local_min
+                    # trans = lambda l: l
+                    # ratio = (trans(te) / trans(tr))
+                    #ratio = np.clip(ratio, 0.1, 2)
+                    #ratio = ratio**3
+                    #self.te_preds[:, label_idx][bin_] = (te_preds_cp[:, label_idx][bin_]) * ratio
+                    old_posterior = te_preds_cp[:, label_idx][bin_]
+                    lr = 1
+                    #self.te_preds[:, label_idx][bin_] = np.clip(old_posterior + (te-tr)*lr, 0, None)
+                    self.te_preds[:, label_idx][bin_] = np.clip(old_posterior + (te - tr) * lr, 0, None)
+                    #self.te_preds[:, label_idx][bin_] = (te_preds_cp[:, label_idx][bin_]) * ratio

             # Normalization step
             self.te_preds = (self.te_preds / self.te_preds.sum(axis=1, keepdims=True))
+            #self.te_preds = softmax(self.te_preds, axis=1)

-            val = 0
-            for label_idx in range(te_bin_priors.shape[1]):
-                temp = max(abs((te_bin_priors[:, label_idx] / te_bin_priors_prev[:, label_idx]) - 1))
-                if temp > val:
-                    val = temp
+            val = np.max(np.abs(te_bin_priors / te_bin_priors_prev) - 1)
             s += 1
-            if return_posteriors_hist:
-                posteriors_hist.append(self.te_preds.copy())
-        if return_posteriors_hist:
-            return self.te_preds.mean(axis=0), posteriors_hist
-        return self.te_preds.mean(axis=0), self.te_preds
+        self.iterations = s
+        priors = self.te_preds.mean(axis=0)
+        posteriors = self.te_preds
+
+        return priors, posteriors

     def __compute_bins_priors(self, bin_priors_placeholder, posteriors, bins, smoothing):
         for label_idx, bins in bins.items():
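Read together with the previous hunk, the update this commit settles on is additive rather than multiplicative: every test posterior in a bin is shifted by the gap between the test bin prior (purely local, alpha=1) and the training bin prior (mostly global, beta=0.1), clipped at zero, and renormalised per instance, with convergence now measured as the largest relative change in the test bin priors. A self-contained sketch of one such iteration for the binary case (synthetic posteriors; the constants mirror the commit):

import numpy as np

def isometric_masks(scores, n_bins):
    # half-open bins [i/n, (i+1)/n); the top edge of the last bin is nudged so 1.0 is kept
    lows = np.linspace(0., 1., num=n_bins, endpoint=False)
    highs = np.append(lows[1:], 1. + 1e-12)
    return [(scores >= lo) & (scores < hi) for lo, hi in zip(lows, highs)]

def bin_priors(preds, masks, label_idx, n_bins, smoothing=1e-9):
    # mean posterior per bin, smoothed as in __compute_bins_priors
    return np.array([(preds[m, label_idx].mean() + smoothing) / (1 + n_bins * smoothing)
                     if m.any() else smoothing for m in masks])

rng = np.random.RandomState(0)
tr_preds = rng.dirichlet([2., 2.], size=500)    # stand-in training posteriors
te_preds = rng.dirichlet([3., 1.5], size=400)   # stand-in (shifted) test posteriors
tr_priors = tr_preds.mean(axis=0)
n_bins, alpha, beta, lr = 5, 1., 0.1, 1.

te_preds_cp = te_preds.copy()
for label_idx in range(2):
    tr_masks = isometric_masks(tr_preds[:, label_idx], n_bins)
    te_masks = isometric_masks(te_preds_cp[:, label_idx], n_bins)
    tr_bp = bin_priors(tr_preds, tr_masks, label_idx, n_bins)
    te_bp = bin_priors(te_preds_cp, te_masks, label_idx, n_bins)
    for i, bin_ in enumerate(te_masks):
        if not bin_.any():
            continue
        te = te_bp[i] * alpha + te_preds_cp[:, label_idx].mean() * (1 - alpha)  # alpha=1: purely local
        tr = tr_bp[i] * beta + tr_priors[label_idx] * (1 - beta)                # beta=0.1: mostly global
        te_preds[bin_, label_idx] = np.clip(te_preds_cp[bin_, label_idx] + (te - tr) * lr, 0, None)

te_preds /= te_preds.sum(axis=1, keepdims=True)  # normalization step
print('prevalence estimate after one iteration:', te_preds.mean(axis=0))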
@@ -85,23 +94,10 @@ class FineGrainedSLD:
                 if bin_.shape[0] == 0:
                     bin_priors_placeholder[i, label_idx] = smoothing
                     continue
-                numerator = posteriors[:, label_idx][bin_].mean()
+                numerator = posteriors[bin_, label_idx].mean()
                 bin_prior = (numerator + smoothing) / (1 + self.n_bins * smoothing)  # normalize priors
                 bin_priors_placeholder[i, label_idx] = bin_prior

-    def __find_bin_idx(self, label_bins: [np.array], idx: int or list):
-        if hasattr(idx, '__len__'):
-            idxs = np.zeros(len(idx), dtype=np.int)
-            for i, bin_ in enumerate(label_bins):
-                for j, id_ in enumerate(idx):
-                    if id_ in bin_:
-                        idxs[j] = i
-            return idxs
-        else:
-            for i, bin_ in enumerate(label_bins):
-                if idx in bin_:
-                    return i
-
     def __create_bins(self, training: bool, isomerous_binning: bool):
         bins = {}
         preds = self.tr_preds if training else self.te_preds
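The numerator change is purely an indexing simplification: posteriors[:, label_idx][bin_] first materialises a column and then fancy-indexes it, while posteriors[bin_, label_idx] selects the same elements in a single step. A quick check:

import numpy as np

posteriors = np.arange(12, dtype=float).reshape(6, 2)
bin_ = np.array([0, 2, 5])   # indices of the instances falling in one bin
label_idx = 1
assert np.array_equal(posteriors[:, label_idx][bin_], posteriors[bin_, label_idx])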
@@ -111,6 +107,6 @@ class FineGrainedSLD:
         else:
             intervals = np.linspace(0., 1., num=self.n_bins, endpoint=False)
             for label_idx in range(preds.shape[1]):
-                bins_ = isometric_bins(label_idx, preds, intervals, 0.1)
+                bins_ = isometric_bins(label_idx, preds, intervals)
                 bins[label_idx] = [bins_[i] for i in intervals]
         return bins
@@ -73,10 +73,21 @@ def brier_decomposition(bins, true_labels, predicted_labels, class_=1):
     return calibration_score / (labels_len * len(bins)), refinement_score / (labels_len * len(bins))


-def isometric_bins(label_index, predicted_labels, bin_intervals, step):
+#def isometric_bins(label_index, predicted_labels, bin_intervals, step):
+#    predicted_class_label = predicted_labels[:, label_index]
+#    return {interv: np.where(np.logical_and(interv <= predicted_class_label, predicted_class_label < interv + step))[0]
+#            for interv in bin_intervals}
+
+
+def isometric_bins(label_index, predicted_labels, bin_intervals):
+    def next_intv(i):
+        return bin_intervals[i + 1] if (i + 1) < len(bin_intervals) else 1.
     predicted_class_label = predicted_labels[:, label_index]
-    return {interv: np.where(np.logical_and(interv <= predicted_class_label, predicted_class_label < interv + step))[0]
-            for interv in bin_intervals}
+    return {
+        interv: np.where(np.logical_and(interv <= predicted_class_label, predicted_class_label < next_intv(i)))[0]
+        for i, interv in enumerate(bin_intervals)
+    }


 def isomerous_bins(label_index, predicted_labels, n):
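The rewritten isometric_bins drops the hard-coded step: each bin is now closed by the next interval edge (1.0 for the last one), so the bins tile [0, 1] for any n_bins, whereas the old fixed step of 0.1 only matched n_bins=10. A quick check of the new behaviour (note the strict upper bound still excludes a posterior of exactly 1.0):

import numpy as np

def isometric_bins(label_index, predicted_labels, bin_intervals):
    def next_intv(i):
        return bin_intervals[i + 1] if (i + 1) < len(bin_intervals) else 1.
    predicted_class_label = predicted_labels[:, label_index]
    return {
        interv: np.where(np.logical_and(interv <= predicted_class_label,
                                        predicted_class_label < next_intv(i)))[0]
        for i, interv in enumerate(bin_intervals)
    }

preds = np.array([[0.9, 0.1], [0.7, 0.3], [0.2, 0.8], [0.01, 0.99]])
intervals = np.linspace(0., 1., num=4, endpoint=False)   # edges 0.0, 0.25, 0.5, 0.75
print(isometric_bins(1, preds, intervals))
# {0.0: array([0]), 0.25: array([1]), 0.5: array([]), 0.75: array([2, 3])}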
Binary file not shown (image; 163 KiB before, 238 KiB after).
@@ -352,6 +352,7 @@ class EMQ(AggregativeProbabilisticQuantifier):

     @classmethod
     def EM(cls, tr_prev, posterior_probabilities, epsilon=EPSILON):
+        #print('training-priors', tr_prev)
         Px = posterior_probabilities
         Ptr = np.copy(tr_prev)
         qs = np.copy(Ptr)  # qs (the running estimate) is initialized as the training prevalence
@@ -359,11 +360,14 @@ class EMQ(AggregativeProbabilisticQuantifier):
         s, converged = 0, False
         qs_prev_ = None
         while not converged and s < EMQ.MAX_ITER:
-            # E-step: ps is Ps(y=+1|xi)
+            #print('iter: ', s)
+            # E-step: ps is Ps(y|xi)
             ps_unnormalized = (qs / Ptr) * Px
-            ps = ps_unnormalized / ps_unnormalized.sum(axis=1).reshape(-1,1)
+            ps = ps_unnormalized / ps_unnormalized.sum(axis=1, keepdims=True)
+            #print(f'\tratio=', qs / Ptr)
+            #print(f'\torigin_posteriors ', Px)

-            # M-step: qs_pos is Ps+1(y=+1)
+            # M-step:
             qs = ps.mean(axis=0)

             if qs_prev_ is not None and qp.error.mae(qs, qs_prev_) < epsilon and s>10:
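For reference, the E- and M-steps touched here are the classic SLD/EM quantifier: posteriors are reweighted by the ratio of the current prevalence estimate to the training prevalence, renormalised per instance, and the estimate is refreshed as their mean. A compact standalone sketch (synthetic posteriors; epsilon and the iteration cap are illustrative, not EMQ's actual constants):

import numpy as np

def em_quantify(tr_prev, posteriors, epsilon=1e-4, max_iter=1000):
    Ptr = np.asarray(tr_prev, dtype=float)
    qs = Ptr.copy()                      # running prevalence estimate, seeded at training prevalence
    for _ in range(max_iter):
        ps = (qs / Ptr) * posteriors     # E-step: reweight posteriors by the prevalence ratio
        ps /= ps.sum(axis=1, keepdims=True)
        qs_new = ps.mean(axis=0)         # M-step: new prevalence = mean corrected posterior
        if np.abs(qs_new - qs).mean() < epsilon:
            return qs_new, ps
        qs = qs_new
    return qs, ps

rng = np.random.RandomState(0)
post = rng.dirichlet([2., 1.], size=1000)   # classifier posteriors on a prevalence-shifted test set
print(em_quantify(tr_prev=[0.5, 0.5], posteriors=post)[0])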
@@ -373,7 +377,6 @@ class EMQ(AggregativeProbabilisticQuantifier):
             s += 1

         if not converged:
-            #raise UserWarning('the method has reached the maximum number of iterations; it might have not converged')
             print('[warning] the method has reached the maximum number of iterations; it might have not converged')

         return qs, ps