1
0
Fork 0

some refactoring made in order to allow OneVsAll to operate with aggregative probabilistic quantifiers; launching OneVsAll(HDy)

This commit is contained in:
Alejandro Moreo Fernandez 2021-01-18 16:52:19 +01:00
parent e2eb3b6f06
commit b30c40b7a0
7 changed files with 122 additions and 49 deletions

View File

@ -1,6 +1,6 @@
from sklearn.linear_model import LogisticRegression
import quapy as qp
from quapy.method.aggregative import OneVsAll
from quapy.method.aggregative import CC, ACC, PCC, PACC, EMQ, OneVsAll, SVMQ, SVMKLD, SVMNKLD, SVMAE, SVMRAE, HDy
import quapy.functional as F
import numpy as np
import os
@ -22,19 +22,26 @@ def quantification_models():
__C_range = np.logspace(-4, 5, 10)
lr_params = {'C': __C_range, 'class_weight': [None, 'balanced']}
svmperf_params = {'C': __C_range}
yield 'cc', qp.method.aggregative.CC(newLR()), lr_params
yield 'acc', qp.method.aggregative.ACC(newLR()), lr_params
yield 'pcc', qp.method.aggregative.PCC(newLR()), lr_params
yield 'pacc', qp.method.aggregative.PACC(newLR()), lr_params
yield 'sld', qp.method.aggregative.EMQ(newLR()), lr_params
yield 'svmq', OneVsAll(qp.method.aggregative.SVMQ(args.svmperfpath)), svmperf_params
yield 'svmkld', OneVsAll(qp.method.aggregative.SVMKLD(args.svmperfpath)), svmperf_params
yield 'svmnkld', OneVsAll(qp.method.aggregative.SVMNKLD(args.svmperfpath)), svmperf_params
yield 'svmmae', OneVsAll(qp.method.aggregative.SVMAE(args.svmperfpath)), svmperf_params
yield 'svmmrae', OneVsAll(qp.method.aggregative.SVMRAE(args.svmperfpath)), svmperf_params
#sld = qp.method.aggregative.EMQ(newLR())
#yield 'paccsld', qp.method.aggregative.PACC(sld), lr_params
# methods tested in Gao & Sebastiani 2016
yield 'cc', CC(newLR()), lr_params
yield 'acc', ACC(newLR()), lr_params
yield 'pcc', PCC(newLR()), lr_params
yield 'pacc', PACC(newLR()), lr_params
yield 'sld', EMQ(newLR()), lr_params
yield 'svmq', OneVsAll(SVMQ(args.svmperfpath)), svmperf_params
yield 'svmkld', OneVsAll(SVMKLD(args.svmperfpath)), svmperf_params
yield 'svmnkld', OneVsAll(SVMNKLD(args.svmperfpath)), svmperf_params
# methods added
yield 'svmmae', OneVsAll(SVMAE(args.svmperfpath)), svmperf_params
yield 'svmmrae', OneVsAll(SVMRAE(args.svmperfpath)), svmperf_params
yield 'hdy', OneVsAll(HDy(newLR())), lr_params
# to add:
# quapy
# ensembles
#
# 'mlpe': lambda learner: MaximumLikelihoodPrevalenceEstimation(),

View File

@ -7,7 +7,7 @@ from . import evaluation
from . import plot
from . import util
from . import model_selection
from quapy.method.aggregative import isaggregative, isprobabilistic
from quapy.method.base import isprobabilistic, isaggregative
environ = {
@ -21,3 +21,5 @@ environ = {
def isbinary(x):
    """Return the ``binary`` attribute of ``x``.

    The value is read straight from the object and returned unchanged, so
    ``x`` only needs to expose a ``binary`` attribute (or property).
    """
    return x.binary

View File

@ -8,6 +8,7 @@ import quapy as qp
from quapy.data import LabelledCollection
from quapy.method.base import BaseQuantifier
from quapy.util import temp_seed
import quapy.functional as F
def artificial_sampling_prediction(
@ -39,18 +40,18 @@ def artificial_sampling_prediction(
with temp_seed(random_seed):
indexes = list(test.artificial_sampling_index_generator(sample_size, n_prevpoints, n_repetitions))
if isinstance(model, qp.method.aggregative.AggregativeQuantifier):
# print('\tinstance of aggregative-quantifier')
if model.aggregative: #isinstance(model, qp.method.aggregative.AggregativeQuantifier):
print('\tinstance of aggregative-quantifier')
quantification_func = model.aggregate
if isinstance(model, qp.method.aggregative.AggregativeProbabilisticQuantifier):
# print('\t\tinstance of probabilitstic-aggregative-quantifier')
if model.probabilistic: # isinstance(model, qp.method.aggregative.AggregativeProbabilisticQuantifier):
print('\t\tinstance of probabilitstic-aggregative-quantifier')
preclassified_instances = model.posterior_probabilities(test.instances)
else:
# print('\t\tinstance of hard-aggregative-quantifier')
print('\t\tinstance of hard-aggregative-quantifier')
preclassified_instances = model.classify(test.instances)
test = LabelledCollection(preclassified_instances, test.labels)
else:
# print('\t\tinstance of base-quantifier')
print('\t\tinstance of base-quantifier')
quantification_func = model.quantify
def _predict_prevalences(index):

View File

@ -1,7 +1,6 @@
from abc import abstractmethod
from copy import deepcopy
from typing import Union
import numpy as np
from joblib import Parallel, delayed
from sklearn.base import BaseEstimator
@ -60,6 +59,10 @@ class AggregativeQuantifier(BaseQuantifier):
def classes(self):
return self.learner.classes_
@property
def aggregative(self):
return True
class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
"""
@ -84,6 +87,9 @@ class AggregativeProbabilisticQuantifier(AggregativeQuantifier):
parameters={'base_estimator__'+k:v for k,v in parameters.items()}
self.learner.set_params(**parameters)
@property
def probabilistic(self):
return True
# Helper
@ -385,6 +391,10 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
Px = self.posterior_probabilities(validation.instances)[:,1] # takes only the P(y=+1|x)
self.Pxy1 = Px[validation.labels == 1]
self.Pxy0 = Px[validation.labels == 0]
# pre-compute the histogram for positive and negative examples
self.bins = np.linspace(10, 110, 11, dtype=int) #[10, 20, 30, ..., 100, 110]
self.Pxy1_density = {bins: np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)[0] for bins in self.bins}
self.Pxy0_density = {bins: np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)[0] for bins in self.bins}
return self
def aggregate(self, classif_posteriors):
@ -395,9 +405,12 @@ class HDy(AggregativeProbabilisticQuantifier, BinaryQuantifier):
Px = classif_posteriors[:,1] # takes only the P(y=+1|x)
prev_estimations = []
for bins in np.linspace(10, 110, 11, dtype=int): #[10, 20, 30, ..., 100, 110]
Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)
Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)
#for bins in np.linspace(10, 110, 11, dtype=int): #[10, 20, 30, ..., 100, 110]
#Pxy0_density, _ = np.histogram(self.Pxy0, bins=bins, range=(0, 1), density=True)
#Pxy1_density, _ = np.histogram(self.Pxy1, bins=bins, range=(0, 1), density=True)
for bins in self.bins:
Pxy0_density = self.Pxy0_density[bins]
Pxy1_density = self.Pxy1_density[bins]
Px_test, _ = np.histogram(Px, bins=bins, range=(0, 1), density=True)
@ -488,9 +501,7 @@ class OneVsAll(AggregativeQuantifier):
assert isinstance(self.binary_quantifier, BaseQuantifier), \
f'{self.binary_quantifier} does not seem to be a Quantifier'
assert fit_learner==True, 'fit_learner must be True'
if not isinstance(self.binary_quantifier, BinaryQuantifier):
raise ValueError(f'{self.binary_quantifier.__class__.__name__} does not seem to be an instance of '
f'{BinaryQuantifier.__class__.__name__}')
self.dict_binary_quantifiers = {c: deepcopy(self.binary_quantifier) for c in data.classes_}
self.__parallel(self._delayed_binary_fit, data)
return self
@ -502,20 +513,39 @@ class OneVsAll(AggregativeQuantifier):
classif_predictions_bin = self.__parallel(self._delayed_binary_classification, instances)
return classif_predictions_bin.T
def posterior_probabilities(self, instances):
# returns a matrix of shape (n,m,2) with n the number of instances and m the number of classes. The entry
# (i,j,1) (resp. (i,j,0)) is a value in [0,1] indicating the posterior probability that instance i belongs
# (resp. does not belong) to class j.
# The posterior probabilities are independent of each other, meaning that, in general, they do not sum
# up to one.
if not self.binary_quantifier.probabilistic:
raise NotImplementedError(f'{self.__class__.__name__} does not implement posterior_probabilities because '
f'the base quantifier {self.binary_quantifier.__class__.__name__} is not '
f'probabilistic')
posterior_predictions_bin = self.__parallel(self._delayed_binary_posteriors, instances)
return np.swapaxes(posterior_predictions_bin, 0, 1)
def aggregate(self, classif_predictions_bin):
assert set(np.unique(classif_predictions_bin)).issubset({0,1}), \
'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of binary ' \
'predictions for each document (row) and class (columns)'
if self.probabilistic:
assert classif_predictions_bin.shape[1]==self.n_classes and classif_predictions_bin.shape[2]==2, \
'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of posterior ' \
'probabilities (2 dimensions) for each document (row) and class (columns)'
else:
assert set(np.unique(classif_predictions_bin)).issubset({0,1}), \
'param classif_predictions_bin does not seem to be a valid matrix (ndarray) of binary ' \
'predictions for each document (row) and class (columns)'
prevalences = self.__parallel(self._delayed_binary_aggregate, classif_predictions_bin)
#prevalences = []
#for c in self.classes:
# prevalences.append(self._delayed_binary_aggregate(c, classif_predictions_bin))
#prevalences = np.asarray(prevalences)
return F.normalize_prevalence(prevalences)
def quantify(self, X):
prevalences = self.__parallel(self._delayed_binary_quantify, X)
return F.normalize_prevalence(prevalences)
if self.probabilistic:
predictions = self.posterior_probabilities(X)
else:
predictions = self.classify(X)
return self.aggregate(predictions)
#prevalences = self.__parallel(self._delayed_binary_quantify, X)
#return F.normalize_prevalence(prevalences)
def __parallel(self, func, *args, **kwargs):
return np.asarray(
@ -537,9 +567,12 @@ class OneVsAll(AggregativeQuantifier):
def _delayed_binary_classification(self, c, X):
return self.dict_binary_quantifiers[c].classify(X)
def _delayed_binary_quantify(self, c, X):
def _delayed_binary_posteriors(self, c, X):
return self.dict_binary_quantifiers[c].posterior_probabilities(X)
#def _delayed_binary_quantify(self, c, X):
# the estimation for the positive class prevalence
return self.dict_binary_quantifiers[c].quantify(X)[1]
# return self.dict_binary_quantifiers[c].quantify(X)[1]
def _delayed_binary_aggregate(self, c, classif_predictions):
# the estimation for the positive class prevalence
@ -549,13 +582,14 @@ class OneVsAll(AggregativeQuantifier):
bindata = LabelledCollection(data.instances, data.labels == c, n_classes=2)
self.dict_binary_quantifiers[c].fit(bindata)
@property
def binary(self):
return False
@property
def probabilistic(self):
return self.binary_quantifier.probabilistic
def isaggregative(model:BaseQuantifier):
return isinstance(model, AggregativeQuantifier)
def isprobabilistic(model:BaseQuantifier):
return isinstance(model, AggregativeProbabilisticQuantifier)

View File

@ -5,12 +5,10 @@ from quapy.data import LabelledCollection
# Base Quantifier abstract class
# ------------------------------------
class BaseQuantifier(metaclass=ABCMeta):
@abstractmethod
def fit(self, data): ...
def fit(self, data: LabelledCollection): ...
@abstractmethod
def quantify(self, instances): ...
@ -21,10 +19,20 @@ class BaseQuantifier(metaclass=ABCMeta):
@abstractmethod
def get_params(self, deep=True): ...
# these properties allow meta-learners to reimplement the decision based on their constituents, and not
# based on class structure
@property
def binary(self):
return False
@property
def aggregative(self):
return False
@property
def probabilistic(self):
return False
class BinaryQuantifier(BaseQuantifier):
def _check_binary(self, data: LabelledCollection, quantifier_name):
@ -40,7 +48,15 @@ def isbinary(model:BaseQuantifier):
return model.binary
# class OneVsAll(AggregativeQuantifier):
def isaggregative(model: BaseQuantifier):
    """Return the ``aggregative`` attribute of ``model``.

    The answer is delegated to the object's own ``aggregative`` property
    rather than derived from its class, so wrappers can report the nature
    of the quantifier they wrap.
    """
    return model.aggregative
def isprobabilistic(model: BaseQuantifier):
    """Return the ``probabilistic`` attribute of ``model``.

    The answer is delegated to the object's own ``probabilistic`` property
    rather than derived from its class, so wrappers can report the nature
    of the quantifier they wrap.
    """
    return model.probabilistic
# class OneVsAll:
# """
# Allows any binary quantifier to perform quantification on single-label datasets. The method maintains one binary
# quantifier for each class, and then l1-normalizes the outputs so that the class prevalences sum up to 1.

View File

@ -152,6 +152,19 @@ class Ensemble(BaseQuantifier):
order = np.argsort(dist)
return select_k(predictions, order, k=self.red_size)
@property
def binary(self):
return self.base_quantifier.binary
@property
def aggregative(self):
raise NotImplementedError('aggregative functionality not yet supported for Ensemble')
@property
def probabilistic(self):
raise NotImplementedError('probabilistic functionality not yet supported for Ensemble')
#return self.base_quantifier.probabilistic
def get_probability_distribution(posterior_probabilities, bins=8):
assert posterior_probabilities.shape[1]==2, 'the posterior probabilities do not seem to be for a binary problem'

View File

@ -157,7 +157,7 @@ class GridSearchQ(BaseQuantifier):
model.fit(training)
true_prevalences, estim_prevalences = artificial_sampling_prediction(
model, validation, self.sample_size, self.n_prevpoints, self.n_repetitions, n_jobs, self.random_seed,
verbose=False
verbose=True
)
score = self.error(true_prevalences, estim_prevalences)