forked from moreo/QuaPy
Add experimental ave-pool method (AveragePoolQuantification); it is not working yet because OneVsAll is aggregative and ave-pool is not
This commit is contained in:
parent
99132c8166
commit
1ba0748b59
|
@ -1,7 +1,15 @@
|
|||
from typing import Union
|
||||
import numpy as np
|
||||
from sklearn.decomposition import PCA
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
|
||||
import quapy as qp
|
||||
from typing import Union
|
||||
|
||||
from quapy.data import LabelledCollection
|
||||
from quapy.method.base import BaseQuantifier, BinaryQuantifier
|
||||
from quapy.method.aggregative import PACC, EMQ, HDy
|
||||
import quapy.functional as F
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class PACCSLD(PACC):
|
||||
|
@ -35,3 +43,83 @@ class HDySLD(HDy):
|
|||
def aggregate(self, classif_posteriors):
    """Aggregate posteriors after first updating them with EMQ's EM routine.

    The classifier posteriors are passed, together with the training
    prevalence, to ``EMQ.EM``; the posteriors it returns are then handed
    to the parent class' ``aggregate``.
    """
    # EMQ.EM yields a (priors, posteriors) pair; only the posteriors are used here.
    _, em_posteriors = EMQ.EM(
        self.train_prevalence,
        classif_posteriors,
        epsilon=1e-4,
    )
    return super(HDySLD, self).aggregate(em_posteriors)
|
||||
|
||||
|
||||
|
||||
class AveragePoolQuantification(BinaryQuantifier):
    """Experimental binary quantifier that regresses prevalence from pooled averages.

    Training samples are drawn from the labelled collection, every sample is
    summarised by the mean of its instances, and ``learner`` (a regressor) is
    fit to map that mean vector to the prevalence of class 1. The raw
    prediction is then linearly rescaled using the average predictions
    obtained on validation samples drawn at prevalence 0 and 1.

    :param learner: regressor exposing ``fit``, ``predict``, ``set_params``
        and ``get_params``
    :param sample_size: number of instances in every generated sample
    :param trials: number of (average, prevalence) training pairs to generate
    :param n_components: number of PCA components applied to the averages;
        PCA is skipped when the value is not positive
    :param zscore: whether to standardise the (possibly PCA-projected) averages
    """

    def __init__(self, learner, sample_size, trials, n_components=-1, zscore=False):
        self.learner = learner
        self.sample_size = sample_size
        self.trials = trials

        self.do_zscore = zscore
        self.zscore = StandardScaler() if self.do_zscore else None

        self.do_pca = n_components > 0
        self.pca = PCA(n_components) if self.do_pca else None

    def _preprocess(self, X, fit=False):
        """Apply the optional PCA and z-score steps, fitting them first when ``fit`` is True."""
        if self.do_pca:
            X = self.pca.fit_transform(X) if fit else self.pca.transform(X)
            if fit:
                print(X.shape)
        if self.do_zscore:
            X = self.zscore.fit_transform(X) if fit else self.zscore.transform(X)
        return X

    def _pooled_validation_averages(self, validation, prevalence, repeats=100):
        """Stack the instance means of ``repeats`` validation samples drawn at ``prevalence``."""
        return np.asarray(np.vstack([
            validation.sampling(self.sample_size, prevalence, shuffle=False).instances.mean(axis=0)
            for _ in range(repeats)
        ]))

    def fit(self, data: LabelledCollection):
        """Fit the regressor on pooled sample averages and compute the calibration terms.

        :param data: labelled collection; 70% is used to generate the training
            pairs and the remaining 30% to estimate the corrections
        :return: self
        """
        training, validation = data.split_stratified(train_prop=0.7)

        X, y = [], []

        nprevpoints = F.get_nprevpoints_approximation(self.trials, data.n_classes)
        sample_generator = training.artificial_sampling_generator(
            self.sample_size, n_prevalences=nprevpoints, repeats=1
        )
        for sample in tqdm(sample_generator, desc='generating averages'):
            X.append(sample.instances.mean(axis=0))
            y.append(sample.prevalence()[1])

        # Top up with randomly drawn prevalences until `trials` pairs are available.
        while len(X) < self.trials:
            # The prevalence vector is unpacked so that this call matches the other
            # `sampling` calls, which pass scalar prevalences.
            # NOTE(review): assumes uniform_simplex_sampling returns a flat vector
            # of length n_classes -- confirm.
            sample = training.sampling(self.sample_size, *F.uniform_simplex_sampling(data.n_classes))
            X.append(sample.instances.mean(axis=0))
            # Keep only the class-1 prevalence, as in the loop above, so that the
            # regression target is a scalar for every pair.
            y.append(sample.prevalence()[1])

        X = np.asarray(np.vstack(X))
        y = np.asarray(y)

        X = self._preprocess(X, fit=True)

        print('training regressor...')
        self.regressor = self.learner.fit(X, y)

        # Average predictions at the two extreme prevalences; used by `quantify`
        # to rescale the raw regressor output.
        print('getting corrections...')
        X0 = self._preprocess(self._pooled_validation_averages(validation, 0.))
        X1 = self._preprocess(self._pooled_validation_averages(validation, 1.))

        self.correction_0 = self.regressor.predict(X0).mean()
        self.correction_1 = self.regressor.predict(X1).mean()

        print('correction-0', self.correction_0)
        print('correction-1', self.correction_1)
        print('done')

        return self

    def quantify(self, instances):
        """Estimate the class prevalences of ``instances``.

        :param instances: matrix of instances, averaged along axis 0
        :return: array ``[1 - p, p]`` where ``p`` is the calibrated prediction
            clipped to [0, 1]
        """
        # Force a single-row 2-D array: the mean of a dense array is 1-D, whereas
        # `transform` and `predict` are called here on one sample with many features.
        ave = np.asarray(instances.mean(axis=0)).reshape(1, -1)
        ave = self._preprocess(ave)

        phat = self.regressor.predict(ave).item()

        span = self.correction_1 - self.correction_0
        if span != 0:
            phat = (phat - self.correction_0) / span
        # When both corrections coincide the rescaling is undefined; the raw
        # prediction is clipped instead of producing NaN/inf.
        phat = np.clip(phat, 0, 1)
        return np.asarray([1 - phat, phat])

    def set_params(self, **parameters):
        """Forward the given hyperparameters to the underlying learner."""
        self.learner.set_params(**parameters)

    def get_params(self, deep=True):
        """Return the hyperparameters of the underlying learner."""
        return self.learner.get_params(deep=deep)
|
||||
|
|
|
@ -32,6 +32,7 @@ nice = {
|
|||
'quanet': 'QuaNet',
|
||||
'hdy': 'HDy',
|
||||
'dys': 'DyS',
|
||||
'epaccmaeptr': 'E(PACC)$_\mathrm{Ptr}$',
|
||||
'svmperf':'',
|
||||
'sanders': 'Sanders',
|
||||
'semeval13': 'SemEval13',
|
||||
|
@ -116,7 +117,7 @@ if __name__ == '__main__':
|
|||
datasets = qp.datasets.TWITTER_SENTIMENT_DATASETS_TEST
|
||||
evaluation_measures = [qp.error.ae, qp.error.rae]
|
||||
gao_seb_methods = ['cc', 'acc', 'pcc', 'pacc', 'sld', 'svmq', 'svmkld', 'svmnkld']
|
||||
new_methods = ['hdy']
|
||||
new_methods = ['hdy', 'quanet', 'epaccptr']
|
||||
|
||||
gao_seb_ranks, gao_seb_results = get_ranks_from_Gao_Sebastiani()
|
||||
|
||||
|
|
|
@ -547,8 +547,6 @@ class OneVsAll(AggregativeQuantifier):
|
|||
else:
|
||||
predictions = self.classify(X)
|
||||
return self.aggregate(predictions)
|
||||
#prevalences = self.__parallel(self._delayed_binary_quantify, X)
|
||||
#return F.normalize_prevalence(prevalences)
|
||||
|
||||
def __parallel(self, func, *args, **kwargs):
|
||||
return np.asarray(
|
||||
|
|
25
test.py
25
test.py
|
@ -1,10 +1,12 @@
|
|||
from sklearn.linear_model import LogisticRegression
|
||||
from sklearn.model_selection import GridSearchCV
|
||||
from sklearn.svm import LinearSVC
|
||||
from sklearn.svm import LinearSVC, LinearSVR
|
||||
import quapy as qp
|
||||
import quapy.functional as F
|
||||
import sys
|
||||
import numpy as np
|
||||
|
||||
from NewMethods.methods import AveragePoolQuantification
|
||||
from classification.methods import PCALR
|
||||
from classification.neural import NeuralClassifierTrainer, CNNnet
|
||||
from quapy.model_selection import GridSearchQ
|
||||
|
@ -29,7 +31,7 @@ if binary:
|
|||
|
||||
else:
|
||||
dataset = qp.datasets.fetch_twitter('hcr', for_model_selection=False, min_df=10, pickle=True)
|
||||
dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3)
|
||||
#dataset.training = dataset.training.sampling(sample_size, 0.2, 0.5, 0.3)
|
||||
|
||||
print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.test)}')
|
||||
|
||||
|
@ -51,14 +53,17 @@ print(f'dataset loaded: #training={len(dataset.training)} #test={len(dataset.tes
|
|||
#model = qp.method.meta.QuaNet(learner, sample_size, device='cpu')
|
||||
|
||||
#learner = GridSearchCV(LogisticRegression(max_iter=1000), param_grid=param_grid, n_jobs=-1, verbose=1)
|
||||
learner = LogisticRegression(max_iter=1000)
|
||||
#learner = LogisticRegression(max_iter=1000)
|
||||
# model = qp.method.aggregative.ClassifyAndCount(learner)
|
||||
|
||||
|
||||
model = qp.method.meta.EPACC(learner, size=10, red_size=5,
|
||||
param_grid={'C':[1,10,100]},
|
||||
optim='mae', param_mod_sel={'sample_size':100, 'n_prevpoints':21, 'n_repetitions':5},
|
||||
policy='ptr', n_jobs=1)
|
||||
#model = qp.method.meta.EPACC(learner, size=10, red_size=5,
|
||||
# param_grid={'C':[1,10,100]},
|
||||
# optim='mae', param_mod_sel={'sample_size':100, 'n_prevpoints':21, 'n_repetitions':5},
|
||||
# policy='ptr', n_jobs=1)
|
||||
regressor = LinearSVR(max_iter=10000)
|
||||
param_grid = {'C': np.logspace(-1,3,5)}
|
||||
model = AveragePoolQuantification(regressor, sample_size, trials=5000, n_components=500, zscore=False)
|
||||
|
||||
#model = qp.method.meta.EHDy(learner, param_grid=param_grid, optim='mae',
|
||||
# sample_size=sample_size, eval_budget=max_evaluations//10, n_jobs=-1)
|
||||
|
@ -75,7 +80,7 @@ if qp.isbinary(model) and not qp.isbinary(dataset):
|
|||
print(f'fitting model {model.__class__.__name__}')
|
||||
#train, val = dataset.training.split_stratified(0.6)
|
||||
#model.fit(train, val_split=val)
|
||||
model.fit(dataset.training, val_split=dataset.test)
|
||||
model.fit(dataset.training)
|
||||
|
||||
|
||||
|
||||
|
@ -112,7 +117,7 @@ for error in qp.error.QUANTIFICATION_ERROR:
|
|||
score = error(true_prev, estim_prev)
|
||||
print(f'{error.__name__}={score:.5f}')
|
||||
|
||||
sys.exit(0)
|
||||
#sys.exit(0)
|
||||
# Model selection and Evaluation according to the artificial sampling protocol
|
||||
# ----------------------------------------------------------------------------
|
||||
|
||||
|
@ -123,7 +128,7 @@ model_selection = GridSearchQ(model,
|
|||
error='mae',
|
||||
refit=True,
|
||||
verbose=True,
|
||||
timeout=4)
|
||||
timeout=60*60)
|
||||
|
||||
model = model_selection.fit(dataset.training, val_split=0.3)
|
||||
#model = model_selection.fit(train, validation=val)
|
||||
|
|
Loading…
Reference in New Issue