1
0
Fork 0

new methods, some experiments added

This commit is contained in:
Alejandro Moreo Fernandez 2021-07-06 18:26:05 +02:00
parent 7b8e6462ff
commit 60b6fa3c12
4 changed files with 171 additions and 29 deletions

14
MultiLabel/NOTES.txt Normal file
View File

@ -0,0 +1,14 @@
Things to test:
- MultiChain for classification, MultiChain for regression?
- Independent classifiers + independent quantifiers
- Stacking + independent quantifiers
- ClassifierChain + independent quantifiers
- Independent quantifiers + cross-class regression (independent?)
- Stacking + cross-class regression
- ClassifierChain + cross-class regression
- Covariates (Means, CovMatrix from samples) + multioutput regression?
- Covariates concatenated with quantifier predictions + cross-class regression?
- Model Selection for specific protocols?

View File

@ -154,15 +154,24 @@ class MultilabelNaiveAggregativeQuantifier(MultilabelNaiveQuantifier, MLAggregat
return self.aggregate(predictions) return self.aggregate(predictions)
class MultilabelRegressionQuantification: class MLRegressionQuantification:
def __init__(self, base_quantifier=CC(LinearSVC()), regression='ridge', n_samples=500, sample_size=500, norm=True, def __init__(self,
means=True, stds=True): mlquantifier=MultilabelNaiveQuantifier(CC(LinearSVC())),
regression='ridge',
protocol='npp',
n_samples=500,
sample_size=500,
norm=True,
means=True,
stds=True):
assert regression in ['ridge', 'svr'], 'unknown regression model' assert regression in ['ridge', 'svr'], 'unknown regression model'
self.estimator = MultilabelNaiveQuantifier(base_quantifier) assert protocol in ['npp', 'app'], 'unknown protocol'
self.estimator = mlquantifier
if regression == 'ridge': if regression == 'ridge':
self.reg = Ridge(normalize=norm) self.reg = Ridge(normalize=norm)
elif regression == 'svr': elif regression == 'svr':
self.reg = MultiOutputRegressor(LinearSVR()) self.reg = MultiOutputRegressor(LinearSVR())
self.protocol = protocol
# self.reg = MultiTaskLassoCV(normalize=norm) # self.reg = MultiTaskLassoCV(normalize=norm)
# self.reg = KernelRidge(kernel='rbf') # self.reg = KernelRidge(kernel='rbf')
# self.reg = LassoLarsCV(normalize=norm) # self.reg = LassoLarsCV(normalize=norm)
@ -174,25 +183,11 @@ class MultilabelRegressionQuantification:
self.regression = regression self.regression = regression
self.n_samples = n_samples self.n_samples = n_samples
self.sample_size = sample_size self.sample_size = sample_size
self.norm = StandardScaler() # self.norm = StandardScaler()
self.means = means self.means = means
self.stds = stds self.stds = stds
def fit(self, data:MultilabelledCollection): def _prepare_arrays(self, Xs, ys, samples_mean, samples_std):
self.classes_ = data.classes_
tr, te = data.train_test_split()
self.estimator.fit(tr)
samples_mean = []
samples_std = []
Xs = []
ys = []
for sample in te.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples):
ys.append(sample.prevalence()[:,1])
Xs.append(self.estimator.quantify(sample.instances)[:,1])
if self.means:
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
if self.stds:
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
Xs = np.asarray(Xs) Xs = np.asarray(Xs)
ys = np.asarray(ys) ys = np.asarray(ys)
if self.means: if self.means:
@ -201,7 +196,49 @@ class MultilabelRegressionQuantification:
if self.stds: if self.stds:
samples_std = np.asarray(samples_std) samples_std = np.asarray(samples_std)
Xs = np.hstack([Xs, samples_std]) Xs = np.hstack([Xs, samples_std])
Xs = self.norm.fit_transform(Xs) return Xs, ys
def generate_samples_npp(self, val):
samples_mean = []
samples_std = []
Xs = []
ys = []
for sample in val.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples):
ys.append(sample.prevalence()[:, 1])
Xs.append(self.estimator.quantify(sample.instances)[:, 1])
if self.means:
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
if self.stds:
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
return self._prepare_arrays(Xs, ys, samples_mean, samples_std)
def generate_samples_app(self, val):
samples_mean = []
samples_std = []
Xs = []
ys = []
ncats = len(self.classes_)
nprevs = 21
repeats = max(self.n_samples // (ncats * nprevs), 1)
for cat in self.classes_:
for sample in val.artificial_sampling_generator(sample_size=self.sample_size, category=cat, n_prevalences=nprevs, repeats=repeats):
ys.append(sample.prevalence()[:, 1])
Xs.append(self.estimator.quantify(sample.instances)[:, 1])
if self.means:
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
if self.stds:
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
return self._prepare_arrays(Xs, ys, samples_mean, samples_std)
def fit(self, data:MultilabelledCollection):
self.classes_ = data.classes_
tr, val = data.train_test_split()
self.estimator.fit(tr)
if self.protocol == 'npp':
Xs, ys = self.generate_samples_npp(val)
elif self.protocol == 'app':
Xs, ys = self.generate_samples_app(val)
# Xs = self.norm.fit_transform(Xs)
self.reg.fit(Xs, ys) self.reg.fit(Xs, ys)
return self return self
@ -213,9 +250,9 @@ class MultilabelRegressionQuantification:
if self.stds: if self.stds:
sample_std = instances.todense().std(axis=0).getA() sample_std = instances.todense().std(axis=0).getA()
Xs = np.hstack([Xs, sample_std]) Xs = np.hstack([Xs, sample_std])
Xs = self.norm.transform(Xs) # Xs = self.norm.transform(Xs)
Xs = self.reg.predict(Xs) Xs = self.reg.predict(Xs)
Xs = self.norm.inverse_transform(Xs) # Xs = self.norm.inverse_transform(Xs)
adjusted = np.clip(Xs, 0, 1) adjusted = np.clip(Xs, 0, 1)
adjusted = adjusted.flatten() adjusted = adjusted.flatten()
neg_prevs = 1-adjusted neg_prevs = 1-adjusted

View File

@ -6,7 +6,7 @@ from tqdm import tqdm
import quapy as qp import quapy as qp
from MultiLabel.mlclassification import MultilabelStackedClassifier from MultiLabel.mlclassification import MultilabelStackedClassifier
from MultiLabel.mldata import MultilabelledCollection from MultiLabel.mldata import MultilabelledCollection
from MultiLabel.mlquantification import MultilabelNaiveQuantifier, MLCC, MLPCC, MultilabelRegressionQuantification, \ from MultiLabel.mlquantification import MultilabelNaiveQuantifier, MLCC, MLPCC, MLRegressionQuantification, \
MLACC, \ MLACC, \
MLPACC, MultilabelNaiveAggregativeQuantifier MLPACC, MultilabelNaiveAggregativeQuantifier
from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy
@ -44,11 +44,23 @@ def models():
# yield 'ChainPCC', MLPCC(ClassifierChain(cls(), cv=None, order='random')) # yield 'ChainPCC', MLPCC(ClassifierChain(cls(), cv=None, order='random'))
# yield 'ChainACC', MLACC(ClassifierChain(cls(), cv=None, order='random')) # yield 'ChainACC', MLACC(ClassifierChain(cls(), cv=None, order='random'))
# yield 'ChainPACC', MLPACC(ClassifierChain(cls(), cv=None, order='random')) # yield 'ChainPACC', MLPACC(ClassifierChain(cls(), cv=None, order='random'))
common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False} common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False, 'regression':'svr'}
yield 'MRQ-CC', MultilabelRegressionQuantification(base_quantifier=CC(cls()), regression='svr', **common) # yield 'MRQ-CC', MLRegressionQuantification(MultilabelNaiveQuantifier(CC(cls())), **common)
yield 'MRQ-PCC', MultilabelRegressionQuantification(base_quantifier=PCC(cls()), regression='svr', **common) # yield 'MRQ-PCC', MLRegressionQuantification(MultilabelNaiveQuantifier(PCC(cls())), **common)
yield 'MRQ-ACC', MultilabelRegressionQuantification(base_quantifier=ACC(cls()), regression='svr', **common) # yield 'MRQ-ACC', MLRegressionQuantification(MultilabelNaiveQuantifier(ACC(cls())), **common)
yield 'MRQ-PACC', MultilabelRegressionQuantification(base_quantifier=PACC(cls()), regression='svr', **common) # yield 'MRQ-PACC', MLRegressionQuantification(MultilabelNaiveQuantifier(PACC(cls())), **common)
# yield 'MRQ-StackCC', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), **common)
# yield 'MRQ-StackPCC', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), **common)
# yield 'MRQ-StackACC', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), **common)
# yield 'MRQ-StackPACC', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())), **common)
yield 'MRQ-StackCC-app', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
yield 'MRQ-StackPCC-app', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
yield 'MRQ-StackACC-app', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), protocol='app', **common)
yield 'MRQ-StackPACC-app', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())), protocol='app', **common)
# yield 'MRQ-ChainCC', MLRegressionQuantification(MLCC(ClassifierChain(cls())), **common)
# yield 'MRQ-ChainPCC', MLRegressionQuantification(MLPCC(ClassifierChain(cls())), **common)
# yield 'MRQ-ChainACC', MLRegressionQuantification(MLACC(ClassifierChain(cls())), **common)
# yield 'MRQ-ChainPACC', MLRegressionQuantification(MLPACC(ClassifierChain(cls())), **common)
dataset = 'reuters21578' dataset = 'reuters21578'

79
MultiLabel/results.txt Normal file
View File

@ -0,0 +1,79 @@
num categories = 10
Train-counts: [1650 181 389 2877 433 347 538 197 369 212]
Test-counts: [ 719 56 189 1087 149 131 179 89 117 71]
MLPE: 0.01101
NPP:
NaiveCC mae=0.01718
NaivePCC mae=0.00898
NaiveACC mae=0.01560
NaivePACC mae=0.01062
StackCC mae=0.00790
StackPCC mae=0.00659 **
StackACC mae=0.00913
StackPACC mae=0.00771
ChainCC mae=0.01644
ChainPCC mae=0.00924
ChainACC mae=0.01767
ChainPACC mae=0.01140
MRQ-CC mae=0.01130
MRQ-PCC mae=0.00941
MRQ-ACC mae=0.01153
MRQ-PACC mae=0.01000
MRQ-StackCC mae=0.00757
MRQ-StackPCC mae=0.00652 **
MRQ-StackACC mae=0.00799
MRQ-StackPACC mae=0.00763
MRQ-StackCC-app mae=0.00791
MRQ-StackPCC-app mae=0.00840
MRQ-StackACC-app mae=0.00910
MRQ-StackPACC-app mae=0.00941
MRQ-ChainCC mae=0.00989
MRQ-ChainPCC mae=0.00916
MRQ-ChainACC mae=0.01251
MRQ-ChainPACC mae=0.00954
APP:
NaiveCC mae=0.04120
NaivePCC mae=0.03741
NaiveACC mae=0.03202
NaivePACC mae=0.02293
StackCC mae=0.01969
StackPCC mae=0.01871
StackACC mae=0.01386 **
StackPACC mae=0.01267 **
ChainCC mae=0.04136
ChainPCC mae=0.03571
ChainACC mae=0.03622
ChainPACC mae=0.02659
MRQ-CC mae=0.04356
MRQ-PCC mae=0.02532
MRQ-ACC mae=0.05716
MRQ-PACC mae=0.02936
MRQ-StackCC mae=0.02448
MRQ-StackPCC mae=0.02090
MRQ-StackACC mae=0.02579
MRQ-StackPACC mae=0.02388
MRQ-StackCC-app mae=0.01535
MRQ-StackPCC-app mae=0.01457
MRQ-StackACC-app mae=0.01441
MRQ-StackPACC-app mae=0.01633
MRQ-ChainCC mae=0.04874
MRQ-ChainPCC mae=0.02537
MRQ-ChainACC mae=0.06262
MRQ-ChainPACC mae=0.02906