1
0
Fork 0

new methods, some experiments added

This commit is contained in:
Alejandro Moreo Fernandez 2021-07-06 18:26:05 +02:00
parent 7b8e6462ff
commit 60b6fa3c12
4 changed files with 171 additions and 29 deletions

14
MultiLabel/NOTES.txt Normal file
View File

@ -0,0 +1,14 @@
Things to test:
- MultiChain for classification, MultiChain for regression?
- Independent classifiers + independent quantifiers
- Stacking + independent quantifiers
- ClassifierChain + independent quantifiers
- Independent quantifiers + cross-class regression (independent?)
- Stacking + cross-class regression
- ClassifierChain + cross-class regression
- Covariates (Means, CovMatrix from samples) + multioutput regression?
- Covariates concatenated with quantifiers' predictions + cross-class regression?
- Model Selection for specific protocols?

View File

@ -154,15 +154,24 @@ class MultilabelNaiveAggregativeQuantifier(MultilabelNaiveQuantifier, MLAggregat
return self.aggregate(predictions)
class MultilabelRegressionQuantification:
def __init__(self, base_quantifier=CC(LinearSVC()), regression='ridge', n_samples=500, sample_size=500, norm=True,
means=True, stds=True):
class MLRegressionQuantification:
def __init__(self,
mlquantifier=MultilabelNaiveQuantifier(CC(LinearSVC())),
regression='ridge',
protocol='npp',
n_samples=500,
sample_size=500,
norm=True,
means=True,
stds=True):
assert regression in ['ridge', 'svr'], 'unknown regression model'
self.estimator = MultilabelNaiveQuantifier(base_quantifier)
assert protocol in ['npp', 'app'], 'unknown protocol'
self.estimator = mlquantifier
if regression == 'ridge':
self.reg = Ridge(normalize=norm)
elif regression == 'svr':
self.reg = MultiOutputRegressor(LinearSVR())
self.protocol = protocol
# self.reg = MultiTaskLassoCV(normalize=norm)
# self.reg = KernelRidge(kernel='rbf')
# self.reg = LassoLarsCV(normalize=norm)
@ -174,25 +183,11 @@ class MultilabelRegressionQuantification:
self.regression = regression
self.n_samples = n_samples
self.sample_size = sample_size
self.norm = StandardScaler()
# self.norm = StandardScaler()
self.means = means
self.stds = stds
def fit(self, data:MultilabelledCollection):
self.classes_ = data.classes_
tr, te = data.train_test_split()
self.estimator.fit(tr)
samples_mean = []
samples_std = []
Xs = []
ys = []
for sample in te.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples):
ys.append(sample.prevalence()[:,1])
Xs.append(self.estimator.quantify(sample.instances)[:,1])
if self.means:
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
if self.stds:
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
def _prepare_arrays(self, Xs, ys, samples_mean, samples_std):
Xs = np.asarray(Xs)
ys = np.asarray(ys)
if self.means:
@ -201,7 +196,49 @@ class MultilabelRegressionQuantification:
if self.stds:
samples_std = np.asarray(samples_std)
Xs = np.hstack([Xs, samples_std])
Xs = self.norm.fit_transform(Xs)
return Xs, ys
def generate_samples_npp(self, val):
    """Build the regressor's training arrays from natural samples of `val`.

    Draws `self.n_samples` samples of size `self.sample_size` through
    `val.natural_sampling_generator`. For every sample, column 1 of the true
    prevalence matrix becomes a target row and column 1 of the estimator's
    output becomes an input row (presumably column 1 holds the positive-class
    prevalence of each label -- confirm against `prevalence()`).

    :param val: held-out collection exposing `natural_sampling_generator`
    :return: whatever `self._prepare_arrays` returns for the collected lists
    """
    true_prevs, estim_prevs = [], []
    feat_means, feat_stds = [], []

    sampler = val.natural_sampling_generator(
        sample_size=self.sample_size,
        repeats=self.n_samples,
    )
    for drawn in sampler:
        true_prevs.append(drawn.prevalence()[:, 1])
        docs = drawn.instances
        estim_prevs.append(self.estimator.quantify(docs)[:, 1])
        # optional covariates: per-feature mean / standard deviation of the sample
        if self.means:
            feat_means.append(docs.mean(axis=0).getA().flatten())
        if self.stds:
            # the std is computed on a densified copy of the instances
            feat_stds.append(docs.todense().std(axis=0).getA().flatten())

    return self._prepare_arrays(estim_prevs, true_prevs, feat_means, feat_stds)
def generate_samples_app(self, val):
    """Build the regressor's training arrays from artificial samples of `val`.

    For each category in `self.classes_`, samples are drawn through
    `val.artificial_sampling_generator` over 21 prevalence points. The number
    of repeats per point is chosen so that the overall number of samples stays
    close to `self.n_samples`, with a minimum of one repeat.

    :param val: held-out collection exposing `artificial_sampling_generator`
    :return: whatever `self._prepare_arrays` returns for the collected lists
    """
    true_prevs, estim_prevs = [], []
    feat_means, feat_stds = [], []

    n_prevpoints = 21
    n_categories = len(self.classes_)
    # spread the sampling budget across (category, prevalence point) pairs
    reps = max(self.n_samples // (n_categories * n_prevpoints), 1)

    for category in self.classes_:
        sampler = val.artificial_sampling_generator(
            sample_size=self.sample_size,
            category=category,
            n_prevalences=n_prevpoints,
            repeats=reps,
        )
        for drawn in sampler:
            true_prevs.append(drawn.prevalence()[:, 1])
            docs = drawn.instances
            estim_prevs.append(self.estimator.quantify(docs)[:, 1])
            # optional covariates: per-feature mean / standard deviation of the sample
            if self.means:
                feat_means.append(docs.mean(axis=0).getA().flatten())
            if self.stds:
                # the std is computed on a densified copy of the instances
                feat_stds.append(docs.todense().std(axis=0).getA().flatten())

    return self._prepare_arrays(estim_prevs, true_prevs, feat_means, feat_stds)
def fit(self, data:MultilabelledCollection):
    """Fit the underlying quantifier, then the regressor that maps its estimates to true prevalences.

    `data` is split with `data.train_test_split()`. The first part trains
    `self.estimator`; samples generated from the second part, following
    `self.protocol`, form the training set of `self.reg`.

    :param data: the labelled collection to learn from
    :return: self
    :raises ValueError: if `self.protocol` is neither 'npp' nor 'app'
    """
    sample_generators = {
        'npp': self.generate_samples_npp,
        'app': self.generate_samples_app,
    }
    # Validate before the expensive estimator fit: an assert-based check is
    # skipped under `python -O`, and the attribute may have been reassigned,
    # which would otherwise surface later as an UnboundLocalError on Xs/ys.
    if self.protocol not in sample_generators:
        raise ValueError(f'unknown protocol {self.protocol!r}; expected one of {sorted(sample_generators)}')

    self.classes_ = data.classes_
    tr, val = data.train_test_split()
    self.estimator.fit(tr)

    Xs, ys = sample_generators[self.protocol](val)
    # NOTE: the regressor is trained on the raw arrays; no feature scaling is applied here
    self.reg.fit(Xs, ys)
    return self
@ -213,9 +250,9 @@ class MultilabelRegressionQuantification:
if self.stds:
sample_std = instances.todense().std(axis=0).getA()
Xs = np.hstack([Xs, sample_std])
Xs = self.norm.transform(Xs)
# Xs = self.norm.transform(Xs)
Xs = self.reg.predict(Xs)
Xs = self.norm.inverse_transform(Xs)
# Xs = self.norm.inverse_transform(Xs)
adjusted = np.clip(Xs, 0, 1)
adjusted = adjusted.flatten()
neg_prevs = 1-adjusted

View File

@ -6,7 +6,7 @@ from tqdm import tqdm
import quapy as qp
from MultiLabel.mlclassification import MultilabelStackedClassifier
from MultiLabel.mldata import MultilabelledCollection
from MultiLabel.mlquantification import MultilabelNaiveQuantifier, MLCC, MLPCC, MultilabelRegressionQuantification, \
from MultiLabel.mlquantification import MultilabelNaiveQuantifier, MLCC, MLPCC, MLRegressionQuantification, \
MLACC, \
MLPACC, MultilabelNaiveAggregativeQuantifier
from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy
@ -44,11 +44,23 @@ def models():
# yield 'ChainPCC', MLPCC(ClassifierChain(cls(), cv=None, order='random'))
# yield 'ChainACC', MLACC(ClassifierChain(cls(), cv=None, order='random'))
# yield 'ChainPACC', MLPACC(ClassifierChain(cls(), cv=None, order='random'))
common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False}
yield 'MRQ-CC', MultilabelRegressionQuantification(base_quantifier=CC(cls()), regression='svr', **common)
yield 'MRQ-PCC', MultilabelRegressionQuantification(base_quantifier=PCC(cls()), regression='svr', **common)
yield 'MRQ-ACC', MultilabelRegressionQuantification(base_quantifier=ACC(cls()), regression='svr', **common)
yield 'MRQ-PACC', MultilabelRegressionQuantification(base_quantifier=PACC(cls()), regression='svr', **common)
common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False, 'regression':'svr'}
# yield 'MRQ-CC', MLRegressionQuantification(MultilabelNaiveQuantifier(CC(cls())), **common)
# yield 'MRQ-PCC', MLRegressionQuantification(MultilabelNaiveQuantifier(PCC(cls())), **common)
# yield 'MRQ-ACC', MLRegressionQuantification(MultilabelNaiveQuantifier(ACC(cls())), **common)
# yield 'MRQ-PACC', MLRegressionQuantification(MultilabelNaiveQuantifier(PACC(cls())), **common)
# yield 'MRQ-StackCC', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), **common)
# yield 'MRQ-StackPCC', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), **common)
# yield 'MRQ-StackACC', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), **common)
# yield 'MRQ-StackPACC', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())), **common)
yield 'MRQ-StackCC-app', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
yield 'MRQ-StackPCC-app', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
yield 'MRQ-StackACC-app', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), protocol='app', **common)
yield 'MRQ-StackPACC-app', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())), protocol='app', **common)
# yield 'MRQ-ChainCC', MLRegressionQuantification(MLCC(ClassifierChain(cls())), **common)
# yield 'MRQ-ChainPCC', MLRegressionQuantification(MLPCC(ClassifierChain(cls())), **common)
# yield 'MRQ-ChainACC', MLRegressionQuantification(MLACC(ClassifierChain(cls())), **common)
# yield 'MRQ-ChainPACC', MLRegressionQuantification(MLPACC(ClassifierChain(cls())), **common)
dataset = 'reuters21578'

79
MultiLabel/results.txt Normal file
View File

@ -0,0 +1,79 @@
num categories = 10
Train-counts: [1650 181 389 2877 433 347 538 197 369 212]
Test-counts: [ 719 56 189 1087 149 131 179 89 117 71]
MLPE: 0.01101
NPP:
NaiveCC mae=0.01718
NaivePCC mae=0.00898
NaiveACC mae=0.01560
NaivePACC mae=0.01062
StackCC mae=0.00790
StackPCC mae=0.00659 **
StackACC mae=0.00913
StackPACC mae=0.00771
ChainCC mae=0.01644
ChainPCC mae=0.00924
ChainACC mae=0.01767
ChainPACC mae=0.01140
MRQ-CC mae=0.01130
MRQ-PCC mae=0.00941
MRQ-ACC mae=0.01153
MRQ-PACC mae=0.01000
MRQ-StackCC mae=0.00757
MRQ-StackPCC mae=0.00652 **
MRQ-StackACC mae=0.00799
MRQ-StackPACC mae=0.00763
MRQ-StackCC-app mae=0.00791
MRQ-StackPCC-app mae=0.00840
MRQ-StackACC-app mae=0.00910
MRQ-StackPACC-app mae=0.00941
MRQ-ChainCC mae=0.00989
MRQ-ChainPCC mae=0.00916
MRQ-ChainACC mae=0.01251
MRQ-ChainPACC mae=0.00954
APP:
NaiveCC mae=0.04120
NaivePCC mae=0.03741
NaiveACC mae=0.03202
NaivePACC mae=0.02293
StackCC mae=0.01969
StackPCC mae=0.01871
StackACC mae=0.01386 **
StackPACC mae=0.01267 **
ChainCC mae=0.04136
ChainPCC mae=0.03571
ChainACC mae=0.03622
ChainPACC mae=0.02659
MRQ-CC mae=0.04356
MRQ-PCC mae=0.02532
MRQ-ACC mae=0.05716
MRQ-PACC mae=0.02936
MRQ-StackCC mae=0.02448
MRQ-StackPCC mae=0.02090
MRQ-StackACC mae=0.02579
MRQ-StackPACC mae=0.02388
MRQ-StackCC-app mae=0.01535
MRQ-StackPCC-app mae=0.01457
MRQ-StackACC-app mae=0.01441
MRQ-StackPACC-app mae=0.01633
MRQ-ChainCC mae=0.04874
MRQ-ChainPCC mae=0.02537
MRQ-ChainACC mae=0.06262
MRQ-ChainPACC mae=0.02906