forked from moreo/QuaPy
new methods, some experiments added
This commit is contained in:
parent
7b8e6462ff
commit
60b6fa3c12
|
@ -0,0 +1,14 @@
|
||||||
|
Things to test:
|
||||||
|
- MultiChain for classification, MultiChain for regression?
|
||||||
|
|
||||||
|
- Independent classifiers + independent quantifiers
|
||||||
|
- Stacking + independent quantifiers
|
||||||
|
- ClassifierChain + independent quantifiers
|
||||||
|
- Independent quantifiers + cross-class regression (independent?)
|
||||||
|
- Stacking + cross-class regression
|
||||||
|
- ClassifierChain + cross-class regression
|
||||||
|
- Covariates (Means, CovMatrix from samples) + multioutput regression?
|
||||||
|
- Covariates concatenated with quantifiers' predictions + cross-class regression?
|
||||||
|
|
||||||
|
- Model Selection for specific protocols?
|
||||||
|
|
|
@ -154,15 +154,24 @@ class MultilabelNaiveAggregativeQuantifier(MultilabelNaiveQuantifier, MLAggregat
|
||||||
return self.aggregate(predictions)
|
return self.aggregate(predictions)
|
||||||
|
|
||||||
|
|
||||||
class MultilabelRegressionQuantification:
|
class MLRegressionQuantification:
|
||||||
def __init__(self, base_quantifier=CC(LinearSVC()), regression='ridge', n_samples=500, sample_size=500, norm=True,
|
def __init__(self,
|
||||||
means=True, stds=True):
|
mlquantifier=MultilabelNaiveQuantifier(CC(LinearSVC())),
|
||||||
|
regression='ridge',
|
||||||
|
protocol='npp',
|
||||||
|
n_samples=500,
|
||||||
|
sample_size=500,
|
||||||
|
norm=True,
|
||||||
|
means=True,
|
||||||
|
stds=True):
|
||||||
assert regression in ['ridge', 'svr'], 'unknown regression model'
|
assert regression in ['ridge', 'svr'], 'unknown regression model'
|
||||||
self.estimator = MultilabelNaiveQuantifier(base_quantifier)
|
assert protocol in ['npp', 'app'], 'unknown protocol'
|
||||||
|
self.estimator = mlquantifier
|
||||||
if regression == 'ridge':
|
if regression == 'ridge':
|
||||||
self.reg = Ridge(normalize=norm)
|
self.reg = Ridge(normalize=norm)
|
||||||
elif regression == 'svr':
|
elif regression == 'svr':
|
||||||
self.reg = MultiOutputRegressor(LinearSVR())
|
self.reg = MultiOutputRegressor(LinearSVR())
|
||||||
|
self.protocol = protocol
|
||||||
# self.reg = MultiTaskLassoCV(normalize=norm)
|
# self.reg = MultiTaskLassoCV(normalize=norm)
|
||||||
# self.reg = KernelRidge(kernel='rbf')
|
# self.reg = KernelRidge(kernel='rbf')
|
||||||
# self.reg = LassoLarsCV(normalize=norm)
|
# self.reg = LassoLarsCV(normalize=norm)
|
||||||
|
@ -174,25 +183,11 @@ class MultilabelRegressionQuantification:
|
||||||
self.regression = regression
|
self.regression = regression
|
||||||
self.n_samples = n_samples
|
self.n_samples = n_samples
|
||||||
self.sample_size = sample_size
|
self.sample_size = sample_size
|
||||||
self.norm = StandardScaler()
|
# self.norm = StandardScaler()
|
||||||
self.means = means
|
self.means = means
|
||||||
self.stds = stds
|
self.stds = stds
|
||||||
|
|
||||||
def fit(self, data:MultilabelledCollection):
|
def _prepare_arrays(self, Xs, ys, samples_mean, samples_std):
|
||||||
self.classes_ = data.classes_
|
|
||||||
tr, te = data.train_test_split()
|
|
||||||
self.estimator.fit(tr)
|
|
||||||
samples_mean = []
|
|
||||||
samples_std = []
|
|
||||||
Xs = []
|
|
||||||
ys = []
|
|
||||||
for sample in te.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples):
|
|
||||||
ys.append(sample.prevalence()[:,1])
|
|
||||||
Xs.append(self.estimator.quantify(sample.instances)[:,1])
|
|
||||||
if self.means:
|
|
||||||
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
|
|
||||||
if self.stds:
|
|
||||||
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
|
|
||||||
Xs = np.asarray(Xs)
|
Xs = np.asarray(Xs)
|
||||||
ys = np.asarray(ys)
|
ys = np.asarray(ys)
|
||||||
if self.means:
|
if self.means:
|
||||||
|
@ -201,7 +196,49 @@ class MultilabelRegressionQuantification:
|
||||||
if self.stds:
|
if self.stds:
|
||||||
samples_std = np.asarray(samples_std)
|
samples_std = np.asarray(samples_std)
|
||||||
Xs = np.hstack([Xs, samples_std])
|
Xs = np.hstack([Xs, samples_std])
|
||||||
Xs = self.norm.fit_transform(Xs)
|
return Xs, ys
|
||||||
|
|
||||||
|
def generate_samples_npp(self, val):
|
||||||
|
samples_mean = []
|
||||||
|
samples_std = []
|
||||||
|
Xs = []
|
||||||
|
ys = []
|
||||||
|
for sample in val.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples):
|
||||||
|
ys.append(sample.prevalence()[:, 1])
|
||||||
|
Xs.append(self.estimator.quantify(sample.instances)[:, 1])
|
||||||
|
if self.means:
|
||||||
|
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
|
||||||
|
if self.stds:
|
||||||
|
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
|
||||||
|
return self._prepare_arrays(Xs, ys, samples_mean, samples_std)
|
||||||
|
|
||||||
|
def generate_samples_app(self, val):
|
||||||
|
samples_mean = []
|
||||||
|
samples_std = []
|
||||||
|
Xs = []
|
||||||
|
ys = []
|
||||||
|
ncats = len(self.classes_)
|
||||||
|
nprevs = 21
|
||||||
|
repeats = max(self.n_samples // (ncats * nprevs), 1)
|
||||||
|
for cat in self.classes_:
|
||||||
|
for sample in val.artificial_sampling_generator(sample_size=self.sample_size, category=cat, n_prevalences=nprevs, repeats=repeats):
|
||||||
|
ys.append(sample.prevalence()[:, 1])
|
||||||
|
Xs.append(self.estimator.quantify(sample.instances)[:, 1])
|
||||||
|
if self.means:
|
||||||
|
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
|
||||||
|
if self.stds:
|
||||||
|
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
|
||||||
|
return self._prepare_arrays(Xs, ys, samples_mean, samples_std)
|
||||||
|
|
||||||
|
def fit(self, data:MultilabelledCollection):
|
||||||
|
self.classes_ = data.classes_
|
||||||
|
tr, val = data.train_test_split()
|
||||||
|
self.estimator.fit(tr)
|
||||||
|
if self.protocol == 'npp':
|
||||||
|
Xs, ys = self.generate_samples_npp(val)
|
||||||
|
elif self.protocol == 'app':
|
||||||
|
Xs, ys = self.generate_samples_app(val)
|
||||||
|
# Xs = self.norm.fit_transform(Xs)
|
||||||
self.reg.fit(Xs, ys)
|
self.reg.fit(Xs, ys)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
@ -213,9 +250,9 @@ class MultilabelRegressionQuantification:
|
||||||
if self.stds:
|
if self.stds:
|
||||||
sample_std = instances.todense().std(axis=0).getA()
|
sample_std = instances.todense().std(axis=0).getA()
|
||||||
Xs = np.hstack([Xs, sample_std])
|
Xs = np.hstack([Xs, sample_std])
|
||||||
Xs = self.norm.transform(Xs)
|
# Xs = self.norm.transform(Xs)
|
||||||
Xs = self.reg.predict(Xs)
|
Xs = self.reg.predict(Xs)
|
||||||
Xs = self.norm.inverse_transform(Xs)
|
# Xs = self.norm.inverse_transform(Xs)
|
||||||
adjusted = np.clip(Xs, 0, 1)
|
adjusted = np.clip(Xs, 0, 1)
|
||||||
adjusted = adjusted.flatten()
|
adjusted = adjusted.flatten()
|
||||||
neg_prevs = 1-adjusted
|
neg_prevs = 1-adjusted
|
||||||
|
|
|
@ -6,7 +6,7 @@ from tqdm import tqdm
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
from MultiLabel.mlclassification import MultilabelStackedClassifier
|
from MultiLabel.mlclassification import MultilabelStackedClassifier
|
||||||
from MultiLabel.mldata import MultilabelledCollection
|
from MultiLabel.mldata import MultilabelledCollection
|
||||||
from MultiLabel.mlquantification import MultilabelNaiveQuantifier, MLCC, MLPCC, MultilabelRegressionQuantification, \
|
from MultiLabel.mlquantification import MultilabelNaiveQuantifier, MLCC, MLPCC, MLRegressionQuantification, \
|
||||||
MLACC, \
|
MLACC, \
|
||||||
MLPACC, MultilabelNaiveAggregativeQuantifier
|
MLPACC, MultilabelNaiveAggregativeQuantifier
|
||||||
from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy
|
from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy
|
||||||
|
@ -44,11 +44,23 @@ def models():
|
||||||
# yield 'ChainPCC', MLPCC(ClassifierChain(cls(), cv=None, order='random'))
|
# yield 'ChainPCC', MLPCC(ClassifierChain(cls(), cv=None, order='random'))
|
||||||
# yield 'ChainACC', MLACC(ClassifierChain(cls(), cv=None, order='random'))
|
# yield 'ChainACC', MLACC(ClassifierChain(cls(), cv=None, order='random'))
|
||||||
# yield 'ChainPACC', MLPACC(ClassifierChain(cls(), cv=None, order='random'))
|
# yield 'ChainPACC', MLPACC(ClassifierChain(cls(), cv=None, order='random'))
|
||||||
common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False}
|
common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False, 'regression':'svr'}
|
||||||
yield 'MRQ-CC', MultilabelRegressionQuantification(base_quantifier=CC(cls()), regression='svr', **common)
|
# yield 'MRQ-CC', MLRegressionQuantification(MultilabelNaiveQuantifier(CC(cls())), **common)
|
||||||
yield 'MRQ-PCC', MultilabelRegressionQuantification(base_quantifier=PCC(cls()), regression='svr', **common)
|
# yield 'MRQ-PCC', MLRegressionQuantification(MultilabelNaiveQuantifier(PCC(cls())), **common)
|
||||||
yield 'MRQ-ACC', MultilabelRegressionQuantification(base_quantifier=ACC(cls()), regression='svr', **common)
|
# yield 'MRQ-ACC', MLRegressionQuantification(MultilabelNaiveQuantifier(ACC(cls())), **common)
|
||||||
yield 'MRQ-PACC', MultilabelRegressionQuantification(base_quantifier=PACC(cls()), regression='svr', **common)
|
# yield 'MRQ-PACC', MLRegressionQuantification(MultilabelNaiveQuantifier(PACC(cls())), **common)
|
||||||
|
# yield 'MRQ-StackCC', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), **common)
|
||||||
|
# yield 'MRQ-StackPCC', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), **common)
|
||||||
|
# yield 'MRQ-StackACC', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), **common)
|
||||||
|
# yield 'MRQ-StackPACC', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())), **common)
|
||||||
|
yield 'MRQ-StackCC-app', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
|
||||||
|
yield 'MRQ-StackPCC-app', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
|
||||||
|
yield 'MRQ-StackACC-app', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), protocol='app', **common)
|
||||||
|
yield 'MRQ-StackPACC-app', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())), protocol='app', **common)
|
||||||
|
# yield 'MRQ-ChainCC', MLRegressionQuantification(MLCC(ClassifierChain(cls())), **common)
|
||||||
|
# yield 'MRQ-ChainPCC', MLRegressionQuantification(MLPCC(ClassifierChain(cls())), **common)
|
||||||
|
# yield 'MRQ-ChainACC', MLRegressionQuantification(MLACC(ClassifierChain(cls())), **common)
|
||||||
|
# yield 'MRQ-ChainPACC', MLRegressionQuantification(MLPACC(ClassifierChain(cls())), **common)
|
||||||
|
|
||||||
|
|
||||||
dataset = 'reuters21578'
|
dataset = 'reuters21578'
|
||||||
|
|
|
@ -0,0 +1,79 @@
|
||||||
|
num categories = 10
|
||||||
|
Train-counts: [1650 181 389 2877 433 347 538 197 369 212]
|
||||||
|
Test-counts: [ 719 56 189 1087 149 131 179 89 117 71]
|
||||||
|
MLPE: 0.01101
|
||||||
|
|
||||||
|
NPP:
|
||||||
|
NaiveCC mae=0.01718
|
||||||
|
NaivePCC mae=0.00898
|
||||||
|
NaiveACC mae=0.01560
|
||||||
|
NaivePACC mae=0.01062
|
||||||
|
|
||||||
|
StackCC mae=0.00790
|
||||||
|
StackPCC mae=0.00659 **
|
||||||
|
StackACC mae=0.00913
|
||||||
|
StackPACC mae=0.00771
|
||||||
|
|
||||||
|
ChainCC mae=0.01644
|
||||||
|
ChainPCC mae=0.00924
|
||||||
|
ChainACC mae=0.01767
|
||||||
|
ChainPACC mae=0.01140
|
||||||
|
|
||||||
|
MRQ-CC mae=0.01130
|
||||||
|
MRQ-PCC mae=0.00941
|
||||||
|
MRQ-ACC mae=0.01153
|
||||||
|
MRQ-PACC mae=0.01000
|
||||||
|
|
||||||
|
MRQ-StackCC mae=0.00757
|
||||||
|
MRQ-StackPCC mae=0.00652 **
|
||||||
|
MRQ-StackACC mae=0.00799
|
||||||
|
MRQ-StackPACC mae=0.00763
|
||||||
|
|
||||||
|
MRQ-StackCC-app mae=0.00791
|
||||||
|
MRQ-StackPCC-app mae=0.00840
|
||||||
|
MRQ-StackACC-app mae=0.00910
|
||||||
|
MRQ-StackPACC-app mae=0.00941
|
||||||
|
|
||||||
|
MRQ-ChainCC mae=0.00989
|
||||||
|
MRQ-ChainPCC mae=0.00916
|
||||||
|
MRQ-ChainACC mae=0.01251
|
||||||
|
MRQ-ChainPACC mae=0.00954
|
||||||
|
|
||||||
|
APP:
|
||||||
|
NaiveCC mae=0.04120
|
||||||
|
NaivePCC mae=0.03741
|
||||||
|
NaiveACC mae=0.03202
|
||||||
|
NaivePACC mae=0.02293
|
||||||
|
|
||||||
|
StackCC mae=0.01969
|
||||||
|
StackPCC mae=0.01871
|
||||||
|
StackACC mae=0.01386 **
|
||||||
|
StackPACC mae=0.01267 **
|
||||||
|
|
||||||
|
ChainCC mae=0.04136
|
||||||
|
ChainPCC mae=0.03571
|
||||||
|
ChainACC mae=0.03622
|
||||||
|
ChainPACC mae=0.02659
|
||||||
|
|
||||||
|
MRQ-CC mae=0.04356
|
||||||
|
MRQ-PCC mae=0.02532
|
||||||
|
MRQ-ACC mae=0.05716
|
||||||
|
MRQ-PACC mae=0.02936
|
||||||
|
|
||||||
|
MRQ-StackCC mae=0.02448
|
||||||
|
MRQ-StackPCC mae=0.02090
|
||||||
|
MRQ-StackACC mae=0.02579
|
||||||
|
MRQ-StackPACC mae=0.02388
|
||||||
|
|
||||||
|
MRQ-StackCC-app mae=0.01535
|
||||||
|
MRQ-StackPCC-app mae=0.01457
|
||||||
|
MRQ-StackACC-app mae=0.01441
|
||||||
|
MRQ-StackPACC-app mae=0.01633
|
||||||
|
|
||||||
|
MRQ-ChainCC mae=0.04874
|
||||||
|
MRQ-ChainPCC mae=0.02537
|
||||||
|
MRQ-ChainACC mae=0.06262
|
||||||
|
MRQ-ChainPACC mae=0.02906
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue