forked from moreo/QuaPy
new methods, some experiments added
This commit is contained in:
parent
7b8e6462ff
commit
60b6fa3c12
|
@ -0,0 +1,14 @@
|
|||
Things to test:
|
||||
- MultiChain for classification, MultiChain for regression?
|
||||
|
||||
- Independent classifiers + independent quantifiers
|
||||
- Stacking + independent quantifiers
|
||||
- ClassifierChain + independent quantifiers
|
||||
- Independent quantifiers + cross-class regression (independent?)
|
||||
- Stacking + cross-class regression
|
||||
- ClassifierChain + cross-class regression
|
||||
- Covariates (Means, CovMatrix from samples) + multioutput regression?
|
||||
- Covariates concatenated with quantifiers' predictions + cross-class regression?
|
||||
|
||||
- Model Selection for specific protocols?
|
||||
|
|
@ -154,15 +154,24 @@ class MultilabelNaiveAggregativeQuantifier(MultilabelNaiveQuantifier, MLAggregat
|
|||
return self.aggregate(predictions)
|
||||
|
||||
|
||||
class MultilabelRegressionQuantification:
|
||||
def __init__(self, base_quantifier=CC(LinearSVC()), regression='ridge', n_samples=500, sample_size=500, norm=True,
|
||||
means=True, stds=True):
|
||||
class MLRegressionQuantification:
|
||||
def __init__(self,
|
||||
mlquantifier=MultilabelNaiveQuantifier(CC(LinearSVC())),
|
||||
regression='ridge',
|
||||
protocol='npp',
|
||||
n_samples=500,
|
||||
sample_size=500,
|
||||
norm=True,
|
||||
means=True,
|
||||
stds=True):
|
||||
assert regression in ['ridge', 'svr'], 'unknown regression model'
|
||||
self.estimator = MultilabelNaiveQuantifier(base_quantifier)
|
||||
assert protocol in ['npp', 'app'], 'unknown protocol'
|
||||
self.estimator = mlquantifier
|
||||
if regression == 'ridge':
|
||||
self.reg = Ridge(normalize=norm)
|
||||
elif regression == 'svr':
|
||||
self.reg = MultiOutputRegressor(LinearSVR())
|
||||
self.protocol = protocol
|
||||
# self.reg = MultiTaskLassoCV(normalize=norm)
|
||||
# self.reg = KernelRidge(kernel='rbf')
|
||||
# self.reg = LassoLarsCV(normalize=norm)
|
||||
|
@ -174,25 +183,11 @@ class MultilabelRegressionQuantification:
|
|||
self.regression = regression
|
||||
self.n_samples = n_samples
|
||||
self.sample_size = sample_size
|
||||
self.norm = StandardScaler()
|
||||
# self.norm = StandardScaler()
|
||||
self.means = means
|
||||
self.stds = stds
|
||||
|
||||
def fit(self, data:MultilabelledCollection):
|
||||
self.classes_ = data.classes_
|
||||
tr, te = data.train_test_split()
|
||||
self.estimator.fit(tr)
|
||||
samples_mean = []
|
||||
samples_std = []
|
||||
Xs = []
|
||||
ys = []
|
||||
for sample in te.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples):
|
||||
ys.append(sample.prevalence()[:,1])
|
||||
Xs.append(self.estimator.quantify(sample.instances)[:,1])
|
||||
if self.means:
|
||||
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
|
||||
if self.stds:
|
||||
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
|
||||
def _prepare_arrays(self, Xs, ys, samples_mean, samples_std):
|
||||
Xs = np.asarray(Xs)
|
||||
ys = np.asarray(ys)
|
||||
if self.means:
|
||||
|
@ -201,7 +196,49 @@ class MultilabelRegressionQuantification:
|
|||
if self.stds:
|
||||
samples_std = np.asarray(samples_std)
|
||||
Xs = np.hstack([Xs, samples_std])
|
||||
Xs = self.norm.fit_transform(Xs)
|
||||
return Xs, ys
|
||||
|
||||
def generate_samples_npp(self, val):
|
||||
samples_mean = []
|
||||
samples_std = []
|
||||
Xs = []
|
||||
ys = []
|
||||
for sample in val.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples):
|
||||
ys.append(sample.prevalence()[:, 1])
|
||||
Xs.append(self.estimator.quantify(sample.instances)[:, 1])
|
||||
if self.means:
|
||||
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
|
||||
if self.stds:
|
||||
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
|
||||
return self._prepare_arrays(Xs, ys, samples_mean, samples_std)
|
||||
|
||||
def generate_samples_app(self, val):
|
||||
samples_mean = []
|
||||
samples_std = []
|
||||
Xs = []
|
||||
ys = []
|
||||
ncats = len(self.classes_)
|
||||
nprevs = 21
|
||||
repeats = max(self.n_samples // (ncats * nprevs), 1)
|
||||
for cat in self.classes_:
|
||||
for sample in val.artificial_sampling_generator(sample_size=self.sample_size, category=cat, n_prevalences=nprevs, repeats=repeats):
|
||||
ys.append(sample.prevalence()[:, 1])
|
||||
Xs.append(self.estimator.quantify(sample.instances)[:, 1])
|
||||
if self.means:
|
||||
samples_mean.append(sample.instances.mean(axis=0).getA().flatten())
|
||||
if self.stds:
|
||||
samples_std.append(sample.instances.todense().std(axis=0).getA().flatten())
|
||||
return self._prepare_arrays(Xs, ys, samples_mean, samples_std)
|
||||
|
||||
def fit(self, data:MultilabelledCollection):
|
||||
self.classes_ = data.classes_
|
||||
tr, val = data.train_test_split()
|
||||
self.estimator.fit(tr)
|
||||
if self.protocol == 'npp':
|
||||
Xs, ys = self.generate_samples_npp(val)
|
||||
elif self.protocol == 'app':
|
||||
Xs, ys = self.generate_samples_app(val)
|
||||
# Xs = self.norm.fit_transform(Xs)
|
||||
self.reg.fit(Xs, ys)
|
||||
return self
|
||||
|
||||
|
@ -213,9 +250,9 @@ class MultilabelRegressionQuantification:
|
|||
if self.stds:
|
||||
sample_std = instances.todense().std(axis=0).getA()
|
||||
Xs = np.hstack([Xs, sample_std])
|
||||
Xs = self.norm.transform(Xs)
|
||||
# Xs = self.norm.transform(Xs)
|
||||
Xs = self.reg.predict(Xs)
|
||||
Xs = self.norm.inverse_transform(Xs)
|
||||
# Xs = self.norm.inverse_transform(Xs)
|
||||
adjusted = np.clip(Xs, 0, 1)
|
||||
adjusted = adjusted.flatten()
|
||||
neg_prevs = 1-adjusted
|
||||
|
|
|
@ -6,7 +6,7 @@ from tqdm import tqdm
|
|||
import quapy as qp
|
||||
from MultiLabel.mlclassification import MultilabelStackedClassifier
|
||||
from MultiLabel.mldata import MultilabelledCollection
|
||||
from MultiLabel.mlquantification import MultilabelNaiveQuantifier, MLCC, MLPCC, MultilabelRegressionQuantification, \
|
||||
from MultiLabel.mlquantification import MultilabelNaiveQuantifier, MLCC, MLPCC, MLRegressionQuantification, \
|
||||
MLACC, \
|
||||
MLPACC, MultilabelNaiveAggregativeQuantifier
|
||||
from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy
|
||||
|
@ -44,11 +44,23 @@ def models():
|
|||
# yield 'ChainPCC', MLPCC(ClassifierChain(cls(), cv=None, order='random'))
|
||||
# yield 'ChainACC', MLACC(ClassifierChain(cls(), cv=None, order='random'))
|
||||
# yield 'ChainPACC', MLPACC(ClassifierChain(cls(), cv=None, order='random'))
|
||||
common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False}
|
||||
yield 'MRQ-CC', MultilabelRegressionQuantification(base_quantifier=CC(cls()), regression='svr', **common)
|
||||
yield 'MRQ-PCC', MultilabelRegressionQuantification(base_quantifier=PCC(cls()), regression='svr', **common)
|
||||
yield 'MRQ-ACC', MultilabelRegressionQuantification(base_quantifier=ACC(cls()), regression='svr', **common)
|
||||
yield 'MRQ-PACC', MultilabelRegressionQuantification(base_quantifier=PACC(cls()), regression='svr', **common)
|
||||
common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False, 'regression':'svr'}
|
||||
# yield 'MRQ-CC', MLRegressionQuantification(MultilabelNaiveQuantifier(CC(cls())), **common)
|
||||
# yield 'MRQ-PCC', MLRegressionQuantification(MultilabelNaiveQuantifier(PCC(cls())), **common)
|
||||
# yield 'MRQ-ACC', MLRegressionQuantification(MultilabelNaiveQuantifier(ACC(cls())), **common)
|
||||
# yield 'MRQ-PACC', MLRegressionQuantification(MultilabelNaiveQuantifier(PACC(cls())), **common)
|
||||
# yield 'MRQ-StackCC', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), **common)
|
||||
# yield 'MRQ-StackPCC', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), **common)
|
||||
# yield 'MRQ-StackACC', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), **common)
|
||||
# yield 'MRQ-StackPACC', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())), **common)
|
||||
yield 'MRQ-StackCC-app', MLRegressionQuantification(MLCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
|
||||
yield 'MRQ-StackPCC-app', MLRegressionQuantification(MLPCC(MultilabelStackedClassifier(cls())), protocol='app', **common)
|
||||
yield 'MRQ-StackACC-app', MLRegressionQuantification(MLACC(MultilabelStackedClassifier(cls())), protocol='app', **common)
|
||||
yield 'MRQ-StackPACC-app', MLRegressionQuantification(MLPACC(MultilabelStackedClassifier(cls())), protocol='app', **common)
|
||||
# yield 'MRQ-ChainCC', MLRegressionQuantification(MLCC(ClassifierChain(cls())), **common)
|
||||
# yield 'MRQ-ChainPCC', MLRegressionQuantification(MLPCC(ClassifierChain(cls())), **common)
|
||||
# yield 'MRQ-ChainACC', MLRegressionQuantification(MLACC(ClassifierChain(cls())), **common)
|
||||
# yield 'MRQ-ChainPACC', MLRegressionQuantification(MLPACC(ClassifierChain(cls())), **common)
|
||||
|
||||
|
||||
dataset = 'reuters21578'
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
num categories = 10
|
||||
Train-counts: [1650 181 389 2877 433 347 538 197 369 212]
|
||||
Test-counts: [ 719 56 189 1087 149 131 179 89 117 71]
|
||||
MLPE: 0.01101
|
||||
|
||||
NPP:
|
||||
NaiveCC mae=0.01718
|
||||
NaivePCC mae=0.00898
|
||||
NaiveACC mae=0.01560
|
||||
NaivePACC mae=0.01062
|
||||
|
||||
StackCC mae=0.00790
|
||||
StackPCC mae=0.00659 **
|
||||
StackACC mae=0.00913
|
||||
StackPACC mae=0.00771
|
||||
|
||||
ChainCC mae=0.01644
|
||||
ChainPCC mae=0.00924
|
||||
ChainACC mae=0.01767
|
||||
ChainPACC mae=0.01140
|
||||
|
||||
MRQ-CC mae=0.01130
|
||||
MRQ-PCC mae=0.00941
|
||||
MRQ-ACC mae=0.01153
|
||||
MRQ-PACC mae=0.01000
|
||||
|
||||
MRQ-StackCC mae=0.00757
|
||||
MRQ-StackPCC mae=0.00652 **
|
||||
MRQ-StackACC mae=0.00799
|
||||
MRQ-StackPACC mae=0.00763
|
||||
|
||||
MRQ-StackCC-app mae=0.00791
|
||||
MRQ-StackPCC-app mae=0.00840
|
||||
MRQ-StackACC-app mae=0.00910
|
||||
MRQ-StackPACC-app mae=0.00941
|
||||
|
||||
MRQ-ChainCC mae=0.00989
|
||||
MRQ-ChainPCC mae=0.00916
|
||||
MRQ-ChainACC mae=0.01251
|
||||
MRQ-ChainPACC mae=0.00954
|
||||
|
||||
APP:
|
||||
NaiveCC mae=0.04120
|
||||
NaivePCC mae=0.03741
|
||||
NaiveACC mae=0.03202
|
||||
NaivePACC mae=0.02293
|
||||
|
||||
StackCC mae=0.01969
|
||||
StackPCC mae=0.01871
|
||||
StackACC mae=0.01386 **
|
||||
StackPACC mae=0.01267 **
|
||||
|
||||
ChainCC mae=0.04136
|
||||
ChainPCC mae=0.03571
|
||||
ChainACC mae=0.03622
|
||||
ChainPACC mae=0.02659
|
||||
|
||||
MRQ-CC mae=0.04356
|
||||
MRQ-PCC mae=0.02532
|
||||
MRQ-ACC mae=0.05716
|
||||
MRQ-PACC mae=0.02936
|
||||
|
||||
MRQ-StackCC mae=0.02448
|
||||
MRQ-StackPCC mae=0.02090
|
||||
MRQ-StackACC mae=0.02579
|
||||
MRQ-StackPACC mae=0.02388
|
||||
|
||||
MRQ-StackCC-app mae=0.01535
|
||||
MRQ-StackPCC-app mae=0.01457
|
||||
MRQ-StackACC-app mae=0.01441
|
||||
MRQ-StackPACC-app mae=0.01633
|
||||
|
||||
MRQ-ChainCC mae=0.04874
|
||||
MRQ-ChainPCC mae=0.02537
|
||||
MRQ-ChainACC mae=0.06262
|
||||
MRQ-ChainPACC mae=0.02906
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue