forked from moreo/QuaPy
committing last changes before creating a branch
This commit is contained in:
parent 4572ec266d
commit 6f3f103b3b
@@ -23,12 +23,19 @@ import os
 import pickle

 models = [#'MLPE',
-    'NaiveCC', 'NaivePCC', 'NaiveACC', 'NaivePACC', #'NaiveHDy', 'NaiveSLD',
-    'StackCC', 'StackPCC', 'StackACC', 'StackPACC',
-    'MRQ-CC', 'MRQ-PCC', 'MRQ-ACC', 'MRQ-PACC',
-    'MRQ-StackCC', 'MRQ-StackPCC', 'MRQ-StackACC', 'MRQ-StackPACC',
-    'MRQ-StackCC-app', 'MRQ-StackPCC-app', 'MRQ-StackACC-app', 'MRQ-StackPACC-app',
-    'LSP-CC', 'LSP-ACC'
+    'NaiveCC', 'NaivePCC', 'NaivePCCcal', 'NaiveACC', 'NaivePACC', 'NaivePACCcal', 'NaiveACCit', 'NaivePACCit',
+    #'NaiveHDy', 'NaiveSLD',
+    'ChainCC', 'ChainPCC', 'ChainACC', 'ChainPACC',
+    'StackCC', 'StackPCC', 'StackPCCcal', 'StackACC', 'StackPACC', 'StackPACCcal', 'StackACCit', 'StackPACCit',
+    'MRQ-CC', 'MRQ-PCC', 'MRQ-ACC', 'MRQ-PACC', 'MRQ-ACCit', 'MRQ-PACCit',
+    'StackMRQ-CC', 'StackMRQ-PCC', 'StackMRQ-ACC', 'StackMRQ-PACC',
+    'MRQ-StackCC', 'MRQ-StackPCC', 'MRQ-StackACC', 'MRQ-StackPACC',
+    'StackMRQ-StackCC', 'StackMRQ-StackPCC', 'StackMRQ-StackACC', 'StackMRQ-StackPACC',
+    'MRQ-StackCC-app', 'MRQ-StackPCC-app', 'MRQ-StackACC-app', 'MRQ-StackPACC-app',
+    'StackMRQ-StackCC-app', 'StackMRQ-StackPCC-app', 'StackMRQ-StackACC-app', 'StackMRQ-StackPACC-app',
+    'LSP-CC', 'LSP-ACC', 'MLKNN-CC', 'MLKNN-ACC',
+    'MLAdjustedC', 'MLStackAdjustedC', 'MLprobAdjustedC', 'MLStackProbAdjustedC'
 ]

 # datasets = sorted(set([x[0] for x in available_data_sets().keys()]))

@@ -64,6 +71,12 @@ def generate_table(path, protocol, error):
         dataset, model, scores = r
         table.add(dataset, model, scores)

+    save_table(table, path)
+    save_table(table.getRankTable(), path.replace('.tex','.rank.tex'))
+
+
+
+def save_table(table, path):
     tabular = """
     \\resizebox{\\textwidth}{!}{%
     \\begin{tabular}{|c||""" + ('c|' * len(models)) + """} \hline

@@ -79,7 +92,6 @@ def generate_table(path, protocol, error):
     with open(path, 'wt') as foo:
         foo.write(tabular)

-
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='Experiments for multi-label quantification')
     parser.add_argument('--results', type=str, default='./results', metavar='str',

@@ -2,6 +2,8 @@ import argparse
 from sklearn.calibration import CalibratedClassifierCV
 from sklearn.linear_model import LogisticRegression
 import itertools
+
+from sklearn.multiclass import OneVsRestClassifier
 from sklearn.multioutput import ClassifierChain
 from tqdm import tqdm
 from skmultilearn.dataset import load_dataset, available_data_sets

@@ -11,7 +13,7 @@ from MultiLabel.mlclassification import MLStackedClassifier, LabelSpacePartion,
 from MultiLabel.mldata import MultilabelledCollection
 from MultiLabel.mlquantification import MLNaiveQuantifier, MLCC, MLPCC, MLRegressionQuantification, \
     MLACC, \
-    MLPACC, MLNaiveAggregativeQuantifier, MLMLPE
+    MLPACC, MLNaiveAggregativeQuantifier, MLMLPE, StackMLRQuantifier, MLadjustedCount, MLprobAdjustedCount
 from method.aggregative import PACC, CC, EMQ, PCC, ACC, HDy
 import numpy as np
 from data.dataset import Dataset

@@ -49,23 +51,33 @@ def models():
     yield 'MLPE', MLMLPE()
     yield 'NaiveCC', MLNaiveAggregativeQuantifier(CC(cls()))
     yield 'NaivePCC', MLNaiveAggregativeQuantifier(PCC(cls()))
+    yield 'NaivePCCcal', MLNaiveAggregativeQuantifier(PCC(calibratedCls()))
     yield 'NaiveACC', MLNaiveAggregativeQuantifier(ACC(cls()))
     yield 'NaivePACC', MLNaiveAggregativeQuantifier(PACC(cls()))
+    yield 'NaivePACCcal', MLNaiveAggregativeQuantifier(PACC(calibratedCls()))
+    yield 'NaiveACCit', MLNaiveAggregativeQuantifier(ACC(cls()))
+    yield 'NaivePACCit', MLNaiveAggregativeQuantifier(PACC(cls()))
     # yield 'NaiveHDy', MLNaiveAggregativeQuantifier(HDy(cls()))
     # yield 'NaiveSLD', MLNaiveAggregativeQuantifier(EMQ(calibratedCls()))
     yield 'StackCC', MLCC(MLStackedClassifier(cls()))
     yield 'StackPCC', MLPCC(MLStackedClassifier(cls()))
+    yield 'StackPCCcal', MLPCC(MLStackedClassifier(calibratedCls()))
     yield 'StackACC', MLACC(MLStackedClassifier(cls()))
     yield 'StackPACC', MLPACC(MLStackedClassifier(cls()))
-    # yield 'ChainCC', MLCC(ClassifierChain(cls(), cv=None, order='random'))
-    # yield 'ChainPCC', MLPCC(ClassifierChain(cls(), cv=None, order='random'))
-    # yield 'ChainACC', MLACC(ClassifierChain(cls(), cv=None, order='random'))
-    # yield 'ChainPACC', MLPACC(ClassifierChain(cls(), cv=None, order='random'))
+    yield 'StackPACCcal', MLPACC(MLStackedClassifier(calibratedCls()))
+    yield 'StackACCit', MLACC(MLStackedClassifier(cls()))
+    yield 'StackPACCit', MLPACC(MLStackedClassifier(cls()))
+    # yield 'ChainCC', MLCC(ClassifierChain(cls(), cv=None))
+    # yield 'ChainPCC', MLPCC(ClassifierChain(cls(), cv=None))
+    # yield 'ChainACC', MLACC(ClassifierChain(cls(), cv=None))
+    # yield 'ChainPACC', MLPACC(ClassifierChain(cls(), cv=None))
     common={'sample_size':sample_size, 'n_samples': n_samples, 'norm': True, 'means':False, 'stds':False, 'regression':'svr'}
     yield 'MRQ-CC', MLRegressionQuantification(MLNaiveQuantifier(CC(cls())), **common)
     yield 'MRQ-PCC', MLRegressionQuantification(MLNaiveQuantifier(PCC(cls())), **common)
     yield 'MRQ-ACC', MLRegressionQuantification(MLNaiveQuantifier(ACC(cls())), **common)
     yield 'MRQ-PACC', MLRegressionQuantification(MLNaiveQuantifier(PACC(cls())), **common)
+    yield 'MRQ-ACCit', MLRegressionQuantification(MLNaiveQuantifier(ACC(cls())), **common)
+    yield 'MRQ-PACCit', MLRegressionQuantification(MLNaiveQuantifier(PACC(cls())), **common)
     yield 'MRQ-StackCC', MLRegressionQuantification(MLCC(MLStackedClassifier(cls())), **common)
     yield 'MRQ-StackPCC', MLRegressionQuantification(MLPCC(MLStackedClassifier(cls())), **common)
     yield 'MRQ-StackACC', MLRegressionQuantification(MLACC(MLStackedClassifier(cls())), **common)

@@ -74,6 +86,23 @@ def models():
     yield 'MRQ-StackPCC-app', MLRegressionQuantification(MLPCC(MLStackedClassifier(cls())), protocol='app', **common)
     yield 'MRQ-StackACC-app', MLRegressionQuantification(MLACC(MLStackedClassifier(cls())), protocol='app', **common)
     yield 'MRQ-StackPACC-app', MLRegressionQuantification(MLPACC(MLStackedClassifier(cls())), protocol='app', **common)
+    yield 'StackMRQ-CC', StackMLRQuantifier(MLNaiveQuantifier(CC(cls())), **common)
+    yield 'StackMRQ-PCC', StackMLRQuantifier(MLNaiveQuantifier(PCC(cls())), **common)
+    yield 'StackMRQ-ACC', StackMLRQuantifier(MLNaiveQuantifier(ACC(cls())), **common)
+    yield 'StackMRQ-PACC', StackMLRQuantifier(MLNaiveQuantifier(PACC(cls())), **common)
+    yield 'StackMRQ-StackCC', StackMLRQuantifier(MLCC(MLStackedClassifier(cls())), **common)
+    yield 'StackMRQ-StackPCC', StackMLRQuantifier(MLPCC(MLStackedClassifier(cls())), **common)
+    yield 'StackMRQ-StackACC', StackMLRQuantifier(MLACC(MLStackedClassifier(cls())), **common)
+    yield 'StackMRQ-StackPACC', StackMLRQuantifier(MLPACC(MLStackedClassifier(cls())), **common)
+    yield 'StackMRQ-StackCC-app', StackMLRQuantifier(MLCC(MLStackedClassifier(cls())), protocol='app', **common)
+    yield 'StackMRQ-StackPCC-app', StackMLRQuantifier(MLPCC(MLStackedClassifier(cls())), protocol='app', **common)
+    yield 'StackMRQ-StackACC-app', StackMLRQuantifier(MLACC(MLStackedClassifier(cls())), protocol='app', **common)
+    yield 'StackMRQ-StackPACC-app', StackMLRQuantifier(MLPACC(MLStackedClassifier(cls())), protocol='app', **common)
+    yield 'MLAdjustedC', MLadjustedCount(OneVsRestClassifier(cls()))
+    yield 'MLStackAdjustedC', MLadjustedCount(MLStackedClassifier(cls()))
+    # yield 'MLprobAdjustedC', MLprobAdjustedCount(OneVsRestClassifier(calibratedCls()))
+    # yield 'MLStackProbAdjustedC', MLprobAdjustedCount(MLStackedClassifier(calibratedCls()))
+
     # yield 'MRQ-ChainCC', MLRegressionQuantification(MLCC(ClassifierChain(cls())), **common)
     # yield 'MRQ-ChainPCC', MLRegressionQuantification(MLPCC(ClassifierChain(cls())), **common)
     # yield 'MRQ-ChainACC', MLRegressionQuantification(MLACC(ClassifierChain(cls())), **common)

@@ -82,10 +111,10 @@ def models():
     # yield 'LSP-ACC', MLACC(LabelSpacePartion(cls()))
     # yield 'TwinSVM-CC', MLCC(MLTwinSVM())
     # yield 'TwinSVM-ACC', MLACC(MLTwinSVM())
-    yield 'MLKNN-CC', MLCC(MLknn())
-    yield 'MLKNN-PCC', MLPCC(MLknn())
-    yield 'MLKNN-ACC', MLACC(MLknn())
-    yield 'MLKNN-PACC', MLPACC(MLknn())
+    # yield 'MLKNN-CC', MLCC(MLknn())
+    # yield 'MLKNN-PCC', MLPCC(MLknn())
+    # yield 'MLKNN-ACC', MLACC(MLknn())
+    # yield 'MLKNN-PACC', MLPACC(MLknn())


 def get_dataset(dataset_name, dopickle=True):

@@ -1,7 +1,7 @@
 from copy import deepcopy

 from sklearn.calibration import CalibratedClassifierCV
-from sklearn.linear_model import LogisticRegression
+from sklearn.linear_model import LogisticRegression, Ridge
 from sklearn.multiclass import OneVsRestClassifier
 from sklearn.preprocessing import StandardScaler
 from skmultilearn.adapt import MLTSVM

@@ -44,6 +44,25 @@ class MLStackedClassifier: # aka Funnelling Monolingual
         return self.meta.predict_proba(P)


+class MLStackedRegressor:
+    def __init__(self, base_regressor=Ridge(normalize=True)):
+        self.base = deepcopy(base_regressor)
+        self.meta = deepcopy(base_regressor)
+
+    def fit(self, X, y):
+        assert y.ndim==2, 'the dataset does not seem to be multi-label'
+        self.base.fit(X, y)
+        R = self.base.predict(X)
+        # R = self.norm.fit_transform(R)
+        self.meta.fit(R, y)
+        return self
+
+    def predict(self, X):
+        R = self.base.predict(X)
+        # R = self.norm.transform(R)
+        return self.meta.predict(R)
+
+
 class LabelSpacePartion:
     def __init__(self, base_estimator=LogisticRegression()):
         graph_builder = LabelCooccurrenceGraphBuilder(weighted=True, include_self_edges=False)

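Note: the new MLStackedRegressor follows a two-stage stacking idea: a base regressor is fit on the raw features, and a meta regressor is then fit on the base regressor's multi-output predictions. A minimal, self-contained sketch of that idea using plain scikit-learn follows; it uses Ridge() without the normalize flag and synthetic data, both assumptions made for illustration only, not the exact configuration of this patch.

    import numpy as np
    from sklearn.linear_model import Ridge

    rng = np.random.RandomState(0)
    X = rng.randn(100, 10)                  # 100 samples, 10 features
    Y = X[:, :3] + 0.1 * rng.randn(100, 3)  # 3 correlated regression targets

    base, meta = Ridge(), Ridge()
    base.fit(X, Y)                          # stage 1: targets from raw features
    R = base.predict(X)                     # stacked representation = base predictions
    meta.fit(R, Y)                          # stage 2: targets from the base predictions
    Y_hat = meta.predict(base.predict(X))   # inference chains both stages
    print(Y_hat.shape)                      # (100, 3)
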
@@ -6,6 +6,7 @@ from sklearn.model_selection import train_test_split
 from quapy.data import LabelledCollection
 from quapy.functional import artificial_prevalence_sampling

+from skmultilearn.model_selection import iterative_train_test_split

 class MultilabelledCollection:
     def __init__(self, instances, labels):

@@ -67,10 +68,13 @@ class MultilabelledCollection:
         labels = self.labels[index]
         return MultilabelledCollection(documents, labels)

-    def train_test_split(self, train_prop=0.6, random_state=None):
-        #raise ValueError('use the scikit-multilearn implementation')
-        tr_docs, te_docs, tr_labels, te_labels = \
-            train_test_split(self.instances, self.labels, train_size=train_prop, random_state=random_state)
+    def train_test_split(self, train_prop=0.6, random_state=None, iterative=False):
+        if iterative:
+            tr_docs, tr_labels, te_docs, te_labels = \
+                iterative_train_test_split(self.instances, self.labels, test_size=1-train_prop)
+        else:
+            tr_docs, te_docs, tr_labels, te_labels = \
+                train_test_split(self.instances, self.labels, train_size=train_prop, random_state=random_state)
         return MultilabelledCollection(tr_docs, tr_labels), MultilabelledCollection(te_docs, te_labels)

     def artificial_sampling_generator(self, sample_size, category, n_prevalences=101, repeats=1):

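Note: the reordered unpacking in the iterative branch reflects a real difference between the two splitters: scikit-learn's train_test_split returns (X_train, X_test, y_train, y_test), whereas scikit-multilearn's iterative_train_test_split returns (X_train, y_train, X_test, y_test) and takes a test_size rather than a train_size. A small sketch with toy data (shapes and thresholds are arbitrary):

    import numpy as np
    from sklearn.model_selection import train_test_split
    from skmultilearn.model_selection import iterative_train_test_split

    X = np.random.randn(40, 5)
    Y = (np.random.rand(40, 3) > 0.7).astype(int)   # toy multi-label indicator matrix

    # scikit-learn: X_train, X_test, y_train, y_test
    Xtr, Xte, Ytr, Yte = train_test_split(X, Y, train_size=0.6, random_state=0)

    # scikit-multilearn: X_train, y_train, X_test, y_test (iterative stratification)
    Xtr, Ytr, Xte, Yte = iterative_train_test_split(X, Y, test_size=0.4)
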
@@ -98,6 +102,10 @@ class MultilabelledCollection:
         for c in self.classes_:
             yield self.asLabelledCollection(c)

+    # @property
+    # def label_cardinality(self):
+    #     return self.labels.sum()/len(self)
+
     @property
     def Xy(self):
         return self.instances, self.labels

@@ -1,6 +1,8 @@
 import numpy as np
 from copy import deepcopy

+import sklearn.preprocessing
+from sklearn.ensemble import StackingRegressor
 from sklearn.metrics import confusion_matrix
 from sklearn.multioutput import MultiOutputRegressor
 from sklearn.preprocessing import StandardScaler

@@ -9,7 +11,7 @@ from sklearn.linear_model import LogisticRegression, Ridge, Lasso, LassoCV, Mult
     ElasticNet, MultiTaskElasticNetCV, MultiTaskElasticNet, LinearRegression, ARDRegression, BayesianRidge, SGDRegressor

 import quapy as qp
-from MultiLabel.mlclassification import MLStackedClassifier
+from MultiLabel.mlclassification import MLStackedClassifier, MLStackedRegressor
 from MultiLabel.mldata import MultilabelledCollection
 from method.aggregative import CC, ACC, PACC, AggregativeQuantifier
 from method.base import BaseQuantifier

@@ -166,13 +168,17 @@ class MLRegressionQuantification:
                  norm=True,
                  means=True,
                  stds=True):
-        assert regression in ['ridge', 'svr'], 'unknown regression model'
         assert protocol in ['npp', 'app'], 'unknown protocol'
         self.estimator = mlquantifier
-        if regression == 'ridge':
-            self.reg = Ridge(normalize=norm)
-        elif regression == 'svr':
-            self.reg = MultiOutputRegressor(LinearSVR())
+        if isinstance(regression, str):
+            assert regression in ['ridge', 'svr'], 'unknown regression model'
+            if regression == 'ridge':
+                self.reg = Ridge(normalize=norm)
+            elif regression == 'svr':
+                self.reg = MultiOutputRegressor(LinearSVR())
+        else:
+            self.reg = regression
         self.protocol = protocol
         # self.reg = MultiTaskLassoCV(normalize=norm)
         # self.reg = KernelRidge(kernel='rbf')

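Note: with this change the regression argument of MLRegressionQuantification accepts either one of the two string shortcuts or a ready-made regressor instance, which is how StackMLRQuantifier later in this patch plugs in its MLStackedRegressor. A hedged usage sketch, assuming the QuaPy MultiLabel modules are importable and reusing the LinearSVC/PACC combination that appears elsewhere in this patch:

    from sklearn.multioutput import MultiOutputRegressor
    from sklearn.svm import LinearSVC, LinearSVR
    from MultiLabel.mlquantification import MLNaiveQuantifier, MLRegressionQuantification
    from method.aggregative import PACC

    # string shortcut, as before
    q1 = MLRegressionQuantification(MLNaiveQuantifier(PACC(LinearSVC())), regression='svr')

    # new: pass any multi-output regressor instance directly
    q2 = MLRegressionQuantification(MLNaiveQuantifier(PACC(LinearSVC())),
                                    regression=MultiOutputRegressor(LinearSVR()))
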
@@ -215,7 +221,7 @@ class MLRegressionQuantification:
         Xs, ys = [], []
         samples_mean, samples_std = [], []
         for sample in val.natural_sampling_generator(sample_size=self.sample_size, repeats=self.n_samples):
-            self._extract_features(self, sample, Xs, ys, samples_mean, samples_std)
+            self._extract_features(sample, Xs, ys, samples_mean, samples_std)
         return self._prepare_arrays(Xs, ys, samples_mean, samples_std)

@@ -227,7 +233,7 @@ class MLRegressionQuantification:
         repeats = max(self.n_samples // (ncats * nprevs), 1)
         for cat in self.classes_:
             for sample in val.artificial_sampling_generator(sample_size=self.sample_size, category=cat, n_prevalences=nprevs, repeats=repeats):
-                self._extract_features(self, sample, Xs, ys, samples_mean, samples_std)
+                self._extract_features(sample, Xs, ys, samples_mean, samples_std)
         return self._prepare_arrays(Xs, ys, samples_mean, samples_std)

     def fit(self, data:MultilabelledCollection):

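Note: the two one-line fixes above remove a duplicated self: since self._extract_features(...) is already a bound call, passing self again shifts every argument by one position. A minimal illustration of the failure mode (Demo and its method are hypothetical names, not part of the patch):

    class Demo:
        def _extract_features(self, sample, out):
            out.append(sample)

    d = Demo()
    acc = []
    d._extract_features('s1', acc)       # correct: the bound call supplies self implicitly
    # d._extract_features(d, 's1', acc)  # old pattern: TypeError, too many positional arguments
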
@@ -259,4 +265,97 @@ class MLRegressionQuantification:
         return np.asarray([neg_prevs, adjusted]).T


-# class
+class StackMLRQuantifier:
+    def __init__(self,
+                 mlquantifier=MLNaiveQuantifier(CC(LinearSVC())),
+                 regression='ridge',
+                 protocol='npp',
+                 n_samples=500,
+                 sample_size=500,
+                 norm=True,
+                 means=True,
+                 stds=True):
+        if regression == 'ridge':
+            reg = MLStackedRegressor(Ridge(normalize=True))
+        elif regression == 'svr':
+            reg = MLStackedRegressor(MultiOutputRegressor(LinearSVR()))
+        else:
+            raise ValueError(f'unknown regressor {regression}')
+
+        self.base = MLRegressionQuantification(
+            mlquantifier=mlquantifier,
+            regression=reg,
+            protocol=protocol,
+            n_samples=n_samples,
+            sample_size=sample_size,
+            norm=norm,
+            means=means,
+            stds=stds)
+
+    def fit(self, data:MultilabelledCollection):
+        self.classes_ = data.classes_
+        self.base.fit(data)
+        return self
+
+    def quantify(self, instances):
+        return self.base.quantify(instances)
+
+
+class MLadjustedCount(MLAggregativeQuantifier):
+    def __init__(self, learner):
+        self.learner = learner
+
+    def preclassify(self, instances):
+        return self.learner.predict(instances)
+
+    def fit(self, data: MultilabelledCollection, train_prop=0.6):
+        self.classes_ = data.classes_
+        train, val = data.train_test_split(train_prop=train_prop)
+        self.learner.fit(*train.Xy)
+        val_predictions = self.preclassify(val.instances)
+        val_true = val.labels
+
+        N = len(val)
+        C = val_predictions.T.dot(val_true) / N  # joint probabilities [[P(y1,\hat{y}1), P(y2,\hat{y}1)], ... ]
+        priorP = val_predictions.mean(axis=0).reshape(-1,1)  # priors [P(\hat{y}1), P(\hat{y}2), ...]
+        self.Pte_cond_estim_ = np.true_divide(C, priorP, where=priorP>0)  # conditional probabilities [[P(y1|\hat{y}1), P(y2|\hat{y}1)], ... ]
+
+        return self
+
+    def aggregate(self, predictions):
+        P = sklearn.preprocessing.normalize(predictions, norm='l1')
+        correction = P.dot(self.Pte_cond_estim_)
+        adjusted = correction.mean(axis=0)
+        return np.asarray([1-adjusted, adjusted]).T
+
+
+class MLprobAdjustedCount(MLAggregativeQuantifier):
+    def __init__(self, learner):
+        self.learner = learner
+
+    def preclassify(self, instances):
+        return self.learner.predict_proba(instances)
+
+    def fit(self, data: MultilabelledCollection, train_prop=0.6):
+        self.classes_ = data.classes_
+        train, val = data.train_test_split(train_prop=train_prop)
+        self.learner.fit(*train.Xy)
+        val_predictions = self.preclassify(val.instances)
+        val_true = val.labels
+
+        N = len(val)
+
+        C = (val_predictions>0.5).T.dot(val_true) / N  # joint probabilities [[P(y1,\hat{y}1), P(y2,\hat{y}1)], ... ]
+        # not sure...
+
+        priorP = val_predictions.mean(axis=0).reshape(-1,1)  # priors [P(\hat{y}1), P(\hat{y}2), ...]
+        self.Pte_cond_estim_ = np.true_divide(C, priorP, where=priorP>0)  # conditional probabilities [[P(y1|\hat{y}1), P(y2|\hat{y}1)], ... ]
+
+        return self
+
+    def aggregate(self, predictions):
+        P = sklearn.preprocessing.normalize(predictions, norm='l1')
+        correction = P.dot(self.Pte_cond_estim_)
+        adjusted = correction.mean(axis=0)
+        return np.asarray([1-adjusted, adjusted]).T
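
Note: to see what MLadjustedCount.fit and aggregate compute, here is a self-contained NumPy sketch that mirrors those lines on synthetic data (the array shapes and thresholds are illustrative assumptions): the validation split yields an estimate of P(y_j | \hat{y}_i), and at test time the l1-normalised prediction counts are corrected with that matrix.

    import numpy as np
    import sklearn.preprocessing

    rng = np.random.RandomState(0)
    n_val, n_test, n_classes = 1000, 200, 4
    val_true = (rng.rand(n_val, n_classes) > 0.7).astype(float)         # true label matrix
    val_predictions = (rng.rand(n_val, n_classes) > 0.7).astype(float)  # classifier output on validation

    # fit: estimate the correction matrix from the validation split
    C = val_predictions.T.dot(val_true) / n_val                   # joint probabilities P(y_j, \hat{y}_i)
    priorP = val_predictions.mean(axis=0).reshape(-1, 1)          # priors P(\hat{y}_i)
    Pte_cond_estim = np.true_divide(C, priorP, where=priorP > 0)  # conditional P(y_j | \hat{y}_i)

    # aggregate: correct the raw predicted counts on a test sample
    test_predictions = (rng.rand(n_test, n_classes) > 0.6).astype(float)
    P = sklearn.preprocessing.normalize(test_predictions, norm='l1')
    adjusted = P.dot(Pte_cond_estim).mean(axis=0)
    prevalences = np.asarray([1 - adjusted, adjusted]).T          # one (negative, positive) pair per class
    print(prevalences.shape)                                      # (4, 2)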