merging and solving pytests
This commit is contained in:
parent
dbda25b09a
commit
3847db3838
|
|
@ -22,6 +22,7 @@ data = qp.data.preprocessing.text2tfidf(
|
||||||
min_df = 5,
|
min_df = 5,
|
||||||
)
|
)
|
||||||
training, testing = data.train_test
|
training, testing = data.train_test
|
||||||
|
Xtr, ytr = training.Xy
|
||||||
|
|
||||||
# We start by recovering PACC from its building blocks, a LeastSquaresLoss and
|
# We start by recovering PACC from its building blocks, a LeastSquaresLoss and
|
||||||
# a probabilistic ClassRepresentation. A 5-fold cross-validation is implemented
|
# a probabilistic ClassRepresentation. A 5-fold cross-validation is implemented
|
||||||
|
|
@ -46,7 +47,7 @@ pacc = ComposableQuantifier(
|
||||||
# Let's evaluate this quantifier.
|
# Let's evaluate this quantifier.
|
||||||
|
|
||||||
print(f"Evaluating PACC: {pacc}")
|
print(f"Evaluating PACC: {pacc}")
|
||||||
pacc.fit(training)
|
pacc.fit(Xtr, ytr)
|
||||||
app = qp.protocol.APP(testing, sample_size=100, n_prevalences=21, repeats=1)
|
app = qp.protocol.APP(testing, sample_size=100, n_prevalences=21, repeats=1)
|
||||||
absolute_errors = qp.evaluation.evaluate(
|
absolute_errors = qp.evaluation.evaluate(
|
||||||
model = pacc,
|
model = pacc,
|
||||||
|
|
@ -70,7 +71,7 @@ model = ComposableQuantifier(
|
||||||
)
|
)
|
||||||
|
|
||||||
print(f"Evaluating {model}")
|
print(f"Evaluating {model}")
|
||||||
model.fit(training)
|
model.fit(Xtr, ytr)
|
||||||
absolute_errors = qp.evaluation.evaluate(
|
absolute_errors = qp.evaluation.evaluate(
|
||||||
model = model,
|
model = model,
|
||||||
protocol = app, # use the same protocol for evaluation
|
protocol = app, # use the same protocol for evaluation
|
||||||
|
|
@ -125,7 +126,7 @@ grid_search = qp.model_selection.GridSearchQ(
|
||||||
error = "mae",
|
error = "mae",
|
||||||
refit = False,
|
refit = False,
|
||||||
verbose = True,
|
verbose = True,
|
||||||
).fit(training)
|
).fit(Xtr, ytr)
|
||||||
print(
|
print(
|
||||||
f"Best hyper-parameters = {grid_search.best_params_}",
|
f"Best hyper-parameters = {grid_search.best_params_}",
|
||||||
f"Best MAE = {grid_search.best_score_}",
|
f"Best MAE = {grid_search.best_score_}",
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,8 @@ AGGREGATIVE_METHODS = {
|
||||||
aggregative.KDEyML,
|
aggregative.KDEyML,
|
||||||
aggregative.KDEyCS,
|
aggregative.KDEyCS,
|
||||||
aggregative.KDEyHD,
|
aggregative.KDEyHD,
|
||||||
confidence.BayesianCC
|
# aggregative.OneVsAllAggregative,
|
||||||
|
confidence.BayesianCC,
|
||||||
}
|
}
|
||||||
|
|
||||||
BINARY_METHODS = {
|
BINARY_METHODS = {
|
||||||
|
|
|
||||||
|
|
@ -1406,18 +1406,20 @@ class OneVsAllAggregative(OneVsAllGeneric, AggregativeQuantifier):
|
||||||
`Gao and Sebastiani, 2016 <https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf>`_.
|
`Gao and Sebastiani, 2016 <https://link.springer.com/content/pdf/10.1007/s13278-016-0327-z.pdf>`_.
|
||||||
|
|
||||||
:param binary_quantifier: a quantifier (binary) that will be employed to work on multiclass model in a
|
:param binary_quantifier: a quantifier (binary) that will be employed to work on multiclass model in a
|
||||||
one-vs-all manner
|
one-vs-all manner (default PACC(LogisticRegression()))
|
||||||
:param n_jobs: number of parallel workers
|
:param n_jobs: number of parallel workers
|
||||||
:param parallel_backend: the parallel backend for joblib (default "loky"); this is helpful for some quantifiers
|
:param parallel_backend: the parallel backend for joblib (default "loky"); this is helpful for some quantifiers
|
||||||
(e.g., ELM-based ones) that cannot be run with multiprocessing, since the temp dir they create during fit will
|
(e.g., ELM-based ones) that cannot be run with multiprocessing, since the temp dir they create during fit will
|
||||||
be removed and no longer available at predict time.
|
be removed and no longer available at predict time.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, binary_quantifier, n_jobs=None, parallel_backend='multiprocessing'):
|
def __init__(self, binary_quantifier=None, n_jobs=None, parallel_backend='multiprocessing'):
|
||||||
|
if binary_quantifier is None:
|
||||||
|
binary_quantifier = PACC()
|
||||||
assert isinstance(binary_quantifier, BaseQuantifier), \
|
assert isinstance(binary_quantifier, BaseQuantifier), \
|
||||||
f'{self.binary_quantifier} does not seem to be a Quantifier'
|
f'{binary_quantifier} does not seem to be a Quantifier'
|
||||||
assert isinstance(binary_quantifier, AggregativeQuantifier), \
|
assert isinstance(binary_quantifier, AggregativeQuantifier), \
|
||||||
f'{self.binary_quantifier} does not seem to be of type Aggregative'
|
f'{binary_quantifier} does not seem to be of type Aggregative'
|
||||||
self.binary_quantifier = binary_quantifier
|
self.binary_quantifier = binary_quantifier
|
||||||
self.n_jobs = qp._get_njobs(n_jobs)
|
self.n_jobs = qp._get_njobs(n_jobs)
|
||||||
self.parallel_backend = parallel_backend
|
self.parallel_backend = parallel_backend
|
||||||
|
|
|
||||||
|
|
@ -1,27 +1,28 @@
|
||||||
"""This module allows the composition of quantification methods from loss functions and feature transformations. This functionality is realized through an integration of the qunfold package: https://github.com/mirkobunse/qunfold."""
|
"""This module allows the composition of quantification methods from loss functions and feature transformations. This functionality is realized through an integration of the qunfold package: https://github.com/mirkobunse/qunfold."""
|
||||||
|
|
||||||
__install_istructions = """
|
from dataclasses import dataclass
|
||||||
|
from packaging.version import Version
|
||||||
|
|
||||||
|
from .base import BaseQuantifier
|
||||||
|
|
||||||
|
# what to display when an ImportError is thrown
|
||||||
|
_IMPORT_ERROR_MESSAGE = """qunfold, the back-end of quapy.method.composable, is not properly installed.
|
||||||
|
|
||||||
To fix this error, call:
|
To fix this error, call:
|
||||||
|
|
||||||
pip install --upgrade pip setuptools wheel
|
pip install --upgrade pip setuptools wheel
|
||||||
pip install "jax[cpu]"
|
pip install "jax[cpu]"
|
||||||
pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.5"
|
pip install "qunfold @ git+https://github.com/mirkobunse/qunfold@v0.1.5"
|
||||||
"""
|
"""
|
||||||
__import_error_message = (
|
|
||||||
"qunfold, the back-end of quapy.method.composable, is not properly installed." + __install_istructions
|
|
||||||
)
|
|
||||||
__old_version_message = (
|
|
||||||
"The version of qunfold you have installed is not compatible with current quapy's version, "
|
|
||||||
"which requires qunfold>=0.1.5. " + __install_istructions
|
|
||||||
)
|
|
||||||
|
|
||||||
from packaging.version import Version
|
|
||||||
|
|
||||||
|
# try to import members of qunfold as members of this module
|
||||||
try:
|
try:
|
||||||
import qunfold
|
import qunfold
|
||||||
from qunfold.quapy import QuaPyWrapper
|
from qunfold.base import BaseMixin
|
||||||
|
from qunfold.methods import AbstractMethod
|
||||||
from qunfold.sklearn import CVClassifier
|
from qunfold.sklearn import CVClassifier
|
||||||
from qunfold import (
|
from qunfold import (
|
||||||
|
LinearMethod, # methods
|
||||||
LeastSquaresLoss, # losses
|
LeastSquaresLoss, # losses
|
||||||
BlobelLoss,
|
BlobelLoss,
|
||||||
EnergyLoss,
|
EnergyLoss,
|
||||||
|
|
@ -29,37 +30,38 @@ try:
|
||||||
CombinedLoss,
|
CombinedLoss,
|
||||||
TikhonovRegularization,
|
TikhonovRegularization,
|
||||||
TikhonovRegularized,
|
TikhonovRegularized,
|
||||||
ClassTransformer, # transformers
|
ClassRepresentation, # representations
|
||||||
HistogramTransformer,
|
HistogramRepresentation,
|
||||||
DistanceTransformer,
|
DistanceRepresentation,
|
||||||
KernelTransformer,
|
KernelRepresentation,
|
||||||
EnergyKernelTransformer,
|
EnergyKernelRepresentation,
|
||||||
LaplacianKernelTransformer,
|
LaplacianKernelRepresentation,
|
||||||
GaussianKernelTransformer,
|
GaussianKernelRepresentation,
|
||||||
GaussianRFFKernelTransformer,
|
GaussianRFFKernelRepresentation,
|
||||||
)
|
)
|
||||||
|
|
||||||
__all__ = [ # control public members, e.g., for auto-documentation in sphinx; omit QuaPyWrapper
|
|
||||||
"ComposableQuantifier",
|
|
||||||
"CVClassifier",
|
|
||||||
"LeastSquaresLoss",
|
|
||||||
"BlobelLoss",
|
|
||||||
"EnergyLoss",
|
|
||||||
"HellingerSurrogateLoss",
|
|
||||||
"CombinedLoss",
|
|
||||||
"TikhonovRegularization",
|
|
||||||
"TikhonovRegularized",
|
|
||||||
"ClassTransformer",
|
|
||||||
"HistogramTransformer",
|
|
||||||
"DistanceTransformer",
|
|
||||||
"KernelTransformer",
|
|
||||||
"EnergyKernelTransformer",
|
|
||||||
"LaplacianKernelTransformer",
|
|
||||||
"GaussianKernelTransformer",
|
|
||||||
"GaussianRFFKernelTransformer",
|
|
||||||
]
|
|
||||||
except ImportError as e:
|
except ImportError as e:
|
||||||
raise ImportError(__import_error_message) from e
|
raise ImportError(_IMPORT_ERROR_MESSAGE) from e
|
||||||
|
|
||||||
|
__all__ = [ # control public members, e.g., for auto-documentation in sphinx
|
||||||
|
"QUnfoldWrapper",
|
||||||
|
"ComposableQuantifier",
|
||||||
|
"CVClassifier",
|
||||||
|
"LeastSquaresLoss",
|
||||||
|
"BlobelLoss",
|
||||||
|
"EnergyLoss",
|
||||||
|
"HellingerSurrogateLoss",
|
||||||
|
"CombinedLoss",
|
||||||
|
"TikhonovRegularization",
|
||||||
|
"TikhonovRegularized",
|
||||||
|
"ClassRepresentation",
|
||||||
|
"HistogramRepresentation",
|
||||||
|
"DistanceRepresentation",
|
||||||
|
"KernelRepresentation",
|
||||||
|
"EnergyKernelRepresentation",
|
||||||
|
"LaplacianKernelRepresentation",
|
||||||
|
"GaussianKernelRepresentation",
|
||||||
|
"GaussianRFFKernelRepresentation",
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def check_compatible_qunfold_version():
|
def check_compatible_qunfold_version():
|
||||||
|
|
@ -69,18 +71,54 @@ def check_compatible_qunfold_version():
|
||||||
# versions of qunfold <= 0.1.4 did not declare __version__ in the __init__.py but only in the setup.py
|
# versions of qunfold <= 0.1.4 did not declare __version__ in the __init__.py but only in the setup.py
|
||||||
version_str = "0.1.4"
|
version_str = "0.1.4"
|
||||||
|
|
||||||
compatible = Version(version_str) >= Version("0.1.5")
|
installed_ver = Version(version_str)
|
||||||
|
required_ver = Version("0.1.5")
|
||||||
|
compatible = installed_ver.base_version == required_ver.base_version or installed_ver>=required_ver
|
||||||
return compatible
|
return compatible
|
||||||
|
|
||||||
|
|
||||||
def ComposableQuantifier(loss, transformer, **kwargs):
|
@dataclass
|
||||||
|
class QUnfoldWrapper(BaseQuantifier,BaseMixin):
|
||||||
|
"""A thin wrapper for using qunfold methods in QuaPy.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
_method: An instance of `qunfold.methods.AbstractMethod` to wrap.
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
Here, we wrap an instance of ACC to perform a grid search with QuaPy.
|
||||||
|
|
||||||
|
>>> from qunfold import ACC
|
||||||
|
>>> qunfold_method = QUnfoldWrapper(ACC(RandomForestClassifier(oob_score=True)))
|
||||||
|
>>> quapy.model_selection.GridSearchQ(
|
||||||
|
>>> model = qunfold_method,
|
||||||
|
>>> param_grid = { # try both splitting criteria
|
||||||
|
>>> "representation__classifier__estimator__criterion": ["gini", "entropy"],
|
||||||
|
>>> },
|
||||||
|
>>> # ...
|
||||||
|
>>> )
|
||||||
|
"""
|
||||||
|
_method: AbstractMethod
|
||||||
|
def fit(self, X, y): # X and y are the instances and labels (e.g., from LabelledCollection.Xy)
|
||||||
|
self._method.fit(X, y)
|
||||||
|
return self
|
||||||
|
def predict(self, X):
|
||||||
|
return self._method.predict(X)
|
||||||
|
def set_params(self, **params):
|
||||||
|
self._method.set_params(**params)
|
||||||
|
return self
|
||||||
|
def get_params(self, deep=True):
|
||||||
|
return self._method.get_params(deep)
|
||||||
|
def __str__(self):
|
||||||
|
return self._method.__str__()
|
||||||
|
|
||||||
|
def ComposableQuantifier(loss, representation, **kwargs):
|
||||||
"""A generic quantification / unfolding method that solves a linear system of equations.
|
"""A generic quantification / unfolding method that solves a linear system of equations.
|
||||||
|
|
||||||
This class represents any quantifier that can be described in terms of a loss function, a feature transformation, and a regularization term. In this implementation, the loss is minimized through unconstrained second-order minimization. Valid probability estimates are ensured through a soft-max trick by Bunse (2022).
|
This class represents any quantifier that can be described in terms of a loss function, a feature transformation, and a regularization term. In this implementation, the loss is minimized through unconstrained second-order minimization. Valid probability estimates are ensured through a soft-max trick by Bunse (2022).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
loss: An instance of a loss class from `quapy.method.composable`.
|
loss: An instance of a loss class from `quapy.method.composable`.
|
||||||
transformer: An instance of a transformer class from `quapy.method.composable`.
|
representation: An instance of a representation class from `quapy.method.composable`.
|
||||||
solver (optional): The `method` argument in `scipy.optimize.minimize`. Defaults to `"trust-ncg"`.
|
solver (optional): The `method` argument in `scipy.optimize.minimize`. Defaults to `"trust-ncg"`.
|
||||||
solver_options (optional): The `options` argument in `scipy.optimize.minimize`. Defaults to `{"gtol": 1e-8, "maxiter": 1000}`.
|
solver_options (optional): The `options` argument in `scipy.optimize.minimize`. Defaults to `{"gtol": 1e-8, "maxiter": 1000}`.
|
||||||
seed (optional): A random number generator seed from which a numpy RandomState is created. Defaults to `None`.
|
seed (optional): A random number generator seed from which a numpy RandomState is created. Defaults to `None`.
|
||||||
|
|
@ -92,12 +130,12 @@ def ComposableQuantifier(loss, transformer, **kwargs):
|
||||||
>>> ComposableQuantifier,
|
>>> ComposableQuantifier,
|
||||||
>>> TikhonovRegularized,
|
>>> TikhonovRegularized,
|
||||||
>>> LeastSquaresLoss,
|
>>> LeastSquaresLoss,
|
||||||
>>> ClassTransformer,
|
>>> ClassRepresentation,
|
||||||
>>> )
|
>>> )
|
||||||
>>> from sklearn.ensemble import RandomForestClassifier
|
>>> from sklearn.ensemble import RandomForestClassifier
|
||||||
>>> o_acc = ComposableQuantifier(
|
>>> o_acc = ComposableQuantifier(
|
||||||
>>> TikhonovRegularized(LeastSquaresLoss(), 0.01),
|
>>> TikhonovRegularized(LeastSquaresLoss(), 0.01),
|
||||||
>>> ClassTransformer(RandomForestClassifier(oob_score=True))
|
>>> ClassRepresentation(RandomForestClassifier(oob_score=True))
|
||||||
>>> )
|
>>> )
|
||||||
|
|
||||||
Here, we perform hyper-parameter optimization with the ordinal ACC.
|
Here, we perform hyper-parameter optimization with the ordinal ACC.
|
||||||
|
|
@ -105,21 +143,18 @@ def ComposableQuantifier(loss, transformer, **kwargs):
|
||||||
>>> quapy.model_selection.GridSearchQ(
|
>>> quapy.model_selection.GridSearchQ(
|
||||||
>>> model = o_acc,
|
>>> model = o_acc,
|
||||||
>>> param_grid = { # try both splitting criteria
|
>>> param_grid = { # try both splitting criteria
|
||||||
>>> "transformer__classifier__estimator__criterion": ["gini", "entropy"],
|
>>> "representation__classifier__estimator__criterion": ["gini", "entropy"],
|
||||||
>>> },
|
>>> },
|
||||||
>>> # ...
|
>>> # ...
|
||||||
>>> )
|
>>> )
|
||||||
|
|
||||||
To use a classifier that does not provide the `oob_score` argument, such as logistic regression, you have to configure a cross validation of this classifier. Here, we employ 10 cross validation folds. 5 folds are the default.
|
To use a classifier that does not provide the `oob_score` argument, such as logistic regression, you have to configure a cross validation of this classifier. Here, we employ 10 cross validation folds. 5 folds are the default.
|
||||||
|
|
||||||
>>> from quapy.method.composable import CVClassifier
|
>>> from quapy.method.composable import CVClassifier
|
||||||
>>> from sklearn.linear_model import LogisticRegression
|
>>> from sklearn.linear_model import LogisticRegression
|
||||||
>>> acc_lr = ComposableQuantifier(
|
>>> acc_lr = ComposableQuantifier(
|
||||||
>>> LeastSquaresLoss(),
|
>>> LeastSquaresLoss(),
|
||||||
>>> ClassTransformer(CVClassifier(LogisticRegression(), 10))
|
>>> ClassRepresentation(CVClassifier(LogisticRegression(), 10))
|
||||||
>>> )
|
>>> )
|
||||||
"""
|
"""
|
||||||
if not check_compatible_qunfold_version():
|
return QUnfoldWrapper(LinearMethod(loss, representation, **kwargs))
|
||||||
raise ImportError(__old_version_message)
|
|
||||||
|
|
||||||
return QuaPyWrapper(qunfold.GenericMethod(loss, transformer, **kwargs))
|
|
||||||
|
|
@ -15,8 +15,11 @@ class TestDatasets(unittest.TestCase):
|
||||||
return PCC(LogisticRegression(C=0.001, max_iter=100))
|
return PCC(LogisticRegression(C=0.001, max_iter=100))
|
||||||
|
|
||||||
def _check_dataset(self, dataset):
|
def _check_dataset(self, dataset):
|
||||||
|
train, test = dataset.reduce().train_test
|
||||||
q = self.new_quantifier()
|
q = self.new_quantifier()
|
||||||
print(f'testing method {q} in {dataset.name}...', end='')
|
print(f'testing method {q} in {dataset.name}...', end='')
|
||||||
|
if len(train)>500:
|
||||||
|
train = train.sampling(500)
|
||||||
q.fit(*dataset.training.Xy)
|
q.fit(*dataset.training.Xy)
|
||||||
estim_prevalences = q.predict(dataset.test.instances)
|
estim_prevalences = q.predict(dataset.test.instances)
|
||||||
self.assertTrue(F.check_prevalence_vector(estim_prevalences))
|
self.assertTrue(F.check_prevalence_vector(estim_prevalences))
|
||||||
|
|
@ -42,7 +45,9 @@ class TestDatasets(unittest.TestCase):
|
||||||
self._check_dataset(dataset)
|
self._check_dataset(dataset)
|
||||||
|
|
||||||
def test_twitter(self):
|
def test_twitter(self):
|
||||||
for dataset_name in TWITTER_SENTIMENT_DATASETS_TEST:
|
# all the datasets are contained in the same resource; if the first one
|
||||||
|
# works, there is no need to test for the rest
|
||||||
|
for dataset_name in TWITTER_SENTIMENT_DATASETS_TEST[:1]:
|
||||||
print(f'loading dataset {dataset_name}...', end='')
|
print(f'loading dataset {dataset_name}...', end='')
|
||||||
dataset = fetch_twitter(dataset_name, min_df=10)
|
dataset = fetch_twitter(dataset_name, min_df=10)
|
||||||
dataset.stats()
|
dataset.stats()
|
||||||
|
|
@ -129,7 +134,7 @@ class TestDatasets(unittest.TestCase):
|
||||||
n_classes = train.n_classes
|
n_classes = train.n_classes
|
||||||
train = train.sampling(100, *F.uniform_prevalence(n_classes))
|
train = train.sampling(100, *F.uniform_prevalence(n_classes))
|
||||||
q = self.new_quantifier()
|
q = self.new_quantifier()
|
||||||
q.fit(train)
|
q.fit(*train.Xy)
|
||||||
self._check_samples(gen, q, max_samples_test=5)
|
self._check_samples(gen, q, max_samples_test=5)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,9 +9,8 @@ import inspect
|
||||||
class HierarchyTestCase(unittest.TestCase):
|
class HierarchyTestCase(unittest.TestCase):
|
||||||
|
|
||||||
def test_aggregative(self):
|
def test_aggregative(self):
|
||||||
lr = LogisticRegression()
|
|
||||||
for m in AGGREGATIVE_METHODS:
|
for m in AGGREGATIVE_METHODS:
|
||||||
self.assertEqual(isinstance(m(lr), AggregativeQuantifier), True)
|
self.assertEqual(isinstance(m(), AggregativeQuantifier), True)
|
||||||
|
|
||||||
def test_inspect_aggregative(self):
|
def test_inspect_aggregative(self):
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import unittest
|
||||||
from sklearn.linear_model import LogisticRegression
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
|
from method.aggregative import OneVsAllAggregative
|
||||||
from quapy.method.aggregative import ACC
|
from quapy.method.aggregative import ACC
|
||||||
from quapy.method.meta import Ensemble
|
from quapy.method.meta import Ensemble
|
||||||
from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS, NON_AGGREGATIVE_METHODS
|
from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS, NON_AGGREGATIVE_METHODS
|
||||||
|
|
@ -16,21 +17,21 @@ from quapy.method.composable import (
|
||||||
ComposableQuantifier,
|
ComposableQuantifier,
|
||||||
LeastSquaresLoss,
|
LeastSquaresLoss,
|
||||||
HellingerSurrogateLoss,
|
HellingerSurrogateLoss,
|
||||||
ClassTransformer,
|
ClassRepresentation,
|
||||||
HistogramTransformer,
|
HistogramRepresentation,
|
||||||
CVClassifier
|
CVClassifier
|
||||||
)
|
)
|
||||||
|
|
||||||
COMPOSABLE_METHODS = [
|
COMPOSABLE_METHODS = [
|
||||||
ComposableQuantifier( # ACC
|
ComposableQuantifier( # ACC
|
||||||
LeastSquaresLoss(),
|
LeastSquaresLoss(),
|
||||||
ClassTransformer(CVClassifier(LogisticRegression()))
|
ClassRepresentation(CVClassifier(LogisticRegression()))
|
||||||
),
|
),
|
||||||
ComposableQuantifier( # HDy
|
ComposableQuantifier( # HDy
|
||||||
HellingerSurrogateLoss(),
|
HellingerSurrogateLoss(),
|
||||||
HistogramTransformer(
|
HistogramRepresentation(
|
||||||
3, # 3 bins per class
|
3, # 3 bins per class
|
||||||
preprocessor = ClassTransformer(CVClassifier(LogisticRegression()))
|
preprocessor = ClassRepresentation(CVClassifier(LogisticRegression()))
|
||||||
)
|
)
|
||||||
),
|
),
|
||||||
]
|
]
|
||||||
|
|
@ -113,7 +114,6 @@ class TestMethods(unittest.TestCase):
|
||||||
self.assertTrue(check_prevalence_vector(estim_prevalences))
|
self.assertTrue(check_prevalence_vector(estim_prevalences))
|
||||||
|
|
||||||
def test_composable(self):
|
def test_composable(self):
|
||||||
from packaging.version import Version
|
|
||||||
if check_compatible_qunfold_version():
|
if check_compatible_qunfold_version():
|
||||||
for dataset in TestMethods.datasets:
|
for dataset in TestMethods.datasets:
|
||||||
for q in COMPOSABLE_METHODS:
|
for q in COMPOSABLE_METHODS:
|
||||||
|
|
|
||||||
|
|
@ -39,31 +39,30 @@ class ModselTestCase(unittest.TestCase):
|
||||||
obtains the same optimal parameters
|
obtains the same optimal parameters
|
||||||
"""
|
"""
|
||||||
|
|
||||||
q = PACC(LogisticRegression(random_state=1, max_iter=500))
|
q = PACC(LogisticRegression(random_state=1, max_iter=3000))
|
||||||
|
|
||||||
data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=50).reduce(n_train=500, random_state=1)
|
data = qp.datasets.fetch_reviews('imdb', tfidf=True, min_df=50)
|
||||||
training, validation = data.training.split_stratified(0.7, random_state=1)
|
training, validation = data.training.split_stratified(0.7, random_state=1)
|
||||||
|
|
||||||
param_grid = {'classifier__C': np.logspace(-3,3,7)}
|
param_grid = {'classifier__C': np.logspace(-3,3,7), 'classifier__class_weight': ['balanced', None]}
|
||||||
app = APP(validation, sample_size=100, random_state=1)
|
app = APP(validation, sample_size=100, random_state=1)
|
||||||
|
|
||||||
print('starting model selection in sequential exploration')
|
def do_gridsearch(n_jobs):
|
||||||
tinit = time.time()
|
print('starting model selection in sequential exploration')
|
||||||
modsel = GridSearchQ(
|
t_init = time.time()
|
||||||
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=1, verbose=True
|
modsel = GridSearchQ(
|
||||||
).fit(*training.Xy)
|
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=n_jobs, verbose=True
|
||||||
tend_seq = time.time()-tinit
|
).fit(*training.Xy)
|
||||||
best_c_seq = modsel.best_params_['classifier__C']
|
t_end = time.time()-t_init
|
||||||
print(f'[done] took {tend_seq:.2f}s best C = {best_c_seq}')
|
best_c = modsel.best_params_['classifier__C']
|
||||||
|
print(f'[done] took {t_end:.2f}s best C = {best_c}')
|
||||||
|
return t_end, best_c
|
||||||
|
|
||||||
print('starting model selection in parallel exploration')
|
tend_seq, best_c_seq = do_gridsearch(n_jobs=1)
|
||||||
tinit = time.time()
|
tend_par, best_c_par = do_gridsearch(n_jobs=-1)
|
||||||
modsel = GridSearchQ(
|
|
||||||
q, param_grid, protocol=app, error='mae', refit=False, timeout=-1, n_jobs=-1, verbose=True
|
print(tend_seq, best_c_seq)
|
||||||
).fit(*training.Xy)
|
print(tend_par, best_c_par)
|
||||||
tend_par = time.time() - tinit
|
|
||||||
best_c_par = modsel.best_params_['classifier__C']
|
|
||||||
print(f'[done] took {tend_par:.2f}s best C = {best_c_par}')
|
|
||||||
|
|
||||||
self.assertEqual(best_c_seq, best_c_par)
|
self.assertEqual(best_c_seq, best_c_par)
|
||||||
self.assertLess(tend_par, tend_seq)
|
self.assertLess(tend_par, tend_seq)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue