refactored unittests
This commit is contained in:
parent
561b672200
commit
db6ff4ab9e
|
@ -0,0 +1,11 @@
|
|||
import unittest
|
||||
|
||||
|
||||
class ImportTest(unittest.TestCase):
    """Smoke test checking that the quapy package can be imported."""

    def test_import(self):
        """Import quapy and check that it exposes a non-None ``__version__``."""
        # Imported inside the test so that an import failure is reported as
        # a test error rather than breaking collection of this module.
        import quapy as qp

        version = qp.__version__
        self.assertIsNotNone(version)
|
||||
|
||||
|
||||
# Run the tests of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@ -0,0 +1,119 @@
|
|||
import unittest
|
||||
|
||||
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quapy.functional as F
|
||||
from quapy.method.aggregative import PCC
|
||||
from quapy.data.datasets import *
|
||||
|
||||
|
||||
class TestDatasets(unittest.TestCase):
    """Smoke tests for the dataset fetchers.

    Every test loads one or more datasets, fits a lightweight quantifier on
    the training data, and checks that the prevalence estimates it produces
    are accepted by ``F.check_prevalence_vector``.
    """

    def new_quantifier(self):
        """Return a fresh, unfitted PCC quantifier wrapping a LogisticRegression."""
        return PCC(LogisticRegression(C=0.001, max_iter=100))

    def _check_dataset(self, dataset):
        """Fit a new quantifier on ``dataset.training`` and validate its
        prevalence estimate for ``dataset.test.instances``.

        :param dataset: object exposing ``name``, ``training`` and ``test``
        """
        q = self.new_quantifier()
        print(f'testing method {q} in {dataset.name}...', end='')
        q.fit(dataset.training)
        estim_prevalences = q.quantify(dataset.test.instances)
        self.assertTrue(F.check_prevalence_vector(estim_prevalences))
        print('[done]')

    def _check_samples(self, gen, q, max_samples_test=5, vectorizer=None):
        """Validate the estimates of ``q`` on the first samples yielded by ``gen()``.

        :param gen: callable returning an iterable of ``(X, prevalence)`` pairs
        :param q: an already fitted quantifier
        :param max_samples_test: maximum number of samples to check; must be a
            non-negative integer (0 checks nothing)
        :param vectorizer: optional fitted transformer applied to each sample
            (via ``transform``) before quantification
        """
        # Function-scope import: this module does not import itertools at the top.
        from itertools import islice

        # islice caps the number of samples drawn; unlike a count-down compared
        # against 0, it also behaves sensibly when the cap is 0.
        for X, _ in islice(gen(), max_samples_test):
            if vectorizer is not None:
                X = vectorizer.transform(X)
            estim_prevalences = q.quantify(X)
            self.assertTrue(F.check_prevalence_vector(estim_prevalences))

    def test_reviews(self):
        """Check every dataset listed in ``REVIEWS_SENTIMENT_DATASETS``."""
        for dataset_name in REVIEWS_SENTIMENT_DATASETS:
            print(f'loading dataset {dataset_name}...', end='')
            dataset = fetch_reviews(dataset_name, tfidf=True, min_df=10)
            dataset.stats()
            dataset.reduce()
            print('[done]')
            self._check_dataset(dataset)

    def test_twitter(self):
        """Check every dataset listed in ``TWITTER_SENTIMENT_DATASETS_TEST``."""
        for dataset_name in TWITTER_SENTIMENT_DATASETS_TEST:
            print(f'loading dataset {dataset_name}...', end='')
            dataset = fetch_twitter(dataset_name, min_df=10)
            dataset.stats()
            dataset.reduce()
            print('[done]')
            self._check_dataset(dataset)

    def test_UCIBinaryDataset(self):
        """Check every dataset listed in ``UCI_BINARY_DATASETS``.

        ``pageblocks.5`` is skipped when fetching it fails with the
        "first time you attempt to load this dataset" message; any other
        ``FileNotFoundError`` is propagated so that the test fails.
        """
        for dataset_name in UCI_BINARY_DATASETS:
            print(f'loading dataset {dataset_name}...', end='')
            try:
                dataset = fetch_UCIBinaryDataset(dataset_name)
            except FileNotFoundError as fnfe:
                # str() is used instead of args[0] because args[0] is an errno
                # (int) for OS-raised errors; `in` also matches at position 0.
                needs_manual_step = (
                    'If this is the first time you attempt to load this dataset' in str(fnfe)
                )
                if dataset_name == 'pageblocks.5' and needs_manual_step:
                    print('The pageblocks.5 dataset requires some hand processing to be usable; skipping this test.')
                    continue
                # Any other missing file is a genuine failure: do not swallow it.
                raise
            dataset.stats()
            dataset.reduce()
            print('[done]')
            self._check_dataset(dataset)

    def test_UCIMultiDataset(self):
        """Check every dataset listed in ``UCI_MULTICLASS_DATASETS``.

        Training and test sets are replaced by samples of 100 instances drawn
        at uniform prevalence before the quantifier is fitted.
        """
        for dataset_name in UCI_MULTICLASS_DATASETS:
            print(f'loading dataset {dataset_name}...', end='')
            dataset = fetch_UCIMulticlassDataset(dataset_name)
            dataset.stats()
            n_classes = dataset.n_classes
            uniform_prev = F.uniform_prevalence(n_classes)
            dataset.training = dataset.training.sampling(100, *uniform_prev)
            dataset.test = dataset.test.sampling(100, *uniform_prev)
            print('[done]')
            self._check_dataset(dataset)

    def test_lequa2022(self):
        """Check the LeQua 2022 vector tasks and text tasks.

        Text tasks are vectorized with a ``TfidfVectorizer`` fitted on the
        (sub-sampled) training instances.
        """
        for dataset_name in LEQUA2022_VECTOR_TASKS:
            print(f'loading dataset {dataset_name}...', end='')
            train, gen_val, gen_test = fetch_lequa2022(dataset_name)
            train.stats()
            n_classes = train.n_classes
            train = train.sampling(100, *F.uniform_prevalence(n_classes))
            q = self.new_quantifier()
            q.fit(train)
            self._check_samples(gen_val, q, max_samples_test=5)
            self._check_samples(gen_test, q, max_samples_test=5)

        for dataset_name in LEQUA2022_TEXT_TASKS:
            print(f'loading dataset {dataset_name}...', end='')
            train, gen_val, gen_test = fetch_lequa2022(dataset_name)
            train.stats()
            n_classes = train.n_classes
            train = train.sampling(100, *F.uniform_prevalence(n_classes))
            tfidf = TfidfVectorizer()
            train.instances = tfidf.fit_transform(train.instances)
            q = self.new_quantifier()
            q.fit(train)
            # The generated samples must go through the same fitted vectorizer.
            self._check_samples(gen_val, q, max_samples_test=5, vectorizer=tfidf)
            self._check_samples(gen_test, q, max_samples_test=5, vectorizer=tfidf)

    def test_IFCB(self):
        """Check the IFCB dataset with ``for_model_selection`` off and on."""
        print('loading dataset IFCB.')
        for mod_sel in [False, True]:
            train, gen = fetch_IFCB(single_sample_train=True, for_model_selection=mod_sel)
            train.stats()
            n_classes = train.n_classes
            train = train.sampling(100, *F.uniform_prevalence(n_classes))
            q = self.new_quantifier()
            q.fit(train)
            self._check_samples(gen, q, max_samples_test=5)
|
||||
|
||||
|
||||
# Run the tests of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
|
@ -0,0 +1,46 @@
|
|||
import unittest
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS
|
||||
from quapy.method.aggregative import *
|
||||
import inspect
|
||||
|
||||
|
||||
class HierarchyTestCase(unittest.TestCase):
    """Checks on the class hierarchy of the quantification methods."""

    def test_aggregative(self):
        """Every entry of AGGREGATIVE_METHODS, instantiated with a
        LogisticRegression, must be an AggregativeQuantifier."""
        lr = LogisticRegression()
        for m in AGGREGATIVE_METHODS:
            # subTest reports each offending method instead of stopping at the first.
            with self.subTest(method=m):
                self.assertIsInstance(m(lr), AggregativeQuantifier)

    def test_inspect_aggregative(self):
        """Every concrete AggregativeQuantifier class found in
        ``quapy.method.aggregative`` must be listed in AGGREGATIVE_METHODS."""
        import quapy.method.aggregative as methods

        members = inspect.getmembers(methods)
        # A set removes classes that are exposed under more than one name.
        classes = {cls for name, cls in members if inspect.isclass(cls)}
        quantifiers = [
            cls for cls in classes
            if issubclass(cls, BaseQuantifier)
            and issubclass(cls, AggregativeQuantifier)
            and not inspect.isabstract(cls)
        ]

        for cls in quantifiers:
            with self.subTest(cls=cls):
                self.assertIn(cls, AGGREGATIVE_METHODS)

    def test_binary(self):
        """Every entry of BINARY_METHODS, instantiated with a
        LogisticRegression, must be a BinaryQuantifier."""
        lr = LogisticRegression()
        for m in BINARY_METHODS:
            with self.subTest(method=m):
                self.assertIsInstance(m(lr), BinaryQuantifier)

    def test_probabilistic(self):
        """CC and ACC must be crisp (and not soft) aggregative quantifiers;
        PCC and PACC must be soft (and not crisp) ones."""
        lr = LogisticRegression()
        for m in [CC(lr), ACC(lr)]:
            with self.subTest(quantifier=m):
                self.assertIsInstance(m, AggregativeCrispQuantifier)
                self.assertNotIsInstance(m, AggregativeSoftQuantifier)
        for m in [PCC(lr), PACC(lr)]:
            with self.subTest(quantifier=m):
                self.assertNotIsInstance(m, AggregativeCrispQuantifier)
                self.assertIsInstance(m, AggregativeSoftQuantifier)
|
||||
|
||||
|
||||
# Run the tests of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
|
@ -0,0 +1,92 @@
|
|||
import itertools
|
||||
import unittest
|
||||
|
||||
from sklearn.linear_model import LogisticRegression
|
||||
|
||||
import quapy as qp
|
||||
from quapy.method.aggregative import ACC
|
||||
from quapy.method.meta import Ensemble
|
||||
from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS, NON_AGGREGATIVE_METHODS
|
||||
from quapy.functional import check_prevalence_vector
|
||||
|
||||
class TestMethods(unittest.TestCase):
    """Smoke tests for the quantification methods.

    Each test fits a method on a small dataset and checks that the resulting
    prevalence estimate is accepted by ``check_prevalence_vector``.
    """

    # NOTE: these are class attributes, so the datasets are fetched and reduced
    # once, when the class body is executed (i.e. when this module is imported).
    tiny_dataset_multiclass = qp.datasets.fetch_UCIMulticlassDataset('academic-success').reduce(n_test=10)
    tiny_dataset_binary = qp.datasets.fetch_UCIBinaryDataset('ionosphere').reduce(n_test=10)
    datasets = [tiny_dataset_binary, tiny_dataset_multiclass]

    def test_aggregative(self):
        """Check every method in AGGREGATIVE_METHODS on each dataset.

        A single LogisticRegression is fitted per dataset and shared by all
        methods, which are fitted with ``fit_classifier=False``. Binary-only
        methods are skipped on non-binary datasets.
        """
        for dataset in TestMethods.datasets:
            learner = LogisticRegression()
            learner.fit(*dataset.training.Xy)

            for model in AGGREGATIVE_METHODS:
                if not dataset.binary and model in BINARY_METHODS:
                    print(f'skipping the test of binary model {model.__name__} on multiclass dataset {dataset.name}')
                    continue

                q = model(learner)
                print('testing', q)
                q.fit(dataset.training, fit_classifier=False)
                estim_prevalences = q.quantify(dataset.test.X)
                self.assertTrue(check_prevalence_vector(estim_prevalences))

    def test_non_aggregative(self):
        """Check every method in NON_AGGREGATIVE_METHODS on each dataset.

        Binary-only methods are skipped on non-binary datasets.
        """
        for dataset in TestMethods.datasets:
            for model in NON_AGGREGATIVE_METHODS:
                if not dataset.binary and model in BINARY_METHODS:
                    print(f'skipping the test of binary model {model.__name__} on multiclass dataset {dataset.name}')
                    continue

                q = model()
                print(f'testing {q} on dataset {dataset.name}')
                q.fit(dataset.training)
                estim_prevalences = q.quantify(dataset.test.X)
                self.assertTrue(check_prevalence_vector(estim_prevalences))

    def test_ensembles(self):
        """Check an ACC-based Ensemble for every (dataset, policy) pair.

        The 'ds' policy is skipped on non-binary datasets.
        """
        qp.environ['SAMPLE_SIZE'] = 10

        base_quantifier = ACC(LogisticRegression())
        for dataset, policy in itertools.product(TestMethods.datasets, Ensemble.VALID_POLICIES):
            if not dataset.binary and policy == 'ds':
                # Report the dataset by name, as every other message here does.
                print(f'skipping the test of binary policy ds on non-binary dataset {dataset.name}')
                continue

            print(f'testing {base_quantifier} on dataset {dataset.name} with {policy=}')
            ensemble = Ensemble(quantifier=base_quantifier, size=3, policy=policy, n_jobs=-1)
            ensemble.fit(dataset.training)
            estim_prevalences = ensemble.quantify(dataset.test.instances)
            self.assertTrue(check_prevalence_vector(estim_prevalences))

    def test_quanet(self):
        """Check QuaNet (CNN classifier, CPU) on the reduced kindle dataset.

        The test is reported as skipped when ``quapy.classification.neural``
        cannot be imported.
        """
        try:
            from quapy.classification.neural import CNNnet, NeuralClassifierTrainer
        except ModuleNotFoundError:
            # skipTest raises, so the test is reported as skipped instead of
            # silently counting as a pass.
            self.skipTest('the torch package is not installed; skipping unit test for QuaNet')

        qp.environ['SAMPLE_SIZE'] = 10

        # load the kindle dataset as text, and convert words to numerical indexes
        dataset = qp.datasets.fetch_reviews('kindle', pickle=True).reduce()
        qp.data.preprocessing.index(dataset, min_df=5, inplace=True)

        cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
        learner = NeuralClassifierTrainer(cnn, device='cpu')

        from quapy.method.meta import QuaNet
        model = QuaNet(learner, device='cpu', n_epochs=2, tr_iter_per_poch=10, va_iter_per_poch=10, patience=2)

        model.fit(dataset.training)
        estim_prevalences = model.quantify(dataset.test.instances)
        self.assertTrue(check_prevalence_vector(estim_prevalences))
|
||||
|
||||
|
||||
# Run the tests of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
Loading…
Reference in New Issue