diff --git a/quapy/tests/test_base.py b/quapy/tests/test_base.py
new file mode 100644
index 0000000..7e2b4f8
--- /dev/null
+++ b/quapy/tests/test_base.py
@@ -0,0 +1,11 @@
+import unittest
+
+
+class ImportTest(unittest.TestCase):
+    def test_import(self):
+        import quapy as qp
+        self.assertIsNotNone(qp.__version__)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/quapy/tests/test_datasets.py b/quapy/tests/test_datasets.py
new file mode 100644
index 0000000..daa9207
--- /dev/null
+++ b/quapy/tests/test_datasets.py
@@ -0,0 +1,120 @@
+import unittest
+
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.linear_model import LogisticRegression
+
+import quapy.functional as F
+from quapy.method.aggregative import PCC
+from quapy.data.datasets import *
+
+
+class TestDatasets(unittest.TestCase):
+
+    def new_quantifier(self):
+        return PCC(LogisticRegression(C=0.001, max_iter=100))
+
+    def _check_dataset(self, dataset):
+        q = self.new_quantifier()
+        print(f'testing method {q} in {dataset.name}...', end='')
+        q.fit(dataset.training)
+        estim_prevalences = q.quantify(dataset.test.instances)
+        self.assertTrue(F.check_prevalence_vector(estim_prevalences))
+        print('[done]')
+
+    def _check_samples(self, gen, q, max_samples_test=5, vectorizer=None):
+        for X, p in gen():
+            if vectorizer is not None:
+                X = vectorizer.transform(X)
+            estim_prevalences = q.quantify(X)
+            self.assertTrue(F.check_prevalence_vector(estim_prevalences))
+            max_samples_test -= 1
+            if max_samples_test == 0:
+                break
+
+    def test_reviews(self):
+        for dataset_name in REVIEWS_SENTIMENT_DATASETS:
+            print(f'loading dataset {dataset_name}...', end='')
+            dataset = fetch_reviews(dataset_name, tfidf=True, min_df=10)
+            dataset.stats()
+            dataset.reduce()
+            print('[done]')
+            self._check_dataset(dataset)
+
+    def test_twitter(self):
+        for dataset_name in TWITTER_SENTIMENT_DATASETS_TEST:
+            print(f'loading dataset {dataset_name}...', end='')
+            dataset = fetch_twitter(dataset_name, min_df=10)
+            dataset.stats()
+            dataset.reduce()
+            print('[done]')
+            self._check_dataset(dataset)
+
+    def test_UCIBinaryDataset(self):
+        for dataset_name in UCI_BINARY_DATASETS:
+            try:
+                print(f'loading dataset {dataset_name}...', end='')
+                dataset = fetch_UCIBinaryDataset(dataset_name)
+                dataset.stats()
+                dataset.reduce()
+                print('[done]')
+                self._check_dataset(dataset)
+            except FileNotFoundError as fnfe:
+                if dataset_name == 'pageblocks.5' and fnfe.args[0].find(
+                        'If this is the first time you attempt to load this dataset') > 0:
+                    print('The pageblocks.5 dataset requires some hand processing to be usable; skipping this test.')
+                    continue
+                raise
+
+    def test_UCIMultiDataset(self):
+        for dataset_name in UCI_MULTICLASS_DATASETS:
+            print(f'loading dataset {dataset_name}...', end='')
+            dataset = fetch_UCIMulticlassDataset(dataset_name)
+            dataset.stats()
+            n_classes = dataset.n_classes
+            uniform_prev = F.uniform_prevalence(n_classes)
+            dataset.training = dataset.training.sampling(100, *uniform_prev)
+            dataset.test = dataset.test.sampling(100, *uniform_prev)
+            print('[done]')
+            self._check_dataset(dataset)
+
+    def test_lequa2022(self):
+
+        for dataset_name in LEQUA2022_VECTOR_TASKS:
+            print(f'loading dataset {dataset_name}...', end='')
+            train, gen_val, gen_test = fetch_lequa2022(dataset_name)
+            train.stats()
+            n_classes = train.n_classes
+            train = train.sampling(100, *F.uniform_prevalence(n_classes))
+            q = self.new_quantifier()
+            q.fit(train)
+            self._check_samples(gen_val, q, max_samples_test=5)
+            self._check_samples(gen_test, q, max_samples_test=5)
+
+        for dataset_name in LEQUA2022_TEXT_TASKS:
+            print(f'loading dataset {dataset_name}...', end='')
+            train, gen_val, gen_test = fetch_lequa2022(dataset_name)
+            train.stats()
+            n_classes = train.n_classes
+            train = train.sampling(100, *F.uniform_prevalence(n_classes))
+            tfidf = TfidfVectorizer()
+            train.instances = tfidf.fit_transform(train.instances)
+            q = self.new_quantifier()
+            q.fit(train)
+            self._check_samples(gen_val, q, max_samples_test=5, vectorizer=tfidf)
+            self._check_samples(gen_test, q, max_samples_test=5, vectorizer=tfidf)
+
+
+    def test_IFCB(self):
+        print('loading dataset IFCB...')
+        for mod_sel in [False, True]:
+            train, gen = fetch_IFCB(single_sample_train=True, for_model_selection=mod_sel)
+            train.stats()
+            n_classes = train.n_classes
+            train = train.sampling(100, *F.uniform_prevalence(n_classes))
+            q = self.new_quantifier()
+            q.fit(train)
+            self._check_samples(gen, q, max_samples_test=5)
+
+
+if __name__ == '__main__':
+    unittest.main()
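Note (an illustration, not part of the patch): the sample-based checks above follow QuaPy's standard LeQua workflow. A minimal stand-alone sketch of that workflow, assuming the T1A task data can be downloaded and that, as in `_check_samples`, each generated pair is (instances, true prevalence vector):

    from sklearn.linear_model import LogisticRegression

    import quapy as qp
    from quapy.method.aggregative import PCC

    # fetch_lequa2022 returns a training collection plus validation/test sample generators
    train, gen_val, gen_test = qp.datasets.fetch_lequa2022('T1A')

    quantifier = PCC(LogisticRegression())
    quantifier.fit(train)

    # inspect a single validation sample, mirroring what _check_samples does in a loop
    for X, true_prev in gen_val():
        estim_prev = quantifier.quantify(X)
        print('true:', true_prev, 'estimated:', estim_prev)
        break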
diff --git a/quapy/tests/test_hierarchy.py b/quapy/tests/test_hierarchy.py
new file mode 100644
index 0000000..0cf9b9b
--- /dev/null
+++ b/quapy/tests/test_hierarchy.py
@@ -0,0 +1,46 @@
+import unittest
+from sklearn.linear_model import LogisticRegression
+
+from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS
+from quapy.method.aggregative import *
+import inspect
+
+
+class HierarchyTestCase(unittest.TestCase):
+
+    def test_aggregative(self):
+        lr = LogisticRegression()
+        for m in AGGREGATIVE_METHODS:
+            self.assertIsInstance(m(lr), AggregativeQuantifier)
+
+    def test_inspect_aggregative(self):
+
+        import quapy.method.aggregative as methods
+
+        members = inspect.getmembers(methods)
+        classes = set(cls for name, cls in members if inspect.isclass(cls))
+        quantifiers = [cls for cls in classes if issubclass(cls, BaseQuantifier)]
+        quantifiers = [cls for cls in quantifiers if issubclass(cls, AggregativeQuantifier)]
+        quantifiers = [cls for cls in quantifiers if not inspect.isabstract(cls)]
+
+        for cls in quantifiers:
+            self.assertIn(cls, AGGREGATIVE_METHODS)
+
+    def test_binary(self):
+        lr = LogisticRegression()
+        for m in BINARY_METHODS:
+            self.assertIsInstance(m(lr), BinaryQuantifier)
+
+    def test_probabilistic(self):
+        lr = LogisticRegression()
+        for m in [CC(lr), ACC(lr)]:
+            self.assertIsInstance(m, AggregativeCrispQuantifier)
+            self.assertNotIsInstance(m, AggregativeSoftQuantifier)
+        for m in [PCC(lr), PACC(lr)]:
+            self.assertNotIsInstance(m, AggregativeCrispQuantifier)
+            self.assertIsInstance(m, AggregativeSoftQuantifier)
+
+
+if __name__ == '__main__':
+    unittest.main()
+
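Aside (a sketch, not part of the patch): the crisp/soft split asserted in test_probabilistic reflects how each aggregative method consumes the classifier's output: CC and ACC aggregate hard label predictions (crisp), whereas PCC and PACC aggregate posterior probabilities (soft). For instance:

    from sklearn.linear_model import LogisticRegression

    from quapy.method.aggregative import PACC, AggregativeSoftQuantifier

    # PACC corrects prevalence estimates computed from averaged posteriors,
    # so it belongs to the soft branch of the hierarchy
    assert isinstance(PACC(LogisticRegression()), AggregativeSoftQuantifier)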
diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py
new file mode 100644
index 0000000..69d627b
--- /dev/null
+++ b/quapy/tests/test_methods.py
@@ -0,0 +1,92 @@
+import itertools
+import unittest
+
+from sklearn.linear_model import LogisticRegression
+
+import quapy as qp
+from quapy.method.aggregative import ACC
+from quapy.method.meta import Ensemble
+from quapy.method import AGGREGATIVE_METHODS, BINARY_METHODS, NON_AGGREGATIVE_METHODS
+from quapy.functional import check_prevalence_vector
+
+class TestMethods(unittest.TestCase):
+
+    tiny_dataset_multiclass = qp.datasets.fetch_UCIMulticlassDataset('academic-success').reduce(n_test=10)
+    tiny_dataset_binary = qp.datasets.fetch_UCIBinaryDataset('ionosphere').reduce(n_test=10)
+    datasets = [tiny_dataset_binary, tiny_dataset_multiclass]
+
+    def test_aggregative(self):
+        for dataset in TestMethods.datasets:
+            learner = LogisticRegression()
+            learner.fit(*dataset.training.Xy)
+
+            for model in AGGREGATIVE_METHODS:
+                if not dataset.binary and model in BINARY_METHODS:
+                    print(f'skipping the test of binary model {model.__name__} on multiclass dataset {dataset.name}')
+                    continue
+
+                q = model(learner)
+                print('testing', q)
+                q.fit(dataset.training, fit_classifier=False)
+                estim_prevalences = q.quantify(dataset.test.X)
+                self.assertTrue(check_prevalence_vector(estim_prevalences))
+
+    def test_non_aggregative(self):
+        for dataset in TestMethods.datasets:
+
+            for model in NON_AGGREGATIVE_METHODS:
+                if not dataset.binary and model in BINARY_METHODS:
+                    print(f'skipping the test of binary model {model.__name__} on multiclass dataset {dataset.name}')
+                    continue
+
+                q = model()
+                print(f'testing {q} on dataset {dataset.name}')
+                q.fit(dataset.training)
+                estim_prevalences = q.quantify(dataset.test.X)
+                self.assertTrue(check_prevalence_vector(estim_prevalences))
+
+    def test_ensembles(self):
+
+        qp.environ['SAMPLE_SIZE'] = 10
+
+        base_quantifier = ACC(LogisticRegression())
+        for dataset, policy in itertools.product(TestMethods.datasets, Ensemble.VALID_POLICIES):
+            if not dataset.binary and policy == 'ds':
+                print(f'skipping the test of binary policy ds on non-binary dataset {dataset.name}')
+                continue
+
+            print(f'testing {base_quantifier} on dataset {dataset.name} with {policy=}')
+            ensemble = Ensemble(quantifier=base_quantifier, size=3, policy=policy, n_jobs=-1)
+            ensemble.fit(dataset.training)
+            estim_prevalences = ensemble.quantify(dataset.test.instances)
+            self.assertTrue(check_prevalence_vector(estim_prevalences))
+
+    def test_quanet(self):
+        try:
+            import quapy.classification.neural
+        except ModuleNotFoundError:
+            print('the torch package is not installed; skipping unit test for QuaNet')
+            return
+
+        qp.environ['SAMPLE_SIZE'] = 10
+
+        # load the kindle dataset as text, and convert words to numerical indexes
+        dataset = qp.datasets.fetch_reviews('kindle', pickle=True).reduce()
+        qp.data.preprocessing.index(dataset, min_df=5, inplace=True)
+
+        from quapy.classification.neural import CNNnet
+        cnn = CNNnet(dataset.vocabulary_size, dataset.n_classes)
+
+        from quapy.classification.neural import NeuralClassifierTrainer
+        learner = NeuralClassifierTrainer(cnn, device='cpu')
+
+        from quapy.method.meta import QuaNet
+        model = QuaNet(learner, device='cpu', n_epochs=2, tr_iter_per_poch=10, va_iter_per_poch=10, patience=2)
+
+        model.fit(dataset.training)
+        estim_prevalences = model.quantify(dataset.test.instances)
+        self.assertTrue(check_prevalence_vector(estim_prevalences))
+
+
+if __name__ == '__main__':
+    unittest.main()
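All four files follow the stock unittest layout, so besides running each file through its __main__ block, the whole suite can be collected with unittest's discovery mechanism. A minimal runner sketch (it assumes the path quapy/tests is resolvable from the working directory):

    import unittest

    # discover every test_*.py module added by this patch and run it verbosely
    suite = unittest.defaultTestLoader.discover('quapy/tests')
    unittest.TextTestRunner(verbosity=2).run(suite)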