diff --git a/TODO.txt b/TODO.txt index 2e153a2..2496a4b 100644 --- a/TODO.txt +++ b/TODO.txt @@ -2,7 +2,6 @@ Packaging: ========================================== Documentation with sphinx Document methods with paper references -allow for "pip install" unit-tests New features: diff --git a/quapy/__init__.py b/quapy/__init__.py index cffae1c..7fea635 100644 --- a/quapy/__init__.py +++ b/quapy/__init__.py @@ -10,7 +10,7 @@ from . import model_selection from . import classification from quapy.method.base import isprobabilistic, isaggregative -__version__ = '0.1' +__version__ = '0.1.4' environ = { 'SAMPLE_SIZE': None, diff --git a/quapy/method/meta.py b/quapy/method/meta.py index e74c969..fc3efe3 100644 --- a/quapy/method/meta.py +++ b/quapy/method/meta.py @@ -1,28 +1,32 @@ from copy import deepcopy from typing import Union +import numpy as np +from sklearn.linear_model import LogisticRegression from sklearn.metrics import f1_score, make_scorer, accuracy_score +from sklearn.model_selection import GridSearchCV, cross_val_predict from tqdm import tqdm -import numpy as np -from joblib import Parallel, delayed -from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import GridSearchCV, cross_val_predict - import quapy as qp -from quapy.data import LabelledCollection from quapy import functional as F +from quapy.data import LabelledCollection from quapy.evaluation import evaluate from quapy.model_selection import GridSearchQ -from . import neural -from .base import BaseQuantifier -from quapy.method.aggregative import CC, ACC, PCC, PACC, HDy, EMQ -QuaNet = neural.QuaNetTrainer +try: + from . import neural +except ModuleNotFoundError: + neural = None +from .base import BaseQuantifier +from quapy.method.aggregative import CC, ACC, PACC, HDy, EMQ + +if neural: + QuaNet = neural.QuaNetTrainer +else: + QuaNet = "QuaNet is not available due to missing torch package" class Ensemble(BaseQuantifier): - VALID_POLICIES = {'ave', 'ptr', 'ds'} | qp.error.QUANTIFICATION_ERROR_NAMES """ @@ -65,9 +69,9 @@ class Ensemble(BaseQuantifier): if self.verbose: print('[Ensemble]' + msg) - def fit(self, data: qp.data.LabelledCollection, val_split: Union[qp.data.LabelledCollection, float]=None): + def fit(self, data: qp.data.LabelledCollection, val_split: Union[qp.data.LabelledCollection, float] = None): self.sout('Fit') - if self.policy=='ds' and not data.binary: + if self.policy == 'ds' and not data.binary: raise ValueError(f'ds policy is only defined for binary quantification, but this dataset is not binary') if val_split is None: val_split = self.val_split @@ -132,7 +136,7 @@ class Ensemble(BaseQuantifier): tests = [m[3] for m in self.ensemble] scores = [] for i, model in enumerate(self.ensemble): - scores.append(evaluate(model[0], tests[:i] + tests[i+1:], error, self.n_jobs)) + scores.append(evaluate(model[0], tests[:i] + tests[i + 1:], error, self.n_jobs)) order = np.argsort(scores) self.ensemble = _select_k(self.ensemble, order, k=self.red_size) @@ -168,7 +172,7 @@ class Ensemble(BaseQuantifier): lr_base = LogisticRegression(class_weight='balanced', max_iter=1000) optim = GridSearchCV( - lr_base, param_grid={'C': np.logspace(-4,4,9)}, cv=5, n_jobs=self.n_jobs, refit=True + lr_base, param_grid={'C': np.logspace(-4, 4, 9)}, cv=5, n_jobs=self.n_jobs, refit=True ).fit(X, y) posteriors = cross_val_predict( @@ -204,8 +208,8 @@ class Ensemble(BaseQuantifier): def get_probability_distribution(posterior_probabilities, bins=8): - assert posterior_probabilities.shape[1]==2, 'the posterior probabilities do not seem to be for a binary problem' - posterior_probabilities = posterior_probabilities[:,1] # take the positive posteriors only + assert posterior_probabilities.shape[1] == 2, 'the posterior probabilities do not seem to be for a binary problem' + posterior_probabilities = posterior_probabilities[:, 1] # take the positive posteriors only distribution, _ = np.histogram(posterior_probabilities, bins=bins, range=(0, 1), density=True) return distribution @@ -223,7 +227,7 @@ def _delayed_new_instance(args): if val_split is not None: if isinstance(val_split, float): assert 0 < val_split < 1, 'val_split should be in (0,1)' - data, val_split = data.split_stratified(train_prop=1-val_split) + data, val_split = data.split_stratified(train_prop=1 - val_split) sample_index = data.sampling_index(sample_size, *prev) sample = data.sampling_from_index(sample_index) @@ -255,7 +259,7 @@ def _draw_simplex(ndim, min_val, max_trials=100): :return: a sample from the ndim-dimensional simplex that is uniform in S(ndim)-R where S(ndim) is the simplex and R is the simplex subset containing dimensions lower than min_val """ - if min_val >= 1/ndim: + if min_val >= 1 / ndim: raise ValueError(f'no sample can be draw from the {ndim}-dimensional simplex so that ' f'all its values are >={min_val} (try with a larger value for min_pos)') trials = 0 @@ -300,14 +304,15 @@ def _check_error(error): f'the name of an error function in {qp.error.ERROR_NAMES}') -def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None, param_model_sel:dict=None, **kwargs): - if optim is not None: - if param_grid is None: - raise ValueError(f'param_grid is None but optim was requested.') - if param_model_sel is None: - raise ValueError(f'param_model_sel is None but optim was requested.') - error = _check_error(optim) - return _instantiate_ensemble(learner, base_quantifier_class, param_grid, error, param_model_sel, **kwargs) +def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None, param_model_sel: dict = None, + **kwargs): + if optim is not None: + if param_grid is None: + raise ValueError(f'param_grid is None but optim was requested.') + if param_model_sel is None: + raise ValueError(f'param_model_sel is None but optim was requested.') + error = _check_error(optim) + return _instantiate_ensemble(learner, base_quantifier_class, param_grid, error, param_model_sel, **kwargs) def ECC(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): @@ -327,4 +332,4 @@ def EHDy(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): def EEMQ(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs): - return ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, **kwargs) \ No newline at end of file + return ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, **kwargs) diff --git a/quapy/tests/test_methods.py b/quapy/tests/test_methods.py index 186b7c0..bcf721c 100644 --- a/quapy/tests/test_methods.py +++ b/quapy/tests/test_methods.py @@ -100,6 +100,12 @@ def test_ensemble_method(base_method, learner, dataset: Dataset, policy): def test_quanet_method(): + try: + import quapy.classification.neural + except ModuleNotFoundError: + print('skipping QuaNet test due to missing torch package') + return + dataset = qp.datasets.fetch_reviews('kindle', pickle=True) dataset = Dataset(dataset.training.sampling(100, *dataset.training.prevalence()), dataset.test.sampling(100, *dataset.test.prevalence())) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..898ff46 --- /dev/null +++ b/setup.py @@ -0,0 +1,164 @@ +from setuptools import setup, find_packages +import pathlib + +here = pathlib.Path(__file__).parent.resolve() + +long_description = (here / 'README.md').read_text(encoding='utf-8') + + +def get_version(rel_path): + init_content = (here / rel_path).read_text(encoding='utf-8') + for line in init_content.split('\n'): + if line.startswith('__version__'): + delim = '"' if '"' in line else "'" + return line.split(delim)[1] + else: + raise RuntimeError("Unable to find version string.") +# Arguments marked as "Required" below must be included for upload to PyPI. +# Fields marked as "Optional" may be commented out. + +setup( + # This is the name of your project. The first time you publish this + # package, this name will be registered for you. It will determine how + # users can install this project, e.g.: + # + # $ pip install sampleproject + # + # And where it will live on PyPI: https://pypi.org/project/sampleproject/ + # + # There are some restrictions on what makes a valid project name + # specification here: + # https://packaging.python.org/specifications/core-metadata/#name + name='QuaPy', # Required + + # Versions should comply with PEP 440: + # https://www.python.org/dev/peps/pep-0440/ + # + # For a discussion on single-sourcing the version across setup.py and the + # project code, see + # https://packaging.python.org/en/latest/single_source_version.html + version=get_version("quapy/__init__.py"), # Required + + # This is a one-line description or tagline of what your project does. This + # corresponds to the "Summary" metadata field: + # https://packaging.python.org/specifications/core-metadata/#summary + description='QuaPy: a framework for Quantification in Python', # Optional + + # This is an optional longer description of your project that represents + # the body of text which users will see when they visit PyPI. + # + # Often, this is the same as your README, so you can just read it in from + # that file directly (as we have already done above) + # + # This field corresponds to the "Description" metadata field: + # https://packaging.python.org/specifications/core-metadata/#description-optional + long_description=long_description, # Optional + + # Denotes that our long_description is in Markdown; valid values are + # text/plain, text/x-rst, and text/markdown + # + # Optional if long_description is written in reStructuredText (rst) but + # required for plain-text or Markdown; if unspecified, "applications should + # attempt to render [the long_description] as text/x-rst; charset=UTF-8 and + # fall back to text/plain if it is not valid rst" (see link below) + # + # This field corresponds to the "Description-Content-Type" metadata field: + # https://packaging.python.org/specifications/core-metadata/#description-content-type-optional + long_description_content_type='text/markdown', # Optional (see note above) + + # This should be a valid link to your project's main homepage. + # + # This field corresponds to the "Home-Page" metadata field: + # https://packaging.python.org/specifications/core-metadata/#home-page-optional + url='https://github.com/HLT-ISTI/QuaPy', # Optional + + maintainer='Alejandro Moreo', + + maintainer_email='alejandro.moreo@isti.cnr.it', + + classifiers=[ + 'Development Status :: 4 - Beta', + + 'Intended Audience :: Developers', + 'Intended Audience :: Science/Research', + 'Programming Language :: Python', + 'Topic :: Software Development', + 'Topic :: Scientific/Engineering', + + 'License :: OSI Approved :: BSD License', + + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3 :: Only', + ], + + keywords='machine learning, quantification, classification, prevalence estimation, priors estimate', + + # When your source code is in a subdirectory under the project root, e.g. + # `src/`, it is necessary to specify the `package_dir` argument. + #package_dir={'': 'src'}, # Optional + + # You can just specify package directories manually here if your project is + # simple. Or you can use find_packages(). + # + # Alternatively, if you just want to distribute a single Python file, use + # the `py_modules` argument instead as follows, which will expect a file + # called `my_module.py` to exist: + # + # py_modules=["my_module"], + # + packages=find_packages(include=['quapy', 'quapy.*']), # Required + + python_requires='>=3.6, <4', + + install_requires=['scikit-learn', 'pandas', 'tqdm', 'matplotlib'], + + # List additional groups of dependencies here (e.g. development + # dependencies). Users will be able to install these using the "extras" + # syntax, for example: + # + # $ pip install sampleproject[dev] + # + # Similar to `install_requires` above, these must be valid existing + # projects. + # extras_require={ # Optional + # 'dev': ['check-manifest'], + # 'test': ['coverage'], + # }, + + # If there are data files included in your packages that need to be + # installed, specify them here. + # package_data={ # Optional + # 'sample': ['package_data.dat'], + # }, + + # Although 'package_data' is the preferred approach, in some case you may + # need to place data files outside of your packages. See: + # http://docs.python.org/distutils/setupscript.html#installing-additional-files + # + # In this case, 'data_file' will be installed into '/my_data' + # data_files=[('my_data', ['data/data_file'])], # Optional + + # To provide executable scripts, use entry points in preference to the + # "scripts" keyword. Entry points provide cross-platform support and allow + # `pip` to create the appropriate form of executable for the target + # platform. + # + # For example, the following would provide a command called `sample` which + # executes the function `main` from this package when invoked: + # entry_points={ # Optional + # 'console_scripts': [ + # 'sample=sample:main', + # ], + # }, + + project_urls={ # Optional + 'Contributors': 'https://github.com/HLT-ISTI/QuaPy/graphs/contributors', + 'Bug Reports': 'https://github.com/HLT-ISTI/QuaPy/issues', + 'Documentation': 'https://github.com/HLT-ISTI/QuaPy/wiki', + 'Source': 'https://github.com/HLT-ISTI/QuaPy/', + }, +)