pip package

2021-05-10 13:36:35 +02:00 · 2021-05-10 13:36:35 +02:00 · 79fbbd9d80
parent c280c03fdb
commit 79fbbd9d80
5 changed files with 204 additions and 30 deletions
--- a/TODO.txt
+++ b/TODO.txt
@@ -2,7 +2,6 @@ Packaging:
 ==========================================
 Documentation with sphinx
 Document methods with paper references
-allow for "pip install"
 unit-tests

 New features:
--- a/quapy/__init__.py
+++ b/quapy/__init__.py
@@ -10,7 +10,7 @@ from . import model_selection
 from . import classification
 from quapy.method.base import isprobabilistic, isaggregative

-__version__ = '0.1'
+__version__ = '0.1.4'

 environ = {
    'SAMPLE_SIZE': None,
--- a/quapy/method/meta.py
+++ b/quapy/method/meta.py
@@ -1,28 +1,32 @@
 from copy import deepcopy
 from typing import Union

+import numpy as np
+from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import f1_score, make_scorer, accuracy_score
+from sklearn.model_selection import GridSearchCV, cross_val_predict
 from tqdm import tqdm

-import numpy as np
-from joblib import Parallel, delayed
-from sklearn.linear_model import LogisticRegression
-from sklearn.model_selection import GridSearchCV, cross_val_predict
-
 import quapy as qp
-from quapy.data import LabelledCollection
 from quapy import functional as F
+from quapy.data import LabelledCollection
 from quapy.evaluation import evaluate
 from quapy.model_selection import GridSearchQ
-from . import neural
-from .base import BaseQuantifier
-from quapy.method.aggregative import CC, ACC, PCC, PACC, HDy, EMQ

-QuaNet = neural.QuaNetTrainer
+try:
+    from . import neural
+except ModuleNotFoundError:
+    neural = None
+from .base import BaseQuantifier
+from quapy.method.aggregative import CC, ACC, PACC, HDy, EMQ
+
+if neural:
+    QuaNet = neural.QuaNetTrainer
+else:
+    QuaNet = "QuaNet is not available due to missing torch package"


 class Ensemble(BaseQuantifier):
-
    VALID_POLICIES = {'ave', 'ptr', 'ds'} | qp.error.QUANTIFICATION_ERROR_NAMES

    """
@@ -65,9 +69,9 @@ class Ensemble(BaseQuantifier):
        if self.verbose:
            print('[Ensemble]' + msg)

-    def fit(self, data: qp.data.LabelledCollection, val_split: Union[qp.data.LabelledCollection, float]=None):
+    def fit(self, data: qp.data.LabelledCollection, val_split: Union[qp.data.LabelledCollection, float] = None):
        self.sout('Fit')
-        if self.policy=='ds' and not data.binary:
+        if self.policy == 'ds' and not data.binary:
            raise ValueError(f'ds policy is only defined for binary quantification, but this dataset is not binary')
        if val_split is None:
            val_split = self.val_split
@@ -132,7 +136,7 @@ class Ensemble(BaseQuantifier):
        tests = [m[3] for m in self.ensemble]
        scores = []
        for i, model in enumerate(self.ensemble):
-            scores.append(evaluate(model[0], tests[:i] + tests[i+1:], error, self.n_jobs))
+            scores.append(evaluate(model[0], tests[:i] + tests[i + 1:], error, self.n_jobs))
        order = np.argsort(scores)

        self.ensemble = _select_k(self.ensemble, order, k=self.red_size)
@@ -168,7 +172,7 @@ class Ensemble(BaseQuantifier):
        lr_base = LogisticRegression(class_weight='balanced', max_iter=1000)

        optim = GridSearchCV(
-            lr_base, param_grid={'C': np.logspace(-4,4,9)}, cv=5, n_jobs=self.n_jobs, refit=True
+            lr_base, param_grid={'C': np.logspace(-4, 4, 9)}, cv=5, n_jobs=self.n_jobs, refit=True
        ).fit(X, y)

        posteriors = cross_val_predict(
@@ -204,8 +208,8 @@ class Ensemble(BaseQuantifier):


 def get_probability_distribution(posterior_probabilities, bins=8):
-    assert posterior_probabilities.shape[1]==2, 'the posterior probabilities do not seem to be for a binary problem'
-    posterior_probabilities = posterior_probabilities[:,1]  # take the positive posteriors only
+    assert posterior_probabilities.shape[1] == 2, 'the posterior probabilities do not seem to be for a binary problem'
+    posterior_probabilities = posterior_probabilities[:, 1]  # take the positive posteriors only
    distribution, _ = np.histogram(posterior_probabilities, bins=bins, range=(0, 1), density=True)
    return distribution

@@ -223,7 +227,7 @@ def _delayed_new_instance(args):
    if val_split is not None:
        if isinstance(val_split, float):
            assert 0 < val_split < 1, 'val_split should be in (0,1)'
-            data, val_split = data.split_stratified(train_prop=1-val_split)
+            data, val_split = data.split_stratified(train_prop=1 - val_split)

    sample_index = data.sampling_index(sample_size, *prev)
    sample = data.sampling_from_index(sample_index)
@@ -255,7 +259,7 @@ def _draw_simplex(ndim, min_val, max_trials=100):
    :return: a sample from the ndim-dimensional simplex that is uniform in S(ndim)-R where S(ndim) is the simplex
    and R is the simplex subset containing dimensions lower than min_val
    """
-    if min_val >= 1/ndim:
+    if min_val >= 1 / ndim:
        raise ValueError(f'no sample can be draw from the {ndim}-dimensional simplex so that '
                         f'all its values are >={min_val} (try with a larger value for min_pos)')
    trials = 0
@@ -300,14 +304,15 @@ def _check_error(error):
                         f'the name of an error function in {qp.error.ERROR_NAMES}')


-def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None, param_model_sel:dict=None, **kwargs):
-        if optim is not None:
-            if param_grid is None:
-                raise ValueError(f'param_grid is None but optim was requested.')
-            if param_model_sel is None:
-                raise ValueError(f'param_model_sel is None but optim was requested.')
-        error = _check_error(optim)
-        return _instantiate_ensemble(learner, base_quantifier_class, param_grid, error, param_model_sel, **kwargs)
+def ensembleFactory(learner, base_quantifier_class, param_grid=None, optim=None, param_model_sel: dict = None,
+                    **kwargs):
+    if optim is not None:
+        if param_grid is None:
+            raise ValueError(f'param_grid is None but optim was requested.')
+        if param_model_sel is None:
+            raise ValueError(f'param_model_sel is None but optim was requested.')
+    error = _check_error(optim)
+    return _instantiate_ensemble(learner, base_quantifier_class, param_grid, error, param_model_sel, **kwargs)


 def ECC(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
@@ -327,4 +332,4 @@ def EHDy(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs):


 def EEMQ(learner, param_grid=None, optim=None, param_mod_sel=None, **kwargs):
-    return ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, **kwargs)
+    return ensembleFactory(learner, EMQ, param_grid, optim, param_mod_sel, **kwargs)
--- a/quapy/tests/test_methods.py
+++ b/quapy/tests/test_methods.py
@@ -100,6 +100,12 @@ def test_ensemble_method(base_method, learner, dataset: Dataset, policy):


 def test_quanet_method():
+    try:
+        import quapy.classification.neural
+    except ModuleNotFoundError:
+        print('skipping QuaNet test due to missing torch package')
+        return
+
    dataset = qp.datasets.fetch_reviews('kindle', pickle=True)
    dataset = Dataset(dataset.training.sampling(100, *dataset.training.prevalence()),
                      dataset.test.sampling(100, *dataset.test.prevalence()))
--- a/setup.py
+++ b/setup.py
@@ -0,0 +1,164 @@
+from setuptools import setup, find_packages
+import pathlib
+
+here = pathlib.Path(__file__).parent.resolve()
+
+long_description = (here / 'README.md').read_text(encoding='utf-8')
+
+
+def get_version(rel_path):
+    init_content = (here / rel_path).read_text(encoding='utf-8')
+    for line in init_content.split('\n'):
+        if line.startswith('__version__'):
+            delim = '"' if '"' in line else "'"
+            return line.split(delim)[1]
+    else:
+        raise RuntimeError("Unable to find version string.")
+# Arguments marked as "Required" below must be included for upload to PyPI.
+# Fields marked as "Optional" may be commented out.
+
+setup(
+    # This is the name of your project. The first time you publish this
+    # package, this name will be registered for you. It will determine how
+    # users can install this project, e.g.:
+    #
+    # $ pip install sampleproject
+    #
+    # And where it will live on PyPI: https://pypi.org/project/sampleproject/
+    #
+    # There are some restrictions on what makes a valid project name
+    # specification here:
+    # https://packaging.python.org/specifications/core-metadata/#name
+    name='QuaPy',  # Required
+
+    # Versions should comply with PEP 440:
+    # https://www.python.org/dev/peps/pep-0440/
+    #
+    # For a discussion on single-sourcing the version across setup.py and the
+    # project code, see
+    # https://packaging.python.org/en/latest/single_source_version.html
+    version=get_version("quapy/__init__.py"),  # Required
+
+    # This is a one-line description or tagline of what your project does. This
+    # corresponds to the "Summary" metadata field:
+    # https://packaging.python.org/specifications/core-metadata/#summary
+    description='QuaPy: a framework for Quantification in Python',  # Optional
+
+    # This is an optional longer description of your project that represents
+    # the body of text which users will see when they visit PyPI.
+    #
+    # Often, this is the same as your README, so you can just read it in from
+    # that file directly (as we have already done above)
+    #
+    # This field corresponds to the "Description" metadata field:
+    # https://packaging.python.org/specifications/core-metadata/#description-optional
+    long_description=long_description,  # Optional
+
+    # Denotes that our long_description is in Markdown; valid values are
+    # text/plain, text/x-rst, and text/markdown
+    #
+    # Optional if long_description is written in reStructuredText (rst) but
+    # required for plain-text or Markdown; if unspecified, "applications should
+    # attempt to render [the long_description] as text/x-rst; charset=UTF-8 and
+    # fall back to text/plain if it is not valid rst" (see link below)
+    #
+    # This field corresponds to the "Description-Content-Type" metadata field:
+    # https://packaging.python.org/specifications/core-metadata/#description-content-type-optional
+    long_description_content_type='text/markdown',  # Optional (see note above)
+
+    # This should be a valid link to your project's main homepage.
+    #
+    # This field corresponds to the "Home-Page" metadata field:
+    # https://packaging.python.org/specifications/core-metadata/#home-page-optional
+    url='https://github.com/HLT-ISTI/QuaPy',  # Optional
+
+    maintainer='Alejandro Moreo',
+
+    maintainer_email='alejandro.moreo@isti.cnr.it',
+
+    classifiers=[
+        'Development Status :: 4 - Beta',
+
+        'Intended Audience :: Developers',
+        'Intended Audience :: Science/Research',
+        'Programming Language :: Python',
+        'Topic :: Software Development',
+        'Topic :: Scientific/Engineering',
+
+        'License :: OSI Approved :: BSD License',
+
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
+        'Programming Language :: Python :: 3 :: Only',
+    ],
+
+    keywords='machine learning, quantification, classification, prevalence estimation, priors estimate',
+
+    # When your source code is in a subdirectory under the project root, e.g.
+    # `src/`, it is necessary to specify the `package_dir` argument.
+    #package_dir={'': 'src'},  # Optional
+
+    # You can just specify package directories manually here if your project is
+    # simple. Or you can use find_packages().
+    #
+    # Alternatively, if you just want to distribute a single Python file, use
+    # the `py_modules` argument instead as follows, which will expect a file
+    # called `my_module.py` to exist:
+    #
+    #   py_modules=["my_module"],
+    #
+    packages=find_packages(include=['quapy', 'quapy.*']),  # Required
+
+    python_requires='>=3.6, <4',
+
+    install_requires=['scikit-learn', 'pandas', 'tqdm', 'matplotlib'],
+
+    # List additional groups of dependencies here (e.g. development
+    # dependencies). Users will be able to install these using the "extras"
+    # syntax, for example:
+    #
+    #   $ pip install sampleproject[dev]
+    #
+    # Similar to `install_requires` above, these must be valid existing
+    # projects.
+    # extras_require={  # Optional
+    #     'dev': ['check-manifest'],
+    #     'test': ['coverage'],
+    # },
+
+    # If there are data files included in your packages that need to be
+    # installed, specify them here.
+    # package_data={  # Optional
+    #     'sample': ['package_data.dat'],
+    # },
+
+    # Although 'package_data' is the preferred approach, in some cases you may
+    # need to place data files outside of your packages. See:
+    # http://docs.python.org/distutils/setupscript.html#installing-additional-files
+    #
+    # In this case, 'data_file' will be installed into '<sys.prefix>/my_data'
+    # data_files=[('my_data', ['data/data_file'])],  # Optional
+
+    # To provide executable scripts, use entry points in preference to the
+    # "scripts" keyword. Entry points provide cross-platform support and allow
+    # `pip` to create the appropriate form of executable for the target
+    # platform.
+    #
+    # For example, the following would provide a command called `sample` which
+    # executes the function `main` from this package when invoked:
+    # entry_points={  # Optional
+    #     'console_scripts': [
+    #         'sample=sample:main',
+    #     ],
+    # },
+
+    project_urls={  # Optional
+        'Contributors': 'https://github.com/HLT-ISTI/QuaPy/graphs/contributors',
+        'Bug Reports': 'https://github.com/HLT-ISTI/QuaPy/issues',
+        'Documentation': 'https://github.com/HLT-ISTI/QuaPy/wiki',
+        'Source': 'https://github.com/HLT-ISTI/QuaPy/',
+    },
+)