Compare commits
No commits in common. "b3ccf71edb1a34a15de44da5d3efd49c500674b4" and "9e6b9c8955ce970597dc864d5dd1d8fbfa86d3af" have entirely different histories.
b3ccf71edb
...
9e6b9c8955
|
|
@ -1,9 +1,3 @@
|
|||
Change Log 0.1.9
|
||||
----------------
|
||||
|
||||
<...>
|
||||
|
||||
|
||||
Change Log 0.1.8
|
||||
----------------
|
||||
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from . import util
|
|||
from . import model_selection
|
||||
from . import classification
|
||||
|
||||
__version__ = '0.1.9'
|
||||
__version__ = '0.1.8'
|
||||
|
||||
environ = {
|
||||
'SAMPLE_SIZE': None,
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class KDEBase:
|
|||
"""
|
||||
return np.exp(kde.score_samples(X))
|
||||
|
||||
def get_mixture_components(self, X, y, classes, bandwidth):
|
||||
def get_mixture_components(self, X, y, n_classes, bandwidth):
|
||||
"""
|
||||
Returns an array containing the mixture components, i.e., the KDE functions for each class.
|
||||
|
||||
|
|
@ -62,7 +62,7 @@ class KDEBase:
|
|||
:param bandwidth: float, the bandwidth of the kernel
|
||||
:return: a list of KernelDensity objects, each fitted with the corresponding class-specific covariates
|
||||
"""
|
||||
return [self.get_kde_function(X[y == cat], bandwidth) for cat in classes]
|
||||
return [self.get_kde_function(X[y == cat], bandwidth) for cat in range(n_classes)]
|
||||
|
||||
|
||||
|
||||
|
|
@ -114,7 +114,7 @@ class KDEyML(AggregativeSoftQuantifier, KDEBase):
|
|||
self.random_state=random_state
|
||||
|
||||
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
|
||||
self.mix_densities = self.get_mixture_components(*classif_predictions.Xy, data.classes_, self.bandwidth)
|
||||
self.mix_densities = self.get_mixture_components(*classif_predictions.Xy, data.n_classes, self.bandwidth)
|
||||
return self
|
||||
|
||||
def aggregate(self, posteriors: np.ndarray):
|
||||
|
|
@ -196,7 +196,7 @@ class KDEyHD(AggregativeSoftQuantifier, KDEBase):
|
|||
self.montecarlo_trials = montecarlo_trials
|
||||
|
||||
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
|
||||
self.mix_densities = self.get_mixture_components(*classif_predictions.Xy, data.classes_, self.bandwidth)
|
||||
self.mix_densities = self.get_mixture_components(*classif_predictions.Xy, data.n_classes, self.bandwidth)
|
||||
|
||||
N = self.montecarlo_trials
|
||||
rs = self.random_state
|
||||
|
|
|
|||
|
|
@ -640,8 +640,6 @@ class EMQ(AggregativeSoftQuantifier):
|
|||
raise ValueError('invalid param argument for recalibration method; available ones are '
|
||||
'"nbvs", "bcts", "ts", and "vs".')
|
||||
|
||||
if not np.issubdtype(y.dtype, np.number):
|
||||
y = np.searchsorted(data.classes_, y)
|
||||
self.calibration_function = calibrator(P, np.eye(data.n_classes)[y], posterior_supplied=True)
|
||||
|
||||
if self.exact_train_prev:
|
||||
|
|
@ -683,11 +681,6 @@ class EMQ(AggregativeSoftQuantifier):
|
|||
"""
|
||||
Px = posterior_probabilities
|
||||
Ptr = np.copy(tr_prev)
|
||||
|
||||
if np.product(Ptr) == 0: # some entry is 0; we should smooth the values to avoid 0 division
|
||||
Ptr += epsilon
|
||||
Ptr /= Ptr.sum()
|
||||
|
||||
qs = np.copy(Ptr) # qs (the running estimate) is initialized as the training prevalence
|
||||
|
||||
s, converged = 0, False
|
||||
|
|
|
|||
|
|
@ -1,6 +1,5 @@
|
|||
from typing import Union, Callable
|
||||
import numpy as np
|
||||
from sklearn.feature_extraction.text import CountVectorizer
|
||||
|
||||
from quapy.functional import get_divergence
|
||||
from quapy.data import LabelledCollection
|
||||
|
|
@ -147,53 +146,6 @@ class DMx(BaseQuantifier):
|
|||
return F.argmin_prevalence(loss, n_classes, method=self.search)
|
||||
|
||||
|
||||
class ReadMe(BaseQuantifier):
|
||||
|
||||
def __init__(self, bootstrap_trials=100, bootstrap_range=100, bagging_trials=100, bagging_range=25, **vectorizer_kwargs):
|
||||
self.bootstrap_trials = bootstrap_trials
|
||||
self.bootstrap_range = bootstrap_range
|
||||
self.bagging_trials = bagging_trials
|
||||
self.bagging_range = bagging_range
|
||||
self.vectorizer_kwargs = vectorizer_kwargs
|
||||
|
||||
def fit(self, data: LabelledCollection):
|
||||
X, y = data.Xy
|
||||
self.vectorizer = CountVectorizer(binary=True, **self.vectorizer_kwargs)
|
||||
X = self.vectorizer.fit_transform(X)
|
||||
self.class_conditional_X = {i: X[y==i] for i in range(data.classes_)}
|
||||
|
||||
def quantify(self, instances):
|
||||
X = self.vectorizer.transform(instances)
|
||||
|
||||
# number of features
|
||||
num_docs, num_feats = X.shape
|
||||
|
||||
# bootstrap
|
||||
p_boots = []
|
||||
for _ in range(self.bootstrap_trials):
|
||||
docs_idx = np.random.choice(num_docs, size=self.bootstra_range, replace=False)
|
||||
class_conditional_X = {i: X[docs_idx] for i, X in self.class_conditional_X.items()}
|
||||
Xboot = X[docs_idx]
|
||||
|
||||
# bagging
|
||||
p_bags = []
|
||||
for _ in range(self.bagging_trials):
|
||||
feat_idx = np.random.choice(num_feats, size=self.bagging_range, replace=False)
|
||||
class_conditional_Xbag = {i: X[:, feat_idx] for i, X in class_conditional_X.items()}
|
||||
Xbag = Xboot[:,feat_idx]
|
||||
p = self.std_constrained_linear_ls(Xbag, class_conditional_Xbag)
|
||||
p_bags.append(p)
|
||||
p_boots.append(np.mean(p_bags, axis=0))
|
||||
|
||||
p_mean = np.mean(p_boots, axis=0)
|
||||
p_std = np.std(p_bags, axis=0)
|
||||
|
||||
return p_mean
|
||||
|
||||
|
||||
def std_constrained_linear_ls(self, X, class_cond_X: dict):
|
||||
pass
|
||||
|
||||
|
||||
def _get_features_range(X):
|
||||
feat_ranges = []
|
||||
|
|
|
|||
|
|
@ -56,7 +56,6 @@ def parallel(func, args, n_jobs, seed=None, asarray=True, backend='loky'):
|
|||
:param seed: the numeric seed
|
||||
:param asarray: set to True to return a np.ndarray instead of a list
|
||||
:param backend: indicates the backend used for handling parallel works
|
||||
:param open_args: if True, then the delayed function is called on *args_i, instead of on args_i
|
||||
"""
|
||||
def func_dec(environ, seed, *args):
|
||||
qp.environ = environ.copy()
|
||||
|
|
@ -75,40 +74,6 @@ def parallel(func, args, n_jobs, seed=None, asarray=True, backend='loky'):
|
|||
return out
|
||||
|
||||
|
||||
def parallel_unpack(func, args, n_jobs, seed=None, asarray=True, backend='loky'):
|
||||
"""
|
||||
A wrapper of multiprocessing:
|
||||
|
||||
>>> Parallel(n_jobs=n_jobs)(
|
||||
>>> delayed(func)(*args_i) for args_i in args
|
||||
>>> )
|
||||
|
||||
that takes the `quapy.environ` variable as input silently.
|
||||
Seeds the child processes to ensure reproducibility when n_jobs>1.
|
||||
|
||||
:param func: callable
|
||||
:param args: args of func
|
||||
:param seed: the numeric seed
|
||||
:param asarray: set to True to return a np.ndarray instead of a list
|
||||
:param backend: indicates the backend used for handling parallel works
|
||||
"""
|
||||
|
||||
def func_dec(environ, seed, *args):
|
||||
qp.environ = environ.copy()
|
||||
qp.environ['N_JOBS'] = 1
|
||||
# set a context with a temporal seed to ensure results are reproducibles in parallel
|
||||
with ExitStack() as stack:
|
||||
if seed is not None:
|
||||
stack.enter_context(qp.util.temp_seed(seed))
|
||||
return func(*args)
|
||||
|
||||
out = Parallel(n_jobs=n_jobs, backend=backend)(
|
||||
delayed(func_dec)(qp.environ, None if seed is None else seed + i, *args_i) for i, args_i in enumerate(args)
|
||||
)
|
||||
if asarray:
|
||||
out = np.asarray(out)
|
||||
return out
|
||||
|
||||
@contextlib.contextmanager
|
||||
def temp_seed(random_state):
|
||||
"""
|
||||
|
|
|
|||
Loading…
Reference in New Issue