starting refactor...
This commit is contained in:
parent
9274ea21aa
commit
3051c08184
|
@ -75,11 +75,12 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
||||||
empty_class_names = data.classes_[empty_classes]
|
empty_class_names = data.classes_[empty_classes]
|
||||||
raise ValueError(f'classes {empty_class_names} have no training examples')
|
raise ValueError(f'classes {empty_class_names} have no training examples')
|
||||||
|
|
||||||
def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None):
|
def fit(self, X, y, fit_classifier=True, val_split=None):
|
||||||
"""
|
"""
|
||||||
Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.
|
Trains the aggregative quantifier. This comes down to training a classifier and an aggregation function.
|
||||||
|
|
||||||
:param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
|
:param X: `array-like` of shape `(n_samples, n_features)` consisting of the training covariates
|
||||||
|
:param y: `array-like` of shape `(n_samples,)` consisting of the instances labels
|
||||||
:param fit_classifier: whether to train the learner (default is True). Set to False if the
|
:param fit_classifier: whether to train the learner (default is True). Set to False if the
|
||||||
learner has been trained outside the quantifier.
|
learner has been trained outside the quantifier.
|
||||||
:param val_split: specifies the data used for generating classifier predictions. This specification
|
:param val_split: specifies the data used for generating classifier predictions. This specification
|
||||||
|
@ -92,16 +93,17 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
||||||
:return: self
|
:return: self
|
||||||
"""
|
"""
|
||||||
self._check_init_parameters()
|
self._check_init_parameters()
|
||||||
classif_predictions = self.classifier_fit_predict(data, fit_classifier, predict_on=val_split)
|
P, y = self.classifier_fit_predict(X, y, fit_classifier, predict_on=val_split)
|
||||||
self.aggregation_fit(classif_predictions, data)
|
self.aggregation_fit(P, y)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def classifier_fit_predict(self, data: LabelledCollection, fit_classifier=True, predict_on=None):
|
def classifier_fit_predict(self, X, y, fit_classifier=True, predict_on=None):
|
||||||
"""
|
"""
|
||||||
Trains the classifier if requested (`fit_classifier=True`) and generates the necessary predictions to
|
Trains the classifier if requested (`fit_classifier=True`) and generates the necessary predictions to
|
||||||
train the aggregation function.
|
train the aggregation function.
|
||||||
|
|
||||||
:param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
|
:param X: `array-like` of shape `(n_samples, n_features)` consisting of the training covariates
|
||||||
|
:param y: `array-like` of shape `(n_samples,)` consisting of the instances labels
|
||||||
:param fit_classifier: whether to train the learner (default is True). Set to False if the
|
:param fit_classifier: whether to train the learner (default is True). Set to False if the
|
||||||
learner has been trained outside the quantifier.
|
learner has been trained outside the quantifier.
|
||||||
:param predict_on: specifies the set on which predictions need to be issued. This parameter can
|
:param predict_on: specifies the set on which predictions need to be issued. This parameter can
|
||||||
|
@ -113,10 +115,11 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
||||||
"""
|
"""
|
||||||
assert isinstance(fit_classifier, bool), 'unexpected type for "fit_classifier", must be boolean'
|
assert isinstance(fit_classifier, bool), 'unexpected type for "fit_classifier", must be boolean'
|
||||||
|
|
||||||
|
data = LabelledCollection(X, y)
|
||||||
self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba'))
|
self._check_classifier(adapt_if_necessary=(self._classifier_method() == 'predict_proba'))
|
||||||
|
|
||||||
if fit_classifier:
|
if fit_classifier:
|
||||||
self._check_non_empty_classes(data)
|
self._check_non_empty_classes(y)
|
||||||
|
|
||||||
if predict_on is None:
|
if predict_on is None:
|
||||||
if not fit_classifier:
|
if not fit_classifier:
|
||||||
|
@ -170,16 +173,16 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
||||||
f'use either a float indicating the split proportion, or a '
|
f'use either a float indicating the split proportion, or a '
|
||||||
f'tuple (X,y) indicating the validation partition')
|
f'tuple (X,y) indicating the validation partition')
|
||||||
|
|
||||||
return predictions
|
return predictions.Xy
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
|
def aggregation_fit(self, classif_predictions: np.ndarray, y: np.ndarray):
|
||||||
"""
|
"""
|
||||||
Trains the aggregation function.
|
Trains the aggregation function.
|
||||||
|
|
||||||
:param classif_predictions: a :class:`quapy.data.base.LabelledCollection` containing,
|
:param classif_predictions: `array-like` of shape `(n_samples, n_classes)` consisting of the classifier
|
||||||
as instances, the predictions issued by the classifier and, as labels, the true labels
|
predictions for each class
|
||||||
:param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
|
:param y: `array-like` of shape `(n_samples,)` consisting of the instances labels
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@ -201,16 +204,16 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
||||||
"""
|
"""
|
||||||
self.classifier_ = classifier
|
self.classifier_ = classifier
|
||||||
|
|
||||||
def classify(self, instances):
|
def classify(self, X):
|
||||||
"""
|
"""
|
||||||
Provides the label predictions for the given instances. The predictions should respect the format expected by
|
Provides the label predictions for the given instances. The predictions should respect the format expected by
|
||||||
:meth:`aggregate`, e.g., posterior probabilities for probabilistic quantifiers, or crisp predictions for
|
:meth:`aggregate`, e.g., posterior probabilities for probabilistic quantifiers, or crisp predictions for
|
||||||
non-probabilistic quantifiers. The default one is "decision_function".
|
non-probabilistic quantifiers. The default one is "decision_function".
|
||||||
|
|
||||||
:param instances: array-like of shape `(n_instances, n_features,)`
|
:param X: array-like of shape `(n_instances, n_features,)`
|
||||||
:return: np.ndarray of shape `(n_instances,)` with label predictions
|
:return: np.ndarray of shape `(n_instances,)` with label predictions
|
||||||
"""
|
"""
|
||||||
return getattr(self.classifier, self._classifier_method())(instances)
|
return getattr(self.classifier, self._classifier_method())(X)
|
||||||
|
|
||||||
def _classifier_method(self):
|
def _classifier_method(self):
|
||||||
"""
|
"""
|
||||||
|
@ -230,15 +233,15 @@ class AggregativeQuantifier(BaseQuantifier, ABC):
|
||||||
assert hasattr(self.classifier, self._classifier_method()), \
|
assert hasattr(self.classifier, self._classifier_method()), \
|
||||||
f"the method does not implement the required {self._classifier_method()} method"
|
f"the method does not implement the required {self._classifier_method()} method"
|
||||||
|
|
||||||
def quantify(self, instances):
|
def quantify(self, X):
|
||||||
"""
|
"""
|
||||||
Generate class prevalence estimates for the sample's instances by aggregating the label predictions generated
|
Generate class prevalence estimates for the sample's instances by aggregating the label predictions generated
|
||||||
by the classifier.
|
by the classifier.
|
||||||
|
|
||||||
:param instances: array-like
|
:param X: array-like
|
||||||
:return: `np.ndarray` of shape `(n_classes)` with class prevalence estimates.
|
:return: `np.ndarray` of shape `(n_classes)` with class prevalence estimates.
|
||||||
"""
|
"""
|
||||||
classif_predictions = self.classify(instances)
|
classif_predictions = self.classify(X)
|
||||||
return self.aggregate(classif_predictions)
|
return self.aggregate(classif_predictions)
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
|
@ -328,9 +331,9 @@ class BinaryAggregativeQuantifier(AggregativeQuantifier, BinaryQuantifier):
|
||||||
def neg_label(self):
|
def neg_label(self):
|
||||||
return self.classifier.classes_[0]
|
return self.classifier.classes_[0]
|
||||||
|
|
||||||
def fit(self, data: LabelledCollection, fit_classifier=True, val_split=None):
|
def fit(self, X, y, fit_classifier=True, val_split=None):
|
||||||
self._check_binary(data, self.__class__.__name__)
|
self._check_binary(y, self.__class__.__name__)
|
||||||
return super().fit(data, fit_classifier, val_split)
|
return super().fit(X, y, fit_classifier, val_split)
|
||||||
|
|
||||||
|
|
||||||
# Methods
|
# Methods
|
||||||
|
@ -346,12 +349,12 @@ class CC(AggregativeCrispQuantifier):
|
||||||
def __init__(self, classifier: BaseEstimator):
|
def __init__(self, classifier: BaseEstimator):
|
||||||
self.classifier = classifier
|
self.classifier = classifier
|
||||||
|
|
||||||
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
|
def aggregation_fit(self, classif_predictions: np.ndarray, y: np.ndarray):
|
||||||
"""
|
"""
|
||||||
Nothing to do here!
|
Nothing to do here!
|
||||||
|
|
||||||
:param classif_predictions: not used
|
:param classif_predictions: not used
|
||||||
:param data: not used
|
:param y: not used
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -376,12 +379,12 @@ class PCC(AggregativeSoftQuantifier):
|
||||||
def __init__(self, classifier: BaseEstimator):
|
def __init__(self, classifier: BaseEstimator):
|
||||||
self.classifier = classifier
|
self.classifier = classifier
|
||||||
|
|
||||||
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
|
def aggregation_fit(self, classif_posteriors: np.ndarray, y: np.ndarray):
|
||||||
"""
|
"""
|
||||||
Nothing to do here!
|
Nothing to do here!
|
||||||
|
|
||||||
:param classif_predictions: not used
|
:param classif_posteriors: not used
|
||||||
:param data: not used
|
:param y: not used
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@ -482,17 +485,16 @@ class ACC(AggregativeCrispQuantifier):
|
||||||
if self.norm not in ACC.NORMALIZATIONS:
|
if self.norm not in ACC.NORMALIZATIONS:
|
||||||
raise ValueError(f"unknown normalization; valid ones are {ACC.NORMALIZATIONS}")
|
raise ValueError(f"unknown normalization; valid ones are {ACC.NORMALIZATIONS}")
|
||||||
|
|
||||||
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
|
def aggregation_fit(self, classif_predictions: np.ndarray, y: np.ndarray):
|
||||||
"""
|
"""
|
||||||
Estimates the misclassification rates.
|
Estimates the misclassification rates.
|
||||||
|
|
||||||
:param classif_predictions: a :class:`quapy.data.base.LabelledCollection` containing,
|
:param classif_predictions: `array-like` of shape `(n_samples, n_classes)`
|
||||||
as instances, the label predictions issued by the classifier and, as labels, the true labels
|
consisting of the posterior probabilities of the training examples
|
||||||
:param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
|
:param y: `array-like` of shape `(n_samples,)` consisting of the instances labels
|
||||||
"""
|
"""
|
||||||
pred_labels, true_labels = classif_predictions.Xy
|
|
||||||
self.cc = CC(self.classifier)
|
self.cc = CC(self.classifier)
|
||||||
self.Pte_cond_estim_ = ACC.getPteCondEstim(self.classifier.classes_, true_labels, pred_labels)
|
self.Pte_cond_estim_ = ACC.getPteCondEstim(self.classifier.classes_, y, classif_predictions)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def getPteCondEstim(cls, classes, y, y_):
|
def getPteCondEstim(cls, classes, y, y_):
|
||||||
|
@ -593,17 +595,15 @@ class PACC(AggregativeSoftQuantifier):
|
||||||
if self.norm not in ACC.NORMALIZATIONS:
|
if self.norm not in ACC.NORMALIZATIONS:
|
||||||
raise ValueError(f"unknown normalization; valid ones are {ACC.NORMALIZATIONS}")
|
raise ValueError(f"unknown normalization; valid ones are {ACC.NORMALIZATIONS}")
|
||||||
|
|
||||||
def aggregation_fit(self, classif_predictions: LabelledCollection, data: LabelledCollection):
|
def aggregation_fit(self, classif_predictions: np.ndarray, y: np.ndarray):
|
||||||
"""
|
"""
|
||||||
Estimates the misclassification rates
|
Estimates the misclassification rates
|
||||||
|
|
||||||
:param classif_predictions: a :class:`quapy.data.base.LabelledCollection` containing,
|
:param classif_predictions: `array-like` of shape `(n_samples, n_classes)`
|
||||||
as instances, the posterior probabilities issued by the classifier and, as labels, the true labels
|
consisting of the posterior probabilities of the training examples
|
||||||
:param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
|
:param y: `array-like` of shape `(n_samples,)` consisting of the instances labels """
|
||||||
"""
|
|
||||||
posteriors, true_labels = classif_predictions.Xy
|
|
||||||
self.pcc = PCC(self.classifier)
|
self.pcc = PCC(self.classifier)
|
||||||
self.Pte_cond_estim_ = PACC.getPteCondEstim(self.classifier.classes_, true_labels, posteriors)
|
self.Pte_cond_estim_ = PACC.getPteCondEstim(self.classifier.classes_, y, classif_predictions)
|
||||||
|
|
||||||
def aggregate(self, classif_posteriors):
|
def aggregate(self, classif_posteriors):
|
||||||
prevs_estim = self.pcc.aggregate(classif_posteriors)
|
prevs_estim = self.pcc.aggregate(classif_posteriors)
|
||||||
|
|
|
@ -19,21 +19,22 @@ class BaseQuantifier(BaseEstimator):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def fit(self, data: LabelledCollection):
|
def fit(self, X, y):
|
||||||
"""
|
"""
|
||||||
Trains a quantifier.
|
Trains a quantifier.
|
||||||
|
|
||||||
:param data: a :class:`quapy.data.base.LabelledCollection` consisting of the training data
|
:param X: `array-like` of shape `(n_samples, n_features)` consisting of the training covariates
|
||||||
|
:param y: `array-like` of shape `(n_samples,)` consisting of the instances labels
|
||||||
:return: self
|
:return: self
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def quantify(self, instances):
|
def quantify(self, X):
|
||||||
"""
|
"""
|
||||||
Generate class prevalence estimates for the sample's instances
|
Generate class prevalence estimates for the sample's instances
|
||||||
|
|
||||||
:param instances: array-like
|
:param X: `array-like` of shape `(n_samples, n_features)` consisting of the test covariates
|
||||||
:return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.
|
:return: `np.ndarray` of shape `(n_classes,)` with class prevalence estimates.
|
||||||
"""
|
"""
|
||||||
...
|
...
|
||||||
|
@ -45,8 +46,9 @@ class BinaryQuantifier(BaseQuantifier):
|
||||||
(typically, to be interpreted as one class and its complement).
|
(typically, to be interpreted as one class and its complement).
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def _check_binary(self, data: LabelledCollection, quantifier_name):
|
def _check_binary(self, y, quantifier_name):
|
||||||
assert data.binary, f'{quantifier_name} works only on problems of binary classification. ' \
|
n_classes = len(np.unique(y))
|
||||||
|
assert n_classes==2, f'{quantifier_name} works only on problems of binary classification. ' \
|
||||||
f'Use the class OneVsAll to enable {quantifier_name} work on single-label data.'
|
f'Use the class OneVsAll to enable {quantifier_name} work on single-label data.'
|
||||||
|
|
||||||
|
|
||||||
|
@ -78,7 +80,8 @@ class OneVsAllGeneric(OneVsAll, BaseQuantifier):
|
||||||
self.binary_quantifier = binary_quantifier
|
self.binary_quantifier = binary_quantifier
|
||||||
self.n_jobs = qp._get_njobs(n_jobs)
|
self.n_jobs = qp._get_njobs(n_jobs)
|
||||||
|
|
||||||
def fit(self, data: LabelledCollection, fit_classifier=True):
|
def fit(self, X, y, fit_classifier=True):
|
||||||
|
data = LabelledCollection(X, y)
|
||||||
assert not data.binary, f'{self.__class__.__name__} expect non-binary data'
|
assert not data.binary, f'{self.__class__.__name__} expect non-binary data'
|
||||||
assert fit_classifier == True, 'fit_classifier must be True'
|
assert fit_classifier == True, 'fit_classifier must be True'
|
||||||
|
|
||||||
|
@ -93,8 +96,8 @@ class OneVsAllGeneric(OneVsAll, BaseQuantifier):
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
def quantify(self, instances):
|
def quantify(self, X):
|
||||||
prevalences = self._parallel(self._delayed_binary_predict, instances)
|
prevalences = self._parallel(self._delayed_binary_predict, X)
|
||||||
return qp.functional.normalize_prevalence(prevalences)
|
return qp.functional.normalize_prevalence(prevalences)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -8,7 +8,7 @@ from quapy.method.base import BaseQuantifier, BinaryQuantifier
|
||||||
import quapy.functional as F
|
import quapy.functional as F
|
||||||
|
|
||||||
|
|
||||||
class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):
|
class MLPE(BaseQuantifier):
|
||||||
"""
|
"""
|
||||||
The `Maximum Likelihood Prevalence Estimation` (MLPE) method is a lazy method that assumes there is no prior
|
The `Maximum Likelihood Prevalence Estimation` (MLPE) method is a lazy method that assumes there is no prior
|
||||||
probability shift between training and test instances (put another way, that the i.i.d. assumption holds).
|
probability shift between training and test instances (put another way, that the i.i.d. assumption holds).
|
||||||
|
@ -20,13 +20,15 @@ class MaximumLikelihoodPrevalenceEstimation(BaseQuantifier):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._classes_ = None
|
self._classes_ = None
|
||||||
|
|
||||||
def fit(self, data: LabelledCollection):
|
def fit(self, X, y):
|
||||||
"""
|
"""
|
||||||
Computes the training prevalence and stores it.
|
Computes the training prevalence and stores it.
|
||||||
|
|
||||||
:param data: the training sample
|
:param X: `array-like` of shape `(n_samples, n_features)` consisting of the training covariates
|
||||||
|
:param y: `array-like` of shape `(n_samples,)` consisting of the instances labels
|
||||||
:return: self
|
:return: self
|
||||||
"""
|
"""
|
||||||
|
data = LabelledCollection(X, y)
|
||||||
self.estimated_prevalence = data.prevalence()
|
self.estimated_prevalence = data.prevalence()
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
@ -100,7 +102,7 @@ class DMx(BaseQuantifier):
|
||||||
|
|
||||||
return distributions
|
return distributions
|
||||||
|
|
||||||
def fit(self, data: LabelledCollection):
|
def fit(self, X, y):
|
||||||
"""
|
"""
|
||||||
Generates the validation distributions out of the training data (covariates).
|
Generates the validation distributions out of the training data (covariates).
|
||||||
The validation distributions have shape `(n, nfeats, nbins)`, with `n` the number of classes, `nfeats`
|
The validation distributions have shape `(n, nfeats, nbins)`, with `n` the number of classes, `nfeats`
|
||||||
|
@ -109,15 +111,16 @@ class DMx(BaseQuantifier):
|
||||||
training data labelled with class `i`; while `dij = di[j]` is the discrete distribution for feature j in
|
training data labelled with class `i`; while `dij = di[j]` is the discrete distribution for feature j in
|
||||||
training data labelled with class `i`, and `dij[k]` is the fraction of instances with a value in the `k`-th bin.
|
training data labelled with class `i`, and `dij[k]` is the fraction of instances with a value in the `k`-th bin.
|
||||||
|
|
||||||
:param data: the training set
|
:param X: `array-like` of shape `(n_samples, n_features)` consisting of the training covariates
|
||||||
|
:param y: `array-like` of shape `(n_samples,)` consisting of the instances labels
|
||||||
|
:return: self
|
||||||
"""
|
"""
|
||||||
X, y = data.Xy
|
data = LabelledCollection(X, y)
|
||||||
|
|
||||||
self.nfeats = X.shape[1]
|
self.nfeats = X.shape[1]
|
||||||
self.feat_ranges = _get_features_range(X)
|
self.feat_ranges = _get_features_range(X)
|
||||||
|
|
||||||
self.validation_distribution = np.asarray(
|
self.validation_distribution = np.asarray(
|
||||||
[self.__get_distributions(X[y==cat]) for cat in range(data.n_classes)]
|
[self.__get_distributions(X[y==cat]) for cat in range(data.classes_)]
|
||||||
)
|
)
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
@ -147,53 +150,53 @@ class DMx(BaseQuantifier):
|
||||||
return F.argmin_prevalence(loss, n_classes, method=self.search)
|
return F.argmin_prevalence(loss, n_classes, method=self.search)
|
||||||
|
|
||||||
|
|
||||||
class ReadMe(BaseQuantifier):
|
# class ReadMe(BaseQuantifier):
|
||||||
|
#
|
||||||
def __init__(self, bootstrap_trials=100, bootstrap_range=100, bagging_trials=100, bagging_range=25, **vectorizer_kwargs):
|
# def __init__(self, bootstrap_trials=100, bootstrap_range=100, bagging_trials=100, bagging_range=25, **vectorizer_kwargs):
|
||||||
raise NotImplementedError('under development ...')
|
# raise NotImplementedError('under development ...')
|
||||||
self.bootstrap_trials = bootstrap_trials
|
# self.bootstrap_trials = bootstrap_trials
|
||||||
self.bootstrap_range = bootstrap_range
|
# self.bootstrap_range = bootstrap_range
|
||||||
self.bagging_trials = bagging_trials
|
# self.bagging_trials = bagging_trials
|
||||||
self.bagging_range = bagging_range
|
# self.bagging_range = bagging_range
|
||||||
self.vectorizer_kwargs = vectorizer_kwargs
|
# self.vectorizer_kwargs = vectorizer_kwargs
|
||||||
|
#
|
||||||
def fit(self, data: LabelledCollection):
|
# def fit(self, data: LabelledCollection):
|
||||||
X, y = data.Xy
|
# X, y = data.Xy
|
||||||
self.vectorizer = CountVectorizer(binary=True, **self.vectorizer_kwargs)
|
# self.vectorizer = CountVectorizer(binary=True, **self.vectorizer_kwargs)
|
||||||
X = self.vectorizer.fit_transform(X)
|
# X = self.vectorizer.fit_transform(X)
|
||||||
self.class_conditional_X = {i: X[y==i] for i in range(data.classes_)}
|
# self.class_conditional_X = {i: X[y==i] for i in range(data.classes_)}
|
||||||
|
#
|
||||||
def quantify(self, instances):
|
# def quantify(self, instances):
|
||||||
X = self.vectorizer.transform(instances)
|
# X = self.vectorizer.transform(instances)
|
||||||
|
#
|
||||||
# number of features
|
# # number of features
|
||||||
num_docs, num_feats = X.shape
|
# num_docs, num_feats = X.shape
|
||||||
|
#
|
||||||
# bootstrap
|
# # bootstrap
|
||||||
p_boots = []
|
# p_boots = []
|
||||||
for _ in range(self.bootstrap_trials):
|
# for _ in range(self.bootstrap_trials):
|
||||||
docs_idx = np.random.choice(num_docs, size=self.bootstra_range, replace=False)
|
# docs_idx = np.random.choice(num_docs, size=self.bootstra_range, replace=False)
|
||||||
class_conditional_X = {i: X[docs_idx] for i, X in self.class_conditional_X.items()}
|
# class_conditional_X = {i: X[docs_idx] for i, X in self.class_conditional_X.items()}
|
||||||
Xboot = X[docs_idx]
|
# Xboot = X[docs_idx]
|
||||||
|
#
|
||||||
# bagging
|
# # bagging
|
||||||
p_bags = []
|
# p_bags = []
|
||||||
for _ in range(self.bagging_trials):
|
# for _ in range(self.bagging_trials):
|
||||||
feat_idx = np.random.choice(num_feats, size=self.bagging_range, replace=False)
|
# feat_idx = np.random.choice(num_feats, size=self.bagging_range, replace=False)
|
||||||
class_conditional_Xbag = {i: X[:, feat_idx] for i, X in class_conditional_X.items()}
|
# class_conditional_Xbag = {i: X[:, feat_idx] for i, X in class_conditional_X.items()}
|
||||||
Xbag = Xboot[:,feat_idx]
|
# Xbag = Xboot[:,feat_idx]
|
||||||
p = self.std_constrained_linear_ls(Xbag, class_conditional_Xbag)
|
# p = self.std_constrained_linear_ls(Xbag, class_conditional_Xbag)
|
||||||
p_bags.append(p)
|
# p_bags.append(p)
|
||||||
p_boots.append(np.mean(p_bags, axis=0))
|
# p_boots.append(np.mean(p_bags, axis=0))
|
||||||
|
#
|
||||||
p_mean = np.mean(p_boots, axis=0)
|
# p_mean = np.mean(p_boots, axis=0)
|
||||||
p_std = np.std(p_bags, axis=0)
|
# p_std = np.std(p_bags, axis=0)
|
||||||
|
#
|
||||||
return p_mean
|
# return p_mean
|
||||||
|
#
|
||||||
|
#
|
||||||
def std_constrained_linear_ls(self, X, class_cond_X: dict):
|
# def std_constrained_linear_ls(self, X, class_cond_X: dict):
|
||||||
pass
|
# pass
|
||||||
|
|
||||||
|
|
||||||
def _get_features_range(X):
|
def _get_features_range(X):
|
||||||
|
@ -209,4 +212,5 @@ def _get_features_range(X):
|
||||||
# aliases
|
# aliases
|
||||||
#---------------------------------------------------------------
|
#---------------------------------------------------------------
|
||||||
|
|
||||||
|
MaximumLikelihoodPrevalenceEstimation = MLPE
|
||||||
DistributionMatchingX = DMx
|
DistributionMatchingX = DMx
|
Loading…
Reference in New Issue