forked from moreo/QuaPy
copying a modification from devel
This commit is contained in:
parent
3932cf22ce
commit
29eaa54d82
quapy/data
|
@ -108,8 +108,7 @@ class LabelledCollection:
|
||||||
"""
|
"""
|
||||||
Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
|
Returns an index to be used to extract a random sample of desired size and desired prevalence values. If the
|
||||||
prevalence values are not specified, then returns the index of a uniform sampling.
|
prevalence values are not specified, then returns the index of a uniform sampling.
|
||||||
For each class, the sampling is drawn with replacement if the requested prevalence is larger than
|
For each class, the sampling is drawn with replacement.
|
||||||
the actual prevalence of the class, or without replacement otherwise.
|
|
||||||
|
|
||||||
:param size: integer, the requested size
|
:param size: integer, the requested size
|
||||||
:param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since
|
:param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since
|
||||||
|
@ -153,7 +152,7 @@ class LabelledCollection:
|
||||||
for class_, n_requested in n_requests.items():
|
for class_, n_requested in n_requests.items():
|
||||||
n_candidates = len(self.index[class_])
|
n_candidates = len(self.index[class_])
|
||||||
index_sample = self.index[class_][
|
index_sample = self.index[class_][
|
||||||
np.random.choice(n_candidates, size=n_requested, replace=(n_requested > n_candidates))
|
np.random.choice(n_candidates, size=n_requested, replace=True)
|
||||||
] if n_requested > 0 else []
|
] if n_requested > 0 else []
|
||||||
|
|
||||||
indexes_sample.append(index_sample)
|
indexes_sample.append(index_sample)
|
||||||
|
@ -168,8 +167,7 @@ class LabelledCollection:
|
||||||
def uniform_sampling_index(self, size, random_state=None):
|
def uniform_sampling_index(self, size, random_state=None):
|
||||||
"""
|
"""
|
||||||
Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
|
Returns an index to be used to extract a uniform sample of desired size. The sampling is drawn
|
||||||
with replacement if the requested size is greater than the number of instances, or without replacement
|
with replacement.
|
||||||
otherwise.
|
|
||||||
|
|
||||||
:param size: integer, the size of the uniform sample
|
:param size: integer, the size of the uniform sample
|
||||||
:param random_state: if specified, guarantees reproducibility of the split.
|
:param random_state: if specified, guarantees reproducibility of the split.
|
||||||
|
@ -179,13 +177,12 @@ class LabelledCollection:
|
||||||
ng = RandomState(seed=random_state)
|
ng = RandomState(seed=random_state)
|
||||||
else:
|
else:
|
||||||
ng = np.random
|
ng = np.random
|
||||||
return ng.choice(len(self), size, replace=size > len(self))
|
return ng.choice(len(self), size, replace=True)
|
||||||
|
|
||||||
def sampling(self, size, *prevs, shuffle=True, random_state=None):
|
def sampling(self, size, *prevs, shuffle=True, random_state=None):
|
||||||
"""
|
"""
|
||||||
Return a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence
|
Return a random sample (an instance of :class:`LabelledCollection`) of desired size and desired prevalence
|
||||||
values. For each class, the sampling is drawn without replacement if the requested prevalence is larger than
|
values. For each class, the sampling is drawn with replacement.
|
||||||
the actual prevalence of the class, or with replacement otherwise.
|
|
||||||
|
|
||||||
:param size: integer, the requested size
|
:param size: integer, the requested size
|
||||||
:param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since
|
:param prevs: the prevalence for each class; the prevalence value for the last class can be left empty since
|
||||||
|
@ -202,8 +199,7 @@ class LabelledCollection:
|
||||||
def uniform_sampling(self, size, random_state=None):
|
def uniform_sampling(self, size, random_state=None):
|
||||||
"""
|
"""
|
||||||
Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn
|
Returns a uniform sample (an instance of :class:`LabelledCollection`) of desired size. The sampling is drawn
|
||||||
with replacement if the requested size is greater than the number of instances, or without replacement
|
with replacement.
|
||||||
otherwise.
|
|
||||||
|
|
||||||
:param size: integer, the requested size
|
:param size: integer, the requested size
|
||||||
:param random_state: if specified, guarantees reproducibility of the split.
|
:param random_state: if specified, guarantees reproducibility of the split.
|
||||||
|
@ -236,24 +232,11 @@ class LabelledCollection:
|
||||||
:return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the
|
:return: two instances of :class:`LabelledCollection`, the first one with `train_prop` elements, and the
|
||||||
second one with `1-train_prop` elements
|
second one with `1-train_prop` elements
|
||||||
"""
|
"""
|
||||||
instances = self.instances
|
|
||||||
labels = self.labels
|
|
||||||
remainder = None
|
|
||||||
for idx in np.argwhere(self.counts()==1):
|
|
||||||
class_with_1 = self.classes_[idx.item()]
|
|
||||||
if remainder is None:
|
|
||||||
remainder = LabelledCollection(instances[labels==class_with_1], [class_with_1], classes=self.classes_)
|
|
||||||
else:
|
|
||||||
remainder += LabelledCollection(instances[labels==class_with_1], [class_with_1], classes=self.classes_)
|
|
||||||
instances = instances[labels!=class_with_1]
|
|
||||||
labels = labels[labels!=class_with_1]
|
|
||||||
tr_docs, te_docs, tr_labels, te_labels = train_test_split(
|
tr_docs, te_docs, tr_labels, te_labels = train_test_split(
|
||||||
instances, labels, train_size=train_prop, stratify=labels, random_state=random_state
|
self.instances, self.labels, train_size=train_prop, stratify=self.labels, random_state=random_state
|
||||||
)
|
)
|
||||||
training = LabelledCollection(tr_docs, tr_labels, classes=self.classes_)
|
training = LabelledCollection(tr_docs, tr_labels, classes=self.classes_)
|
||||||
test = LabelledCollection(te_docs, te_labels, classes=self.classes_)
|
test = LabelledCollection(te_docs, te_labels, classes=self.classes_)
|
||||||
if remainder is not None:
|
|
||||||
training += remainder
|
|
||||||
return training, test
|
return training, test
|
||||||
|
|
||||||
def split_random(self, train_prop=0.6, random_state=None):
|
def split_random(self, train_prop=0.6, random_state=None):
|
||||||
|
|
Loading…
Reference in New Issue