forked from moreo/QuaPy
Trying to understand the poor performance of quantifiers with relevance sampling
This commit is contained in:
parent
6ea627449c
commit
1b1f41dc28
|
@ -49,8 +49,9 @@ def NewQuantifier(quantifiername, classifiername):
|
||||||
def newQ():
|
def newQ():
|
||||||
# return PACC(NewClassifier(classifiername), val_split=0.4)
|
# return PACC(NewClassifier(classifiername), val_split=0.4)
|
||||||
# return CC(CalibratedClassifierCV(NewClassifier(classifiername)))
|
# return CC(CalibratedClassifierCV(NewClassifier(classifiername)))
|
||||||
return ClassWeightPCC()
|
# return ClassWeightPCC()
|
||||||
return RegionProbAdjustmentGlobal(newQ, k=10, clustering='kmeans')
|
return CC(NewClassifier(classifiername))
|
||||||
|
return RegionProbAdjustmentGlobal(newQ, k=20, clustering='kmeans')
|
||||||
raise ValueError('unknown quantifier', quantifiername)
|
raise ValueError('unknown quantifier', quantifiername)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -203,7 +203,7 @@ class RegionProbAdjustmentGlobal(BaseQuantifier):
|
||||||
# g = self._get_regions(data.instances)
|
# g = self._get_regions(data.instances)
|
||||||
X, y = data.Xy
|
X, y = data.Xy
|
||||||
self.g_quantifiers = {}
|
self.g_quantifiers = {}
|
||||||
trivial=0
|
trivial, trivial_data = 0, 0
|
||||||
for gi in np.unique(g):
|
for gi in np.unique(g):
|
||||||
qi_data = LabelledCollection(X[g==gi], y[g==gi], classes_=data.classes_)
|
qi_data = LabelledCollection(X[g==gi], y[g==gi], classes_=data.classes_)
|
||||||
if qi_data.counts()[1] <= 1:
|
if qi_data.counts()[1] <= 1:
|
||||||
|
@ -213,12 +213,14 @@ class RegionProbAdjustmentGlobal(BaseQuantifier):
|
||||||
# if qi_data.prevalence()[0] == 1: # all negatives
|
# if qi_data.prevalence()[0] == 1: # all negatives
|
||||||
self.g_quantifiers[gi] = TrivialRejectorQuantifier()
|
self.g_quantifiers[gi] = TrivialRejectorQuantifier()
|
||||||
trivial+=1
|
trivial+=1
|
||||||
|
trivial_data += len(qi_data)
|
||||||
elif qi_data.counts()[0] <= 1: # (almost) all positives
|
elif qi_data.counts()[0] <= 1: # (almost) all positives
|
||||||
self.g_quantifiers[gi] = TrivialAcceptorQuantifier()
|
self.g_quantifiers[gi] = TrivialAcceptorQuantifier()
|
||||||
trivial += 1
|
trivial += 1
|
||||||
|
trivial_data += len(qi_data)
|
||||||
else:
|
else:
|
||||||
self.g_quantifiers[gi] = self.quantifier_fn().fit(qi_data)
|
self.g_quantifiers[gi] = self.quantifier_fn().fit(qi_data)
|
||||||
print(f'trivials={trivial}')
|
print(f'trivials={trivial} amounting to {trivial_data*100.0/len(data):.2f}% of the data')
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
|
@ -7,7 +7,7 @@ k=100
|
||||||
initsize=500
|
initsize=500
|
||||||
initprev=-1
|
initprev=-1
|
||||||
seed=1
|
seed=1
|
||||||
Q=GRPACC
|
Q=URBQ
|
||||||
CLS=lr
|
CLS=lr
|
||||||
sampling=relevance_sampling
|
sampling=relevance_sampling
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue