forked from moreo/QuaPy
Trying to understand the poor performance of quantifiers with relevance sampling
This commit is contained in:
parent
6ea627449c
commit
1b1f41dc28
|
@ -49,8 +49,9 @@ def NewQuantifier(quantifiername, classifiername):
|
|||
def newQ():
|
||||
# return PACC(NewClassifier(classifiername), val_split=0.4)
|
||||
# return CC(CalibratedClassifierCV(NewClassifier(classifiername)))
|
||||
return ClassWeightPCC()
|
||||
return RegionProbAdjustmentGlobal(newQ, k=10, clustering='kmeans')
|
||||
# return ClassWeightPCC()
|
||||
return CC(NewClassifier(classifiername))
|
||||
return RegionProbAdjustmentGlobal(newQ, k=20, clustering='kmeans')
|
||||
raise ValueError('unknown quantifier', quantifiername)
|
||||
|
||||
|
||||
|
|
|
@ -203,7 +203,7 @@ class RegionProbAdjustmentGlobal(BaseQuantifier):
|
|||
# g = self._get_regions(data.instances)
|
||||
X, y = data.Xy
|
||||
self.g_quantifiers = {}
|
||||
trivial=0
|
||||
trivial, trivial_data = 0, 0
|
||||
for gi in np.unique(g):
|
||||
qi_data = LabelledCollection(X[g==gi], y[g==gi], classes_=data.classes_)
|
||||
if qi_data.counts()[1] <= 1:
|
||||
|
@ -213,12 +213,14 @@ class RegionProbAdjustmentGlobal(BaseQuantifier):
|
|||
# if qi_data.prevalence()[0] == 1: # all negatives
|
||||
self.g_quantifiers[gi] = TrivialRejectorQuantifier()
|
||||
trivial+=1
|
||||
trivial_data += len(qi_data)
|
||||
elif qi_data.counts()[0] <= 1: # (almost) all positives
|
||||
self.g_quantifiers[gi] = TrivialAcceptorQuantifier()
|
||||
trivial += 1
|
||||
trivial_data += len(qi_data)
|
||||
else:
|
||||
self.g_quantifiers[gi] = self.quantifier_fn().fit(qi_data)
|
||||
print(f'trivials={trivial}')
|
||||
print(f'trivials={trivial} amounting to {trivial_data*100.0/len(data):.2f}% of the data')
|
||||
|
||||
return self
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ k=100
|
|||
initsize=500
|
||||
initprev=-1
|
||||
seed=1
|
||||
Q=GRPACC
|
||||
Q=URBQ
|
||||
CLS=lr
|
||||
sampling=relevance_sampling
|
||||
|
||||
|
|
Loading…
Reference in New Issue