forked from moreo/QuaPy
more testing
This commit is contained in:
parent
833476ebf8
commit
a3a5bd8da0
|
@ -5,7 +5,7 @@ from sklearn.metrics import f1_score
|
||||||
import functions as fn
|
import functions as fn
|
||||||
import quapy as qp
|
import quapy as qp
|
||||||
import argparse
|
import argparse
|
||||||
from data import LabelledCollection
|
from quapy.data import LabelledCollection
|
||||||
|
|
||||||
|
|
||||||
def eval_classifier(learner, test:LabelledCollection):
|
def eval_classifier(learner, test:LabelledCollection):
|
||||||
|
@ -32,7 +32,10 @@ def main(args):
|
||||||
|
|
||||||
with qp.util.temp_seed(args.seed):
|
with qp.util.temp_seed(args.seed):
|
||||||
# initial labelled data selection
|
# initial labelled data selection
|
||||||
idx = collection.sampling_index(init_nD, *init_prev)
|
if args.initprev == -1:
|
||||||
|
idx = collection.sampling_index(init_nD)
|
||||||
|
else:
|
||||||
|
idx = collection.sampling_index(init_nD, *[1 - args.initprev, args.initprev])
|
||||||
train, pool = fn.split_from_index(collection, idx)
|
train, pool = fn.split_from_index(collection, idx)
|
||||||
first_train = LabelledCollection(train.instances, train.labels)
|
first_train = LabelledCollection(train.instances, train.labels)
|
||||||
|
|
||||||
|
@ -50,8 +53,8 @@ def main(args):
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
|
|
||||||
pool_p_hat_cc, classifier = fn.estimate_prev_CC(train, pool)
|
pool_p_hat_cc, classifier = fn.estimate_prev_CC(train, pool, args.classifier)
|
||||||
pool_p_hat, q_classifier = fn.estimate_prev_Q(train, pool, args.quantifier)
|
pool_p_hat, q_classifier = fn.estimate_prev_Q(train, pool, args.quantifier, args.classifier)
|
||||||
|
|
||||||
f1_clf = eval_classifier(classifier, pool)
|
f1_clf = eval_classifier(classifier, pool)
|
||||||
f1_q = eval_classifier(q_classifier, pool)
|
f1_q = eval_classifier(q_classifier, pool)
|
||||||
|
@ -103,13 +106,18 @@ if __name__=='__main__':
|
||||||
parser.add_argument('--initsize', metavar='SIZE', type=int, help='number of labelled documents at the beginning',
|
parser.add_argument('--initsize', metavar='SIZE', type=int, help='number of labelled documents at the beginning',
|
||||||
default=1000)
|
default=1000)
|
||||||
parser.add_argument('--initprev', metavar='PREV', type=float,
|
parser.add_argument('--initprev', metavar='PREV', type=float,
|
||||||
help='prevalence of the initial sample (-1 for uniform sampling)',
|
help='prevalence of the initial sample (-1 for uniform sampling, default)',
|
||||||
default=0.5)
|
default=-1)
|
||||||
parser.add_argument('--seed', metavar='SEED', type=int,
|
parser.add_argument('--seed', metavar='SEED', type=int,
|
||||||
help='random seed',
|
help='random seed',
|
||||||
default=1)
|
default=1)
|
||||||
|
parser.add_argument('--classifier', metavar='CLS', type=str,
|
||||||
|
help='classifier type (svm, lr)',
|
||||||
|
default='lr')
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
assert 0 < args.initprev < 1, 'wrong value for initsize; should be in (0., 1.)'
|
assert args.initprev==-1.0 or (0 < args.initprev < 1), 'wrong value for initsize; should be in (0., 1.)'
|
||||||
|
if args.initprev==-1: # this is to clean the path, to show initprev:-1 and not initprev:-1.0
|
||||||
|
args.initprev = int(args.initprev)
|
||||||
|
|
||||||
main(args)
|
main(args)
|
||||||
|
|
|
@ -5,9 +5,11 @@ import sys, os, pathlib
|
||||||
|
|
||||||
assert len(sys.argv) == 3, f'wrong args, syntax is: python {sys.argv[0]} <result_input_path> <dynamic (0|1)>'
|
assert len(sys.argv) == 3, f'wrong args, syntax is: python {sys.argv[0]} <result_input_path> <dynamic (0|1)>'
|
||||||
|
|
||||||
file = sys.argv[1]
|
file = str(sys.argv[1])
|
||||||
loop = bool(int(sys.argv[2]))
|
loop = bool(int(sys.argv[2]))
|
||||||
|
|
||||||
|
print(file)
|
||||||
|
|
||||||
plotname = pathlib.Path(file).name.replace(".csv", ".png")
|
plotname = pathlib.Path(file).name.replace(".csv", ".png")
|
||||||
|
|
||||||
if not loop:
|
if not loop:
|
||||||
|
@ -18,7 +20,6 @@ if not loop:
|
||||||
fig, axs = plt.subplots(5)
|
fig, axs = plt.subplots(5)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
while True:
|
while True:
|
||||||
aXn = 0
|
aXn = 0
|
||||||
|
@ -34,7 +35,7 @@ try:
|
||||||
axs[aXn].plot(xs, y_r, label='$R$')
|
axs[aXn].plot(xs, y_r, label='$R$')
|
||||||
axs[aXn].legend()
|
axs[aXn].legend()
|
||||||
axs[aXn].grid()
|
axs[aXn].grid()
|
||||||
axs[aXn].set_ylabel('Recall estimation')
|
axs[aXn].set_ylabel('Recall')
|
||||||
axs[aXn].set_ylim(0,1)
|
axs[aXn].set_ylim(0,1)
|
||||||
aXn+=1
|
aXn+=1
|
||||||
|
|
||||||
|
@ -46,7 +47,7 @@ try:
|
||||||
axs[aXn].plot(xs, y_r, label='te-$Pr(\oplus)$')
|
axs[aXn].plot(xs, y_r, label='te-$Pr(\oplus)$')
|
||||||
axs[aXn].legend()
|
axs[aXn].legend()
|
||||||
axs[aXn].grid()
|
axs[aXn].grid()
|
||||||
axs[aXn].set_ylabel('Prevalence estimation')
|
axs[aXn].set_ylabel('Prevalence')
|
||||||
aXn += 1
|
aXn += 1
|
||||||
|
|
||||||
y_ae = df['AE']
|
y_ae = df['AE']
|
||||||
|
@ -58,14 +59,6 @@ try:
|
||||||
axs[aXn].set_ylabel('Quantification error')
|
axs[aXn].set_ylabel('Quantification error')
|
||||||
aXn += 1
|
aXn += 1
|
||||||
|
|
||||||
axs[aXn].plot(xs, df['Shift'], label='tr-te shift (AE)')
|
|
||||||
axs[aXn].plot(xs, df['tr-prev'], label='tr-$Pr(\oplus)$')
|
|
||||||
axs[aXn].plot(xs, df['te-prev'], label='te-$Pr(\oplus)$')
|
|
||||||
axs[aXn].legend()
|
|
||||||
axs[aXn].grid()
|
|
||||||
axs[aXn].set_ylabel('Train-Test Shift')
|
|
||||||
aXn += 1
|
|
||||||
|
|
||||||
axs[aXn].plot(xs, df['MF1_Q'], label='$F_1(clf(Q))$')
|
axs[aXn].plot(xs, df['MF1_Q'], label='$F_1(clf(Q))$')
|
||||||
axs[aXn].plot(xs, df['MF1_Clf'], label='$F_1(clf(CC))$')
|
axs[aXn].plot(xs, df['MF1_Clf'], label='$F_1(clf(CC))$')
|
||||||
axs[aXn].legend()
|
axs[aXn].legend()
|
||||||
|
@ -73,6 +66,14 @@ try:
|
||||||
axs[aXn].set_ylabel('Classifiers performance')
|
axs[aXn].set_ylabel('Classifiers performance')
|
||||||
aXn += 1
|
aXn += 1
|
||||||
|
|
||||||
|
axs[aXn].plot(xs, df['Shift'], '--k', label='tr-te shift (AE)')
|
||||||
|
axs[aXn].plot(xs, df['tr-prev'], 'y', label='tr-$Pr(\oplus)$')
|
||||||
|
axs[aXn].plot(xs, df['te-prev'], 'r', label='te-$Pr(\oplus)$')
|
||||||
|
axs[aXn].legend()
|
||||||
|
axs[aXn].grid()
|
||||||
|
axs[aXn].set_ylabel('Train-Test Shift')
|
||||||
|
aXn += 1
|
||||||
|
|
||||||
os.makedirs('./plots', exist_ok=True)
|
os.makedirs('./plots', exist_ok=True)
|
||||||
plt.savefig(f'./plots/{plotname}')
|
plt.savefig(f'./plots/{plotname}')
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue