1
0
Fork 0

more testing

This commit is contained in:
Alejandro Moreo Fernandez 2022-01-19 09:53:07 +01:00
parent 833476ebf8
commit a3a5bd8da0
2 changed files with 28 additions and 19 deletions

View File

@ -5,7 +5,7 @@ from sklearn.metrics import f1_score
import functions as fn import functions as fn
import quapy as qp import quapy as qp
import argparse import argparse
from data import LabelledCollection from quapy.data import LabelledCollection
def eval_classifier(learner, test:LabelledCollection): def eval_classifier(learner, test:LabelledCollection):
@ -32,7 +32,10 @@ def main(args):
with qp.util.temp_seed(args.seed): with qp.util.temp_seed(args.seed):
# initial labelled data selection # initial labelled data selection
idx = collection.sampling_index(init_nD, *init_prev) if args.initprev == -1:
idx = collection.sampling_index(init_nD)
else:
idx = collection.sampling_index(init_nD, *[1 - args.initprev, args.initprev])
train, pool = fn.split_from_index(collection, idx) train, pool = fn.split_from_index(collection, idx)
first_train = LabelledCollection(train.instances, train.labels) first_train = LabelledCollection(train.instances, train.labels)
@ -50,8 +53,8 @@ def main(args):
while True: while True:
pool_p_hat_cc, classifier = fn.estimate_prev_CC(train, pool) pool_p_hat_cc, classifier = fn.estimate_prev_CC(train, pool, args.classifier)
pool_p_hat, q_classifier = fn.estimate_prev_Q(train, pool, args.quantifier) pool_p_hat, q_classifier = fn.estimate_prev_Q(train, pool, args.quantifier, args.classifier)
f1_clf = eval_classifier(classifier, pool) f1_clf = eval_classifier(classifier, pool)
f1_q = eval_classifier(q_classifier, pool) f1_q = eval_classifier(q_classifier, pool)
@ -103,13 +106,18 @@ if __name__=='__main__':
parser.add_argument('--initsize', metavar='SIZE', type=int, help='number of labelled documents at the beginning', parser.add_argument('--initsize', metavar='SIZE', type=int, help='number of labelled documents at the beginning',
default=1000) default=1000)
parser.add_argument('--initprev', metavar='PREV', type=float, parser.add_argument('--initprev', metavar='PREV', type=float,
help='prevalence of the initial sample (-1 for uniform sampling)', help='prevalence of the initial sample (-1 for uniform sampling, default)',
default=0.5) default=-1)
parser.add_argument('--seed', metavar='SEED', type=int, parser.add_argument('--seed', metavar='SEED', type=int,
help='random seed', help='random seed',
default=1) default=1)
parser.add_argument('--classifier', metavar='CLS', type=str,
help='classifier type (svm, lr)',
default='lr')
args = parser.parse_args() args = parser.parse_args()
assert 0 < args.initprev < 1, 'wrong value for initsize; should be in (0., 1.)' assert args.initprev==-1.0 or (0 < args.initprev < 1), 'wrong value for initsize; should be in (0., 1.)'
if args.initprev==-1: # this is to clean the path, to show initprev:-1 and not initprev:-1.0
args.initprev = int(args.initprev)
main(args) main(args)

View File

@ -5,9 +5,11 @@ import sys, os, pathlib
assert len(sys.argv) == 3, f'wrong args, syntax is: python {sys.argv[0]} <result_input_path> <dynamic (0|1)>' assert len(sys.argv) == 3, f'wrong args, syntax is: python {sys.argv[0]} <result_input_path> <dynamic (0|1)>'
file = sys.argv[1] file = str(sys.argv[1])
loop = bool(int(sys.argv[2])) loop = bool(int(sys.argv[2]))
print(file)
plotname = pathlib.Path(file).name.replace(".csv", ".png") plotname = pathlib.Path(file).name.replace(".csv", ".png")
if not loop: if not loop:
@ -18,7 +20,6 @@ if not loop:
fig, axs = plt.subplots(5) fig, axs = plt.subplots(5)
try: try:
while True: while True:
aXn = 0 aXn = 0
@ -34,7 +35,7 @@ try:
axs[aXn].plot(xs, y_r, label='$R$') axs[aXn].plot(xs, y_r, label='$R$')
axs[aXn].legend() axs[aXn].legend()
axs[aXn].grid() axs[aXn].grid()
axs[aXn].set_ylabel('Recall estimation') axs[aXn].set_ylabel('Recall')
axs[aXn].set_ylim(0,1) axs[aXn].set_ylim(0,1)
aXn+=1 aXn+=1
@ -46,7 +47,7 @@ try:
axs[aXn].plot(xs, y_r, label='te-$Pr(\oplus)$') axs[aXn].plot(xs, y_r, label='te-$Pr(\oplus)$')
axs[aXn].legend() axs[aXn].legend()
axs[aXn].grid() axs[aXn].grid()
axs[aXn].set_ylabel('Prevalence estimation') axs[aXn].set_ylabel('Prevalence')
aXn += 1 aXn += 1
y_ae = df['AE'] y_ae = df['AE']
@ -58,14 +59,6 @@ try:
axs[aXn].set_ylabel('Quantification error') axs[aXn].set_ylabel('Quantification error')
aXn += 1 aXn += 1
axs[aXn].plot(xs, df['Shift'], label='tr-te shift (AE)')
axs[aXn].plot(xs, df['tr-prev'], label='tr-$Pr(\oplus)$')
axs[aXn].plot(xs, df['te-prev'], label='te-$Pr(\oplus)$')
axs[aXn].legend()
axs[aXn].grid()
axs[aXn].set_ylabel('Train-Test Shift')
aXn += 1
axs[aXn].plot(xs, df['MF1_Q'], label='$F_1(clf(Q))$') axs[aXn].plot(xs, df['MF1_Q'], label='$F_1(clf(Q))$')
axs[aXn].plot(xs, df['MF1_Clf'], label='$F_1(clf(CC))$') axs[aXn].plot(xs, df['MF1_Clf'], label='$F_1(clf(CC))$')
axs[aXn].legend() axs[aXn].legend()
@ -73,6 +66,14 @@ try:
axs[aXn].set_ylabel('Classifiers performance') axs[aXn].set_ylabel('Classifiers performance')
aXn += 1 aXn += 1
axs[aXn].plot(xs, df['Shift'], '--k', label='tr-te shift (AE)')
axs[aXn].plot(xs, df['tr-prev'], 'y', label='tr-$Pr(\oplus)$')
axs[aXn].plot(xs, df['te-prev'], 'r', label='te-$Pr(\oplus)$')
axs[aXn].legend()
axs[aXn].grid()
axs[aXn].set_ylabel('Train-Test Shift')
aXn += 1
os.makedirs('./plots', exist_ok=True) os.makedirs('./plots', exist_ok=True)
plt.savefig(f'./plots/{plotname}') plt.savefig(f'./plots/{plotname}')