diff --git a/eDiscovery/main.py b/eDiscovery/main.py index ded1f62..7f9ece1 100644 --- a/eDiscovery/main.py +++ b/eDiscovery/main.py @@ -91,6 +91,8 @@ def main(args): tee(f'{i}\t{progress:.2f}\t{nDtr}\t{nDte}\t{tr_p[1]:.3f}\t{te_p[1]:.3f}\t{pool_p_hat_q[1]:.3f}\t{pool_p_hat_cc[1]:.3f}' f'\t{r:.3f}\t{r_hat_q:.3f}\t{r_hat_cc:.3f}\t{tr_te_shift:.5f}\t{ae_q:.4f}\t{ae_cc:.4f}\t{f1_q:.3f}\t{f1_clf:.3f}') + raise Exception('add idealized costs for each iteration and plots') + posteriors = classifier.predict_proba(pool.instances) fig.plot(posteriors, pool.labels) @@ -120,10 +122,10 @@ if __name__ == '__main__': parser.add_argument('--k', metavar='BATCH', type=int, help='number of documents in a batch', default=100) parser.add_argument('--initsize', metavar='SIZE', type=int, help='number of labelled documents at the beginning', - default=1000) + default=2) parser.add_argument('--initprev', metavar='PREV', type=float, help='prevalence of the initial sample (-1 for uniform sampling, default)', - default=-1) + default=0.5) parser.add_argument('--seed', metavar='SEED', type=int, help='random seed', default=1)