plot for avg on test added
This commit is contained in:
parent
5f26bc7059
commit
2d73d583f4
|
@ -115,7 +115,7 @@ class CompReport:
|
||||||
|
|
||||||
shift_data = self._data.copy()
|
shift_data = self._data.copy()
|
||||||
shift_data.index = pd.MultiIndex.from_arrays([shift_idx_0, shift_idx_1])
|
shift_data.index = pd.MultiIndex.from_arrays([shift_idx_0, shift_idx_1])
|
||||||
shift_data.sort_index(axis=0, level=0)
|
shift_data = shift_data.sort_index(axis=0, level=0)
|
||||||
|
|
||||||
_metric = _get_metric(metric)
|
_metric = _get_metric(metric)
|
||||||
_estimators = _get_estimators(estimators, shift_data.columns.unique(1))
|
_estimators = _get_estimators(estimators, shift_data.columns.unique(1))
|
||||||
|
@ -246,7 +246,7 @@ class DatasetReport:
|
||||||
)
|
)
|
||||||
_crs_train, _crs_data = zip(*_crs_sorted)
|
_crs_train, _crs_data = zip(*_crs_sorted)
|
||||||
|
|
||||||
_data = pd.concat(_crs_data, axis=0, keys=_crs_train)
|
_data = pd.concat(_crs_data, axis=0, keys=np.around(_crs_train, decimals=2))
|
||||||
_data = _data.sort_index(axis=0, level=0)
|
_data = _data.sort_index(axis=0, level=0)
|
||||||
return _data
|
return _data
|
||||||
|
|
||||||
|
@ -296,44 +296,90 @@ class DatasetReport:
|
||||||
_data = self.data(metric=metric, estimators=estimators)
|
_data = self.data(metric=metric, estimators=estimators)
|
||||||
_shift_data = self.shift_data(metric=metric, estimators=estimators)
|
_shift_data = self.shift_data(metric=metric, estimators=estimators)
|
||||||
|
|
||||||
avg_x_test = _data.groupby(level=1).mean()
|
|
||||||
prevs_x_test = np.sort(avg_x_test.index.unique(0))
|
|
||||||
stdev_x_test = _data.groupby(level=1).std() if stdev else None
|
|
||||||
avg_x_test_tbl = _data.groupby(level=1).mean()
|
|
||||||
avg_x_test_tbl.loc["avg", :] = _data.mean()
|
|
||||||
|
|
||||||
avg_x_shift = _shift_data.groupby(level=0).mean()
|
|
||||||
prevs_x_shift = np.sort(avg_x_shift.index.unique(0))
|
|
||||||
|
|
||||||
res += "## avg\n"
|
res += "## avg\n"
|
||||||
res += avg_x_test_tbl.to_html() + "\n\n"
|
|
||||||
|
######################## avg on train ########################
|
||||||
|
res += "### avg on train\n"
|
||||||
|
|
||||||
|
avg_on_train = _data.groupby(level=1).mean()
|
||||||
|
prevs_on_train = np.sort(avg_on_train.index.unique(0))
|
||||||
|
stdev_on_train = _data.groupby(level=1).std() if stdev else None
|
||||||
|
avg_on_train_tbl = _data.groupby(level=1).mean()
|
||||||
|
avg_on_train_tbl.loc["avg", :] = _data.mean()
|
||||||
|
|
||||||
|
res += avg_on_train_tbl.to_html() + "\n\n"
|
||||||
|
|
||||||
delta_op = plot.plot_delta(
|
delta_op = plot.plot_delta(
|
||||||
base_prevs=np.around([(1.0 - p, p) for p in prevs_x_test], decimals=2),
|
base_prevs=np.around([(1.0 - p, p) for p in prevs_on_train], decimals=2),
|
||||||
columns=avg_x_test.columns.to_numpy(),
|
columns=avg_on_train.columns.to_numpy(),
|
||||||
data=avg_x_test.T.to_numpy(),
|
data=avg_on_train.T.to_numpy(),
|
||||||
metric=metric,
|
metric=metric,
|
||||||
name=conf,
|
name=conf,
|
||||||
train_prev=None,
|
train_prev=None,
|
||||||
|
avg="train",
|
||||||
)
|
)
|
||||||
res += f".as_posix()})\n"
|
res += f".as_posix()})\n"
|
||||||
|
|
||||||
if stdev:
|
if stdev:
|
||||||
delta_stdev_op = plot.plot_delta(
|
delta_stdev_op = plot.plot_delta(
|
||||||
base_prevs=np.around([(1.0 - p, p) for p in prevs_x_test], decimals=2),
|
base_prevs=np.around(
|
||||||
columns=avg_x_test.columns.to_numpy(),
|
[(1.0 - p, p) for p in prevs_on_train], decimals=2
|
||||||
data=avg_x_test.T.to_numpy(),
|
),
|
||||||
|
columns=avg_on_train.columns.to_numpy(),
|
||||||
|
data=avg_on_train.T.to_numpy(),
|
||||||
metric=metric,
|
metric=metric,
|
||||||
name=conf,
|
name=conf,
|
||||||
train_prev=None,
|
train_prev=None,
|
||||||
stdevs=stdev_x_test.T.to_numpy(),
|
stdevs=stdev_on_train.T.to_numpy(),
|
||||||
|
avg="train",
|
||||||
)
|
)
|
||||||
res += f".as_posix()})\n"
|
res += f".as_posix()})\n"
|
||||||
|
|
||||||
|
######################## avg on test ########################
|
||||||
|
res += "### avg on test\n"
|
||||||
|
|
||||||
|
avg_on_test = _data.groupby(level=0).mean()
|
||||||
|
prevs_on_test = np.sort(avg_on_test.index.unique(0))
|
||||||
|
stdev_on_test = _data.groupby(level=0).std() if stdev else None
|
||||||
|
avg_on_test_tbl = _data.groupby(level=0).mean()
|
||||||
|
avg_on_test_tbl.loc["avg", :] = _data.mean()
|
||||||
|
|
||||||
|
res += avg_on_test_tbl.to_html() + "\n\n"
|
||||||
|
|
||||||
|
delta_op = plot.plot_delta(
|
||||||
|
base_prevs=np.around([(1.0 - p, p) for p in prevs_on_test], decimals=2),
|
||||||
|
columns=avg_on_test.columns.to_numpy(),
|
||||||
|
data=avg_on_test.T.to_numpy(),
|
||||||
|
metric=metric,
|
||||||
|
name=conf,
|
||||||
|
train_prev=None,
|
||||||
|
avg="test",
|
||||||
|
)
|
||||||
|
res += f".as_posix()})\n"
|
||||||
|
|
||||||
|
if stdev:
|
||||||
|
delta_stdev_op = plot.plot_delta(
|
||||||
|
base_prevs=np.around([(1.0 - p, p) for p in prevs_on_test], decimals=2),
|
||||||
|
columns=avg_on_test.columns.to_numpy(),
|
||||||
|
data=avg_on_test.T.to_numpy(),
|
||||||
|
metric=metric,
|
||||||
|
name=conf,
|
||||||
|
train_prev=None,
|
||||||
|
stdevs=stdev_on_test.T.to_numpy(),
|
||||||
|
avg="test",
|
||||||
|
)
|
||||||
|
res += f".as_posix()})\n"
|
||||||
|
|
||||||
|
######################## avg shift ########################
|
||||||
|
res += "### avg dataset shift\n"
|
||||||
|
|
||||||
|
avg_shift = _shift_data.groupby(level=0).mean()
|
||||||
|
prevs_shift = np.sort(avg_shift.index.unique(0))
|
||||||
|
|
||||||
shift_op = plot.plot_shift(
|
shift_op = plot.plot_shift(
|
||||||
shift_prevs=np.around([(1.0 - p, p) for p in prevs_x_shift], decimals=2),
|
shift_prevs=np.around([(1.0 - p, p) for p in prevs_shift], decimals=2),
|
||||||
columns=avg_x_shift.columns.to_numpy(),
|
columns=avg_shift.columns.to_numpy(),
|
||||||
data=avg_x_shift.T.to_numpy(),
|
data=avg_shift.T.to_numpy(),
|
||||||
metric=metric,
|
metric=metric,
|
||||||
name=conf,
|
name=conf,
|
||||||
train_prev=None,
|
train_prev=None,
|
||||||
|
|
|
@ -29,13 +29,14 @@ def plot_delta(
|
||||||
train_prev=None,
|
train_prev=None,
|
||||||
fit_scores=None,
|
fit_scores=None,
|
||||||
legend=True,
|
legend=True,
|
||||||
|
avg=None,
|
||||||
) -> Path:
|
) -> Path:
|
||||||
_base_title = "delta_stdev" if stdevs is not None else "delta"
|
_base_title = "delta_stdev" if stdevs is not None else "delta"
|
||||||
if train_prev is not None:
|
if train_prev is not None:
|
||||||
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
t_prev_pos = int(round(train_prev[pos_class] * 100))
|
||||||
title = f"{_base_title}_{name}_{t_prev_pos}_{metric}"
|
title = f"{_base_title}_{name}_{t_prev_pos}_{metric}"
|
||||||
else:
|
else:
|
||||||
title = f"{_base_title}_{name}_avg_{metric}"
|
title = f"{_base_title}_{name}_avg_{avg}_{metric}"
|
||||||
|
|
||||||
fig, ax = plt.subplots()
|
fig, ax = plt.subplots()
|
||||||
ax.set_aspect("auto")
|
ax.set_aspect("auto")
|
||||||
|
@ -83,7 +84,12 @@ def plot_delta(
|
||||||
markersize=0,
|
markersize=0,
|
||||||
)
|
)
|
||||||
|
|
||||||
ax.set(xlabel="test prevalence", ylabel=metric, title=title)
|
x_label = "test" if avg is None or avg == "train" else "train"
|
||||||
|
ax.set(
|
||||||
|
xlabel=f"{x_label} prevalence",
|
||||||
|
ylabel=metric,
|
||||||
|
title=title,
|
||||||
|
)
|
||||||
|
|
||||||
if legend:
|
if legend:
|
||||||
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
ax.legend(loc="center left", bbox_to_anchor=(1, 0.5))
|
||||||
|
|
Loading…
Reference in New Issue