baselines code updated
This commit is contained in:
parent 6a5a7a0153
commit 232a670305
@@ -4,6 +4,7 @@
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [

        {
            "name": "main",
            "type": "python",
@@ -11,6 +12,14 @@
            "program": "C:\\Users\\Lorenzo Volpi\\source\\tesi\\quacc\\main.py",
            "console": "integratedTerminal",
            "justMyCode": true
        },
        {
            "name": "models",
            "type": "python",
            "request": "launch",
            "program": "C:\\Users\\Lorenzo Volpi\\source\\tesi\\baselines\\models.py",
            "console": "integratedTerminal",
            "justMyCode": true
        }
    ]
}
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,277 @@
"""
Relative Unconstrained Least-Squares Fitting (RuLSIF): A Python Implementation

References:
'Change-point detection in time-series data by relative density-ratio estimation'
Song Liu, Makoto Yamada, Nigel Collier and Masashi Sugiyama,
Neural Networks 43 (2013) 72-83.

'A Least-squares Approach to Direct Importance Estimation'
Takafumi Kanamori, Shohei Hido, and Masashi Sugiyama,
Journal of Machine Learning Research 10 (2009) 1391-1445.
"""

from warnings import warn

from numpy import (
    array,
    asarray,
    asmatrix,
    diag,
    diagflat,
    empty,
    exp,
    inf,
    log,
    matrix,
    multiply,
    ones,
    power,
    sum,
)
from numpy.linalg import solve
from numpy.random import randint

from .density_ratio import DensityRatio, KernelInfo
from .helpers import guvectorize_compute, np_float, to_ndarray


def RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num=100, verbose=True):
    """
    Estimation of the alpha-Relative Density Ratio p(x)/p_alpha(x) by RuLSIF
    (Relative Unconstrained Least-Square Importance Fitting)

    p_alpha(x) = alpha * p(x) + (1 - alpha) * q(x)

    Arguments:
        x (numpy.matrix): Sample from p(x).
        y (numpy.matrix): Sample from q(x).
        alpha (float): Mixture parameter.
        sigma_range (list<float>): Search range of Gaussian kernel bandwidth.
        lambda_range (list<float>): Search range of regularization parameter.
        kernel_num (int): Number of kernels. (Default 100)
        verbose (bool): Indicator to print messages (Default True)

    Returns:
        densratio.DensityRatio object which has `compute_density_ratio()`.
    """

    # Number of samples.
    nx = x.shape[0]
    ny = y.shape[0]

    # Number of kernel functions.
    kernel_num = min(kernel_num, nx)

    # Randomly take a subset of x, to identify centers for the kernels.
    centers = x[randint(nx, size=kernel_num)]

    if verbose:
        print("RuLSIF starting...")

    if len(sigma_range) == 1 and len(lambda_range) == 1:
        sigma = sigma_range[0]
        lambda_ = lambda_range[0]
    else:
        if verbose:
            print("Searching for the optimal sigma and lambda...")

        # Grid-search cross-validation for optimal kernel and regularization parameters.
        opt_params = search_sigma_and_lambda(
            x, y, alpha, centers, sigma_range, lambda_range, verbose
        )
        sigma = opt_params["sigma"]
        lambda_ = opt_params["lambda"]

        if verbose:
            print(
                "Found optimal sigma = {:.3f}, lambda = {:.3f}.".format(sigma, lambda_)
            )

    if verbose:
        print("Optimizing theta...")

    phi_x = compute_kernel_Gaussian(x, centers, sigma)
    phi_y = compute_kernel_Gaussian(y, centers, sigma)
    H = alpha * (phi_x.T.dot(phi_x) / nx) + (1 - alpha) * (phi_y.T.dot(phi_y) / ny)
    h = phi_x.mean(axis=0).T
    theta = asarray(solve(H + diag(array(lambda_).repeat(kernel_num)), h)).ravel()

    # No negative coefficients.
    theta[theta < 0] = 0

    # Compute the alpha-relative density ratio, at the given coordinates.
    def alpha_density_ratio(coordinates):
        # Evaluate the kernel at these coordinates, and take the dot-product with the weights.
        coordinates = to_ndarray(coordinates)
        phi_x = compute_kernel_Gaussian(coordinates, centers, sigma)
        alpha_density_ratio = phi_x @ theta

        return alpha_density_ratio

    # Compute the approximate alpha-relative PE-divergence, given samples x and y from the respective distributions.
    def alpha_PE_divergence(x, y):
        # This is Y, in Reference 1.
        x = to_ndarray(x)

        # Obtain alpha-relative density ratio at these points.
        g_x = alpha_density_ratio(x)

        # This is Y', in Reference 1.
        y = to_ndarray(y)

        # Obtain alpha-relative density ratio at these points.
        g_y = alpha_density_ratio(y)

        # Compute the alpha-relative PE-divergence as given in Reference 1.
        n = x.shape[0]
        divergence = (
            -alpha * (g_x @ g_x) / 2 - (1 - alpha) * (g_y @ g_y) / 2 + g_x.sum(axis=0)
        ) / n - 1.0 / 2
        return divergence

    # Compute the approximate alpha-relative KL-divergence, given samples x and y from the respective distributions.
    def alpha_KL_divergence(x, y):
        # This is Y, in Reference 1.
        x = to_ndarray(x)

        # Obtain alpha-relative density ratio at these points.
        g_x = alpha_density_ratio(x)

        # Compute the alpha-relative KL-divergence.
        n = x.shape[0]
        divergence = log(g_x).sum(axis=0) / n
        return divergence

    alpha_PE = alpha_PE_divergence(x, y)
    alpha_KL = alpha_KL_divergence(x, y)

    if verbose:
        print("Approximate alpha-relative PE-divergence = {:03.2f}".format(alpha_PE))
        print("Approximate alpha-relative KL-divergence = {:03.2f}".format(alpha_KL))

    kernel_info = KernelInfo(
        kernel_type="Gaussian", kernel_num=kernel_num, sigma=sigma, centers=centers
    )
    result = DensityRatio(
        method="RuLSIF",
        alpha=alpha,
        theta=theta,
        lambda_=lambda_,
        alpha_PE=alpha_PE,
        alpha_KL=alpha_KL,
        kernel_info=kernel_info,
        compute_density_ratio=alpha_density_ratio,
    )

    if verbose:
        print("RuLSIF completed.")

    return result


# Grid-search cross-validation for the optimal parameters sigma and lambda by leave-one-out cross-validation. See Reference 2.
def search_sigma_and_lambda(x, y, alpha, centers, sigma_range, lambda_range, verbose):
    nx = x.shape[0]
    ny = y.shape[0]
    n_min = min(nx, ny)
    kernel_num = centers.shape[0]

    score_new = inf
    sigma_new = 0
    lambda_new = 0

    for sigma in sigma_range:
        phi_x = compute_kernel_Gaussian(x, centers, sigma)  # (nx, kernel_num)
        phi_y = compute_kernel_Gaussian(y, centers, sigma)  # (ny, kernel_num)
        H = alpha * (phi_x.T @ phi_x / nx) + (1 - alpha) * (
            phi_y.T @ phi_y / ny
        )  # (kernel_num, kernel_num)
        h = phi_x.mean(axis=0).reshape(-1, 1)  # (kernel_num, 1)
        phi_x = phi_x[:n_min].T  # (kernel_num, n_min)
        phi_y = phi_y[:n_min].T  # (kernel_num, n_min)

        for lambda_ in lambda_range:
            B = H + diag(
                array(lambda_ * (ny - 1) / ny).repeat(kernel_num)
            )  # (kernel_num, kernel_num)
            B_inv_X = solve(B, phi_y)  # (kernel_num, n_min)
            X_B_inv_X = multiply(phi_y, B_inv_X)  # (kernel_num, n_min)
            denom = ny * ones(n_min) - ones(kernel_num) @ X_B_inv_X  # (n_min, )
            B0 = solve(B, h @ ones((1, n_min))) + B_inv_X @ diagflat(
                h.T @ B_inv_X / denom
            )  # (kernel_num, n_min)
            B1 = solve(B, phi_x) + B_inv_X @ diagflat(
                ones(kernel_num) @ multiply(phi_x, B_inv_X)
            )  # (kernel_num, n_min)
            B2 = (ny - 1) * (nx * B0 - B1) / (ny * (nx - 1))  # (kernel_num, n_min)
            B2[B2 < 0] = 0
            r_y = multiply(phi_y, B2).sum(axis=0).T  # (n_min, )
            r_x = multiply(phi_x, B2).sum(axis=0).T  # (n_min, )

            # Squared loss of RuLSIF, without regularization term.
            # Directly related to the negative of the PE-divergence.
            score = (r_y @ r_y / 2 - r_x.sum(axis=0)) / n_min

            if verbose:
                print(
                    "sigma = %.5f, lambda = %.5f, score = %.5f"
                    % (sigma, lambda_, score)
                )

            if score < score_new:
                score_new = score
                sigma_new = sigma
                lambda_new = lambda_

    return {"sigma": sigma_new, "lambda": lambda_new}


def _compute_kernel_Gaussian(x_list, y_row, neg_gamma, res) -> None:
    sq_norm = sum(power(x_list - y_row, 2), 1)
    multiply(neg_gamma, sq_norm, res)
    exp(res, res)


def _target_numpy_wrapper(x_list, y_list, neg_gamma):
    res = empty((y_list.shape[0], x_list.shape[0]), np_float)
    if isinstance(x_list, matrix) or isinstance(y_list, matrix):
        res = asmatrix(res)

    for j, y_row in enumerate(y_list):
        # `.T` aligns shapes for matrices, does nothing for 1D ndarray.
        _compute_kernel_Gaussian(x_list, y_row, neg_gamma, res[j].T)

    return res


_compute_functions = {"numpy": _target_numpy_wrapper}
if guvectorize_compute:
    _compute_functions.update(
        {
            key: guvectorize_compute(key)(_compute_kernel_Gaussian)
            for key in ("cpu", "parallel")
        }
    )

_compute_function = _compute_functions[
    "cpu" if "cpu" in _compute_functions else "numpy"
]


# Returns a 2D numpy matrix of kernel evaluated at the gridpoints with coordinates from x_list and y_list.
def compute_kernel_Gaussian(x_list, y_list, sigma):
    return _compute_function(x_list, y_list, -0.5 * sigma**-2).T


def set_compute_kernel_target(target: str) -> None:
    global _compute_function
    if target not in ("numpy", "cpu", "parallel"):
        raise ValueError(
            "'target' must be one of the following: 'numpy', 'cpu', or 'parallel'."
        )

    if target not in _compute_functions:
        warn("'numba' not available; defaulting to 'numpy'.", ImportWarning)
        target = "numpy"

    _compute_function = _compute_functions[target]
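Note: below is a minimal usage sketch of the RuLSIF estimator added above; it is not part of the commit. The synthetic data, the explicit sigma/lambda grids, and the module path baselines.densratio.RuLSIF are illustrative assumptions (the densratio() wrapper in core.py further down normally builds the "auto" grids).

# Hypothetical usage sketch, not part of the commit.
import numpy as np
from baselines.densratio.RuLSIF import RuLSIF  # assumed package path

x = np.random.normal(1.0, 1 / 8, size=(200, 1))  # sample from p(x)
y = np.random.normal(1.0, 1 / 2, size=(200, 1))  # sample from q(x)

result = RuLSIF(
    x, y, alpha=0.1,
    sigma_range=[0.1, 0.3, 1.0],    # illustrative grid
    lambda_range=[0.01, 0.1, 1.0],  # illustrative grid
    kernel_num=100, verbose=False,
)
w = result.compute_density_ratio(x)  # alpha-relative density ratio at the x points
print(w.shape)                       # (200,)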
@@ -0,0 +1,7 @@
from warnings import filterwarnings

from .core import densratio
from .RuLSIF import set_compute_kernel_target

filterwarnings("default", message="'numba'", category=ImportWarning, module="densratio")
__all__ = ["densratio", "set_compute_kernel_target"]
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,70 @@
"""
densratio.core
~~~~~~~~~~~~~~

Estimate Density Ratio p(x)/q(y)
"""

from numpy import linspace

from .helpers import to_ndarray
from .RuLSIF import RuLSIF


def densratio(
    x, y, alpha=0, sigma_range="auto", lambda_range="auto", kernel_num=100, verbose=True
):
    """Estimate alpha-mixture Density Ratio p(x)/(alpha*p(x) + (1 - alpha)*q(x))

    Arguments:
        x: sample from p(x).
        y: sample from q(x).
        alpha: Default 0 - corresponds to ordinary density ratio.
        sigma_range: search range of Gaussian kernel bandwidth.
            Default "auto" means 10^-3, 10^-2, ..., 10^9.
        lambda_range: search range of regularization parameter for uLSIF.
            Default "auto" means 10^-3, 10^-2, ..., 10^9.
        kernel_num: number of kernels. Default 100.
        verbose: indicator to print messages. Default True.

    Returns:
        densratio.DensityRatio object which has `compute_density_ratio()`.

    Raises:
        ValueError: if dimension of x != dimension of y

    Usage::
        >>> from scipy.stats import norm
        >>> from densratio import densratio

        >>> x = norm.rvs(size=200, loc=1, scale=1./8)
        >>> y = norm.rvs(size=200, loc=1, scale=1./2)
        >>> result = densratio(x, y, alpha=0.7)
        >>> print(result)

        >>> density_ratio = result.compute_density_ratio(y)
        >>> print(density_ratio)
    """

    x = to_ndarray(x)
    y = to_ndarray(y)

    if x.shape[1] != y.shape[1]:
        raise ValueError("x and y must be same dimensions.")

    if isinstance(sigma_range, str) and sigma_range != "auto":
        raise TypeError("Invalid value for sigma_range.")

    if isinstance(lambda_range, str) and lambda_range != "auto":
        raise TypeError("Invalid value for lambda_range.")

    if sigma_range is None or (isinstance(sigma_range, str) and sigma_range == "auto"):
        sigma_range = 10 ** linspace(-3, 9, 13)

    if lambda_range is None or (
        isinstance(lambda_range, str) and lambda_range == "auto"
    ):
        lambda_range = 10 ** linspace(-3, 9, 13)

    result = RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num, verbose)
    return result
@@ -0,0 +1,88 @@
from pprint import pformat
from re import sub


class DensityRatio:
    """Density Ratio."""

    def __init__(
        self,
        method,
        alpha,
        theta,
        lambda_,
        alpha_PE,
        alpha_KL,
        kernel_info,
        compute_density_ratio,
    ):
        self.method = method
        self.alpha = alpha
        self.theta = theta
        self.lambda_ = lambda_
        self.alpha_PE = alpha_PE
        self.alpha_KL = alpha_KL
        self.kernel_info = kernel_info
        self.compute_density_ratio = compute_density_ratio

    def __str__(self):
        return """
Method: %(method)s

Alpha: %(alpha)s

Kernel Information:
%(kernel_info)s

Kernel Weights (theta):
%(theta)s

Regularization Parameter (lambda): %(lambda_)s

Alpha-Relative PE-Divergence: %(alpha_PE)s

Alpha-Relative KL-Divergence: %(alpha_KL)s

Function to Estimate Density Ratio:
compute_density_ratio(x)

"""[
            1:-1
        ] % dict(
            method=self.method,
            kernel_info=self.kernel_info,
            alpha=self.alpha,
            theta=my_format(self.theta),
            lambda_=self.lambda_,
            alpha_PE=self.alpha_PE,
            alpha_KL=self.alpha_KL,
        )


class KernelInfo:
    """Kernel Information."""

    def __init__(self, kernel_type, kernel_num, sigma, centers):
        self.kernel_type = kernel_type
        self.kernel_num = kernel_num
        self.sigma = sigma
        self.centers = centers

    def __str__(self):
        return """
Kernel type: %(kernel_type)s
Number of kernels: %(kernel_num)s
Bandwidth(sigma): %(sigma)s
Centers: %(centers)s
"""[
            1:-1
        ] % dict(
            kernel_type=self.kernel_type,
            kernel_num=self.kernel_num,
            sigma=self.sigma,
            centers=my_format(self.centers),
        )


def my_format(str):
    return sub(r"\s+", " ", (pformat(str).split("\n")[0] + ".."))
@@ -0,0 +1,36 @@
from numpy import array, ndarray, result_type

np_float = result_type(float)
try:
    import numba as nb
except ModuleNotFoundError:
    guvectorize_compute = None
else:
    _nb_float = nb.from_dtype(np_float)

    def guvectorize_compute(target: str, *, cache: bool = True):
        return nb.guvectorize(
            [nb.void(_nb_float[:, :], _nb_float[:], _nb_float, _nb_float[:])],
            "(m, p),(p),()->(m)",
            nopython=True,
            target=target,
            cache=cache,
        )


def is_numeric(x):
    return isinstance(x, int) or isinstance(x, float)


def to_ndarray(x):
    if isinstance(x, ndarray):
        if len(x.shape) == 1:
            return x.reshape(-1, 1)
        else:
            return x
    elif str(type(x)) == "<class 'pandas.core.frame.DataFrame'>":
        return x.values
    elif not x:
        raise ValueError("Cannot transform to numpy.matrix.")
    else:
        return to_ndarray(array(x))
@@ -0,0 +1,52 @@
import numpy as np
from scipy.sparse import issparse, vstack
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity


def logreg(Xtr, ytr, Xte):
    # check "Direct Density Ratio Estimation for
    # Large-scale Covariate Shift Adaptation", Eq.28

    if issparse(Xtr):
        X = vstack([Xtr, Xte])
    else:
        X = np.concatenate([Xtr, Xte])

    y = [0] * Xtr.shape[0] + [1] * Xte.shape[0]

    logreg = GridSearchCV(
        LogisticRegression(),
        param_grid={"C": np.logspace(-3, 3, 7), "class_weight": ["balanced", None]},
        n_jobs=-1,
    )
    logreg.fit(X, y)
    probs = logreg.predict_proba(Xtr)
    prob_train, prob_test = probs[:, 0], probs[:, 1]
    prior_train = Xtr.shape[0]
    prior_test = Xte.shape[0]
    w = (prior_train / prior_test) * (prob_test / prob_train)
    return w


kdex2_params = {"bandwidth": np.logspace(-1, 1, 20)}


def kdex2_lltr(Xtr):
    if issparse(Xtr):
        Xtr = Xtr.toarray()
    return GridSearchCV(KernelDensity(), kdex2_params).fit(Xtr).score_samples(Xtr)


def kdex2_weights(Xtr, Xte, log_likelihood_tr):
    log_likelihood_te = (
        GridSearchCV(KernelDensity(), kdex2_params).fit(Xte).score_samples(Xtr)
    )
    likelihood_tr = np.exp(log_likelihood_tr)
    likelihood_te = np.exp(log_likelihood_te)
    return likelihood_te / likelihood_tr


def get_acc(tr_preds, ytr, w):
    return np.sum((1.0 * (tr_preds == ytr)) * w) / np.sum(w)
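Note: a minimal sketch, not part of the commit, of how logreg() and get_acc() above combine into an importance-weighted accuracy estimate under covariate shift (the weights follow the cited Eq. 28, a domain classifier rescaled by the train/test priors); the synthetic data and classifier are placeholders.

# Hypothetical sketch, not part of the commit; data and classifier are placeholders.
import numpy as np
from sklearn.linear_model import LogisticRegression

import baselines.impweight as iw

rng = np.random.default_rng(0)
Xtr = rng.normal(0.0, 1.0, size=(500, 2))  # training sample ~ p(x)
ytr = (Xtr.sum(axis=1) > 0).astype(int)
Xte = rng.normal(0.5, 1.0, size=(300, 2))  # covariate-shifted test sample ~ q(x)

clf = LogisticRegression().fit(Xtr, ytr)
w = iw.logreg(Xtr, ytr, Xte)                      # w(x) ~ (n_tr / n_te) * P(test|x) / P(train|x)
estim_acc = iw.get_acc(clf.predict(Xtr), ytr, w)  # importance-weighted accuracy on the training set
print(estim_acc)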
@@ -0,0 +1,140 @@
# import itertools
# from typing import Iterable

# import quapy as qp
# import quapy.functional as F
# from densratio import densratio
# from quapy.method.aggregative import *
# from quapy.protocol import (
#     AbstractStochasticSeededProtocol,
#     OnLabelledCollectionProtocol,
# )
# from scipy.sparse import issparse, vstack
# from scipy.spatial.distance import cdist
# from scipy.stats import multivariate_normal
# from sklearn.linear_model import LogisticRegression
# from sklearn.model_selection import GridSearchCV
# from sklearn.neighbors import KernelDensity

import time

import numpy as np
import sklearn.metrics as metrics
from pykliep import DensityRatioEstimator
from quapy.protocol import APP
from scipy.sparse import issparse, vstack
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KernelDensity

import baselines.impweight as iw
from baselines.densratio import densratio
from quacc.dataset import Dataset


# ---------------------------------------------------------------------------------------
# Methods of "importance weight", e.g., by ratio density estimation (KLIEP, SILF, LogReg)
# ---------------------------------------------------------------------------------------
class ImportanceWeight:
    def weights(self, Xtr, ytr, Xte):
        ...


class KLIEP(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        kliep = DensityRatioEstimator()
        kliep.fit(Xtr, Xte)
        return kliep.predict(Xtr)


class USILF(ImportanceWeight):
    def __init__(self, alpha=0.0):
        self.alpha = alpha

    def weights(self, Xtr, ytr, Xte):
        dense_ratio_obj = densratio(Xtr, Xte, alpha=self.alpha, verbose=False)
        return dense_ratio_obj.compute_density_ratio(Xtr)


class LogReg(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        # check "Direct Density Ratio Estimation for
        # Large-scale Covariate Shift Adaptation", Eq.28

        if issparse(Xtr):
            X = vstack([Xtr, Xte])
        else:
            X = np.concatenate([Xtr, Xte])

        y = [0] * Xtr.shape[0] + [1] * Xte.shape[0]

        logreg = GridSearchCV(
            LogisticRegression(),
            param_grid={"C": np.logspace(-3, 3, 7), "class_weight": ["balanced", None]},
            n_jobs=-1,
        )
        logreg.fit(X, y)
        probs = logreg.predict_proba(Xtr)
        prob_train, prob_test = probs[:, 0], probs[:, 1]
        prior_train = Xtr.shape[0]
        prior_test = Xte.shape[0]
        w = (prior_train / prior_test) * (prob_test / prob_train)
        return w


class KDEx2(ImportanceWeight):
    def __init__(self):
        pass

    def weights(self, Xtr, ytr, Xte):
        params = {"bandwidth": np.logspace(-1, 1, 20)}
        log_likelihood_tr = (
            GridSearchCV(KernelDensity(), params).fit(Xtr).score_samples(Xtr)
        )
        log_likelihood_te = (
            GridSearchCV(KernelDensity(), params).fit(Xte).score_samples(Xtr)
        )
        likelihood_tr = np.exp(log_likelihood_tr)
        likelihood_te = np.exp(log_likelihood_te)
        return likelihood_te / likelihood_tr


if __name__ == "__main__":
    # d = Dataset("rcv1", target="CCAT").get_raw()
    d = Dataset("imdb", n_prevalences=1).get()[0]

    tstart = time.time()
    lr = LogisticRegression()
    lr.fit(*d.train.Xy)
    val_preds = lr.predict(d.validation.X)
    protocol = APP(
        d.test,
        n_prevalences=21,
        repeats=1,
        sample_size=100,
        return_type="labelled_collection",
    )

    results = []
    for sample in protocol():
        wx = iw.logreg(d.validation.X, d.validation.y, sample.X)
        test_preds = lr.predict(sample.X)
        estim_acc = np.sum((1.0 * (val_preds == d.validation.y)) * wx) / np.sum(wx)
        true_acc = metrics.accuracy_score(sample.y, test_preds)
        results.append((sample.prevalence(), estim_acc, true_acc))

    tend = time.time()

    for r in results:
        print(*r)

    print(f"logreg finished [took {tend-tstart:.3f}s]")
    import win11toast

    win11toast.notify("models.py", "Completed")
@@ -0,0 +1,219 @@
import warnings

import numpy as np
from scipy.sparse import csr_matrix


class DensityRatioEstimator:
    """
    Class to accomplish direct density estimation implementing the original KLIEP
    algorithm from Direct Importance Estimation with Model Selection
    and Its Application to Covariate Shift Adaptation by Sugiyama et al.

    The training set is distributed via
        train ~ p(x)
    and the test set is distributed via
        test ~ q(x).

    The KLIEP algorithm and its variants approximate w(x) = q(x) / p(x) directly. The predict function returns the
    estimate of w(x). The function w(x) can serve as sample weights for the training set during
    training to modify the expectation function that the model's loss function is optimized via,
    i.e.

        E_{x ~ w(x)p(x)} loss(x) = E_{x ~ q(x)} loss(x).

    Usage:
        The fit method is used to run the KLIEP algorithm using LCV and returns value of J
        trained on the entire training/test set with the best sigma found.
        Use the predict method on the training set to determine the sample weights from the KLIEP algorithm.
    """

    def __init__(
        self,
        max_iter=5000,
        num_params=[0.1, 0.2],
        epsilon=1e-4,
        cv=3,
        sigmas=[0.01, 0.1, 0.25, 0.5, 0.75, 1],
        random_state=None,
        verbose=0,
    ):
        """
        Direct density estimation using an inner LCV loop to estimate the proper model. Can be used with sklearn
        cross validation methods with or without storing the inner CV. To use a standard grid search.

        max_iter : Number of iterations to perform
        num_params : List of number of test set vectors used to construct the approximation for inner LCV.
                     Must be a float. Original paper used 10%, i.e. =.1
        sigmas : List of sigmas to be used in inner LCV loop.
        epsilon : Additive factor in the iterative algorithm for numerical stability.
        """
        self.max_iter = max_iter
        self.num_params = num_params
        self.epsilon = epsilon
        self.verbose = verbose
        self.sigmas = sigmas
        self.cv = cv
        self.random_state = 0

    def fit(self, X_train, X_test, alpha_0=None):
        """Uses cross validation to select sigma as in the original paper (LCV).
        In a break from sklearn convention, y=X_test.
        The parameter cv corresponds to R in the original paper.
        Once found, the best sigma is used to train on the full set."""

        # LCV loop, shuffle a copy in place for performance.
        cv = self.cv
        chunk = int(X_test.shape[0] / float(cv))
        if self.random_state is not None:
            np.random.seed(self.random_state)
        # if isinstance(X_test, csr_matrix):
        #     X_test_shuffled = X_test.toarray()
        # else:
        #     X_test_shuffled = X_test.copy()
        X_test_shuffled = X_test.copy()

        np.random.shuffle(X_test_shuffled)

        j_scores = {}

        if type(self.sigmas) != list:
            self.sigmas = [self.sigmas]

        if type(self.num_params) != list:
            self.num_params = [self.num_params]

        if len(self.sigmas) * len(self.num_params) > 1:
            # Inner LCV loop
            for num_param in self.num_params:
                for sigma in self.sigmas:
                    j_scores[(num_param, sigma)] = np.zeros(cv)
                    for k in range(1, cv + 1):
                        if self.verbose > 0:
                            print("Training: sigma: %s R: %s" % (sigma, k))
                        X_test_fold = X_test_shuffled[(k - 1) * chunk : k * chunk, :]
                        j_scores[(num_param, sigma)][k - 1] = self._fit(
                            X_train=X_train,
                            X_test=X_test_fold,
                            num_parameters=num_param,
                            sigma=sigma,
                        )
                    j_scores[(num_param, sigma)] = np.mean(j_scores[(num_param, sigma)])

            sorted_scores = sorted(
                [x for x in j_scores.items() if np.isfinite(x[1])],
                key=lambda x: x[1],
                reverse=True,
            )
            if len(sorted_scores) == 0:
                warnings.warn("LCV failed to converge for all values of sigma.")
                return self
            self._sigma = sorted_scores[0][0][1]
            self._num_parameters = sorted_scores[0][0][0]
            self._j_scores = sorted_scores
        else:
            self._sigma = self.sigmas[0]
            self._num_parameters = self.num_params[0]
        # best sigma
        self._j = self._fit(
            X_train=X_train,
            X_test=X_test_shuffled,
            num_parameters=self._num_parameters,
            sigma=self._sigma,
        )

        return self  # Compatibility with sklearn

    def _fit(self, X_train, X_test, num_parameters, sigma, alpha_0=None):
        """Fits the estimator with the given parameters w-hat and returns J"""

        num_parameters = num_parameters

        if type(num_parameters) == float:
            num_parameters = int(X_test.shape[0] * num_parameters)

        self._select_param_vectors(
            X_test=X_test, sigma=sigma, num_parameters=num_parameters
        )

        # if isinstance(X_train, csr_matrix):
        #     X_train = X_train.toarray()
        X_train = self._reshape_X(X_train)
        X_test = self._reshape_X(X_test)

        if alpha_0 is None:
            alpha_0 = np.ones(shape=(num_parameters, 1)) / float(num_parameters)

        self._find_alpha(
            X_train=X_train,
            X_test=X_test,
            num_parameters=num_parameters,
            epsilon=self.epsilon,
            alpha_0=alpha_0,
            sigma=sigma,
        )

        return self._calculate_j(X_test, sigma=sigma)

    def _calculate_j(self, X_test, sigma):
        pred = self.predict(X_test, sigma=sigma) + 0.0000001
        log = np.log(pred).sum()
        return log / (X_test.shape[0])

    def score(self, X_test):
        """Return the J score, similar to sklearn's API"""
        return self._calculate_j(X_test=X_test, sigma=self._sigma)

    @staticmethod
    def _reshape_X(X):
        """Reshape input from mxn to mx1xn to take advantage of numpy broadcasting."""
        if len(X.shape) != 3:
            return X.reshape((X.shape[0], 1, X.shape[1]))
        return X

    def _select_param_vectors(self, X_test, sigma, num_parameters):
        """X_test is the test set. b is the number of parameters."""
        indices = np.random.choice(X_test.shape[0], size=num_parameters, replace=False)
        self._test_vectors = X_test[indices, :].copy()
        self._phi_fitted = True

    def _phi(self, X, sigma=None):
        if sigma is None:
            sigma = self._sigma

        if self._phi_fitted:
            return np.exp(
                -np.sum((X - self._test_vectors) ** 2, axis=-1) / (2 * sigma**2)
            )
        raise Exception("Phi not fitted.")

    def _find_alpha(self, alpha_0, X_train, X_test, num_parameters, sigma, epsilon):
        A = np.zeros(shape=(X_test.shape[0], num_parameters))
        b = np.zeros(shape=(num_parameters, 1))

        A = self._phi(X_test, sigma)
        b = self._phi(X_train, sigma).sum(axis=0) / X_train.shape[0]
        b = b.reshape((num_parameters, 1))

        out = alpha_0.copy()
        for k in range(self.max_iter):
            mat = np.dot(A, out)
            mat += 0.000000001
            out += epsilon * np.dot(np.transpose(A), 1.0 / mat)
            out += b * (
                ((1 - np.dot(np.transpose(b), out)) / np.dot(np.transpose(b), b))
            )
            out = np.maximum(0, out)
            out /= np.dot(np.transpose(b), out)

        self._alpha = out
        self._fitted = True

    def predict(self, X, sigma=None):
        """Equivalent of w(X) from the original paper."""

        X = self._reshape_X(X)
        if not self._fitted:
            raise Exception("Not fitted!")
        return np.dot(self._phi(X, sigma=sigma), self._alpha).reshape((X.shape[0],))
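Note: a minimal sketch, not part of the commit, of the KLIEP estimator above approximating w(x) = q(x) / p(x); the synthetic data and hyperparameter values are illustrative assumptions.

# Hypothetical sketch, not part of the commit; values below are illustrative.
import numpy as np
from pykliep import DensityRatioEstimator

rng = np.random.default_rng(0)
X_train = rng.normal(0.0, 1.0, size=(300, 2))  # ~ p(x)
X_test = rng.normal(0.5, 1.0, size=(300, 2))   # ~ q(x)

kliep = DensityRatioEstimator(max_iter=2000, sigmas=[0.25, 0.5, 1.0], cv=3, verbose=0)
kliep.fit(X_train, X_test)   # LCV picks sigma, then refits on the full test sample
w = kliep.predict(X_train)   # importance weights for the training points
print(w.shape)               # (300,)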
@@ -1,141 +0,0 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from sklearn.neighbors import KDTree, KNeighborsClassifier


class TrustScore:
  """
  Trust Score: a measure of classifier uncertainty based on nearest neighbors.
  """

  def __init__(self, k=10, alpha=0.0, filtering="none", min_dist=1e-12):
    """
    k and alpha are the tuning parameters for the filtering,
    filtering: method of filtering. option are "none", "density",
    "uncertainty"
    min_dist: some small number to mitigate possible division by 0.
    """
    self.k = k
    self.filtering = filtering
    self.alpha = alpha
    self.min_dist = min_dist

  def filter_by_density(self, X: np.array):
    """Filter out points with low kNN density.

    Args:
      X: an array of sample points.

    Returns:
      A subset of the array without points in the bottom alpha-fraction of
      original points of kNN density.
    """
    kdtree = KDTree(X)
    knn_radii = kdtree.query(X, k=self.k)[0][:, -1]
    eps = np.percentile(knn_radii, (1 - self.alpha) * 100)
    return X[np.where(knn_radii <= eps)[0], :]

  def filter_by_uncertainty(self, X: np.array, y: np.array):
    """Filter out points with high label disagreement amongst its kNN neighbors.

    Args:
      X: an array of sample points.

    Returns:
      A subset of the array without points in the bottom alpha-fraction of
      samples with highest disagreement amongst its k nearest neighbors.
    """
    neigh = KNeighborsClassifier(n_neighbors=self.k)
    neigh.fit(X, y)
    confidence = neigh.predict_proba(X)
    cutoff = np.percentile(confidence, self.alpha * 100)
    unfiltered_idxs = np.where(confidence >= cutoff)[0]
    return X[unfiltered_idxs, :], y[unfiltered_idxs]

  def fit(self, X: np.array, y: np.array):
    """Initialize trust score precomputations with training data.

    WARNING: assumes that the labels are 0-indexed (i.e.
    0, 1,..., n_labels-1).

    Args:
      X: an array of sample points.
      y: corresponding labels.
    """

    self.n_labels = np.max(y) + 1
    self.kdtrees = [None] * self.n_labels
    if self.filtering == "uncertainty":
      X_filtered, y_filtered = self.filter_by_uncertainty(X, y)
    for label in range(self.n_labels):
      if self.filtering == "none":
        X_to_use = X[np.where(y == label)[0]]
        self.kdtrees[label] = KDTree(X_to_use)
      elif self.filtering == "density":
        X_to_use = self.filter_by_density(X[np.where(y == label)[0]])
        self.kdtrees[label] = KDTree(X_to_use)
      elif self.filtering == "uncertainty":
        X_to_use = X_filtered[np.where(y_filtered == label)[0]]
        self.kdtrees[label] = KDTree(X_to_use)

      if len(X_to_use) == 0:
        print(
            "Filtered too much or missing examples from a label! Please lower "
            "alpha or check data."
        )

  def get_score(self, X: np.array, y_pred: np.array):
    """Compute the trust scores.

    Given a set of points, determines the distance to each class.

    Args:
      X: an array of sample points.
      y_pred: The predicted labels for these points.

    Returns:
      The trust score, which is ratio of distance to closest class that was not
      the predicted class to the distance to the predicted class.
    """
    d = np.tile(None, (X.shape[0], self.n_labels))
    for label_idx in range(self.n_labels):
      d[:, label_idx] = self.kdtrees[label_idx].query(X, k=2)[0][:, -1]

    sorted_d = np.sort(d, axis=1)
    d_to_pred = d[range(d.shape[0]), y_pred]
    d_to_closest_not_pred = np.where(
        sorted_d[:, 0] != d_to_pred, sorted_d[:, 0], sorted_d[:, 1]
    )
    return d_to_closest_not_pred / (d_to_pred + self.min_dist)


class KNNConfidence:
  """Baseline which uses disagreement to kNN classifier.
  """

  def __init__(self, k=10):
    self.k = k

  def fit(self, X, y):
    self.kdtree = KDTree(X)
    self.y = y

  def get_score(self, X, y_pred):
    knn_idxs = self.kdtree.query(X, k=self.k)[1]
    knn_outputs = self.y[knn_idxs]
    return np.mean(
        knn_outputs == np.transpose(np.tile(y_pred, (self.k, 1))), axis=1
    )
@@ -1,286 +0,0 @@
# Copyright 2018 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import matplotlib.cm as cm
from sklearn.metrics import precision_recall_curve
import tensorflow as tf

from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier


def run_logistic(X_train, y_train, X_test, y_test, get_training=False):
  model = LogisticRegression()
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)
  all_confidence = model.predict_proba(X_test)
  confidences = all_confidence[range(len(y_pred)), y_pred]
  if not get_training:
    return y_pred, confidences
  y_pred_training = model.predict(X_train)
  all_confidence_training = model.predict_proba(X_train)
  confidence_training = all_confidence_training[range(len(y_pred_training)),
                                                y_pred_training]
  return y_pred, confidences, y_pred_training, confidence_training


def run_linear_svc(X_train, y_train, X_test, y_test, get_training=False):
  model = LinearSVC()
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)
  all_confidence = model.decision_function(X_test)
  confidences = all_confidence[range(len(y_pred)), y_pred]
  if not get_training:
    return y_pred, confidences
  y_pred_training = model.predict(X_train)
  all_confidence_training = model.decision_function(X_train)
  confidence_training = all_confidence_training[range(len(y_pred_training)),
                                                y_pred_training]
  return y_pred, confidences, y_pred_training, confidence_training


def run_random_forest(X_train, y_train, X_test, y_test, get_training=False):
  model = RandomForestClassifier()
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)
  all_confidence = model.predict_proba(X_test)
  confidences = all_confidence[range(len(y_pred)), y_pred]
  if not get_training:
    return y_pred, confidences
  y_pred_training = model.predict(X_train)
  all_confidence_training = model.predict_proba(X_train)
  confidence_training = all_confidence_training[range(len(y_pred_training)),
                                                y_pred_training]
  return y_pred, confidences, y_pred_training, confidence_training


def run_simple_NN(X,
                  y,
                  X_test,
                  y_test,
                  num_iter=10000,
                  hidden_units=100,
                  learning_rate=0.05,
                  batch_size=100,
                  display_steps=1000,
                  n_layers=1,
                  get_training=False):
  """Run a NN with a single layer on some data.

  Returns the predicted values as well as the confidences.
  """
  n_labels = np.max(y) + 1
  n_features = X.shape[1]

  x = tf.placeholder(tf.float32, [None, n_features])
  y_ = tf.placeholder(tf.float32, [None, n_labels])

  def simple_NN(input_placeholder, n_layers):

    W_in = weight_variable([n_features, hidden_units])
    b_in = bias_variable([hidden_units])
    W_mid = [
        weight_variable([hidden_units, hidden_units])
        for i in range(n_layers - 1)
    ]
    b_mid = [bias_variable([hidden_units]) for i in range(n_layers - 1)]
    W_out = weight_variable([hidden_units, n_labels])
    b_out = bias_variable([n_labels])

    layers = [tf.nn.relu(tf.matmul(input_placeholder, W_in) + b_in)]
    for i in range(n_layers - 1):
      layer = tf.nn.relu(tf.matmul(layers[-1], W_mid[i]) + b_mid[i])
      layers.append(layer)

    logits = tf.matmul(layers[-1], W_out) + b_out
    return logits

  NN_logits = simple_NN(x, n_layers)

  cross_entropy = tf.reduce_mean(
      tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=NN_logits))
  train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
  correct_prediction = tf.equal(tf.argmax(NN_logits, 1), tf.argmax(y_, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

  def one_hot(ns):
    return np.eye(n_labels)[ns]

  y_onehot = one_hot(y)
  y_test_onehot = one_hot(y_test)

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(num_iter):
      ns = np.random.randint(0, len(X), size=batch_size)
      if (i + 1) % display_steps == 0:
        train_accuracy = accuracy.eval(feed_dict={x: X, y_: y_onehot})
        test_accuracy = accuracy.eval(feed_dict={x: X_test, y_: y_test_onehot})

        print("step %d, training accuracy %g, test accuracy %g" %
              (i + 1, train_accuracy, test_accuracy))
      train_step.run(feed_dict={x: X[ns, :], y_: y_onehot[ns, :]})

    testing_logits = NN_logits.eval(feed_dict={x: X_test})
    testing_prediction = tf.argmax(NN_logits, 1).eval(feed_dict={x: X_test})
    NN_softmax = tf.nn.softmax(NN_logits).eval(feed_dict={x: X_test})
    testing_confidence_raw = tf.reduce_max(NN_softmax,
                                           1).eval(feed_dict={x: X_test})

    if not get_training:
      return testing_prediction, testing_confidence_raw
    training_prediction = tf.argmax(NN_logits, 1).eval(feed_dict={x: X})
    NN_softmax = tf.nn.softmax(NN_logits).eval(feed_dict={x: X})
    training_confidence_raw = tf.reduce_max(NN_softmax,
                                            1).eval(feed_dict={x: X})
    return testing_prediction, testing_confidence_raw, training_prediction, training_confidence_raw


def plot_precision_curve(
    extra_plot_title,
    percentile_levels,
    signal_names,
    final_TPs,
    final_stderrs,
    final_misclassification,
    model_name="Model",
    colors=["blue", "darkorange", "brown", "red", "purple"],
    legend_loc=None,
    figure_size=None,
    ylim=None):
  if figure_size is not None:
    plt.figure(figsize=figure_size)
  title = "Precision Curve" if extra_plot_title == "" else extra_plot_title
  plt.title(title, fontsize=20)
  colors = colors + list(cm.rainbow(np.linspace(0, 1, len(final_TPs))))

  plt.xlabel("Percentile level", fontsize=18)
  plt.ylabel("Precision", fontsize=18)
  for i, signal_name in enumerate(signal_names):
    ls = "--" if ("Model" in signal_name) else "-"
    plt.plot(
        percentile_levels, final_TPs[i], ls, c=colors[i], label=signal_name)

    plt.fill_between(
        percentile_levels,
        final_TPs[i] - final_stderrs[i],
        final_TPs[i] + final_stderrs[i],
        color=colors[i],
        alpha=0.1)

  if legend_loc is None:
    if 0. in percentile_levels:
      plt.legend(loc="lower right", fontsize=14)
    else:
      plt.legend(loc="upper left", fontsize=14)
  else:
    if legend_loc == "outside":
      plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left", fontsize=14)
    else:
      plt.legend(loc=legend_loc, fontsize=14)
  if ylim is not None:
    plt.ylim(*ylim)
  model_acc = 100 * (1 - final_misclassification)
  plt.axvline(x=model_acc, linestyle="dotted", color="black")
  plt.show()


def run_precision_recall_experiment_general(X,
                                            y,
                                            n_repeats,
                                            percentile_levels,
                                            trainer,
                                            test_size=0.5,
                                            extra_plot_title="",
                                            signals=[],
                                            signal_names=[],
                                            predict_when_correct=False,
                                            skip_print=False):

  def get_stderr(L):
    return np.std(L) / np.sqrt(len(L))

  all_signal_names = ["Model Confidence"] + signal_names
  all_TPs = [[[] for p in percentile_levels] for signal in all_signal_names]
  misclassifications = []
  sign = 1 if predict_when_correct else -1
  sss = StratifiedShuffleSplit(
      n_splits=n_repeats, test_size=test_size, random_state=0)
  for train_idx, test_idx in sss.split(X, y):
    X_train = X[train_idx, :]
    y_train = y[train_idx]
    X_test = X[test_idx, :]
    y_test = y[test_idx]
    testing_prediction, testing_confidence_raw = trainer(
        X_train, y_train, X_test, y_test)
    target_points = np.where(
        testing_prediction == y_test)[0] if predict_when_correct else np.where(
            testing_prediction != y_test)[0]

    final_signals = [testing_confidence_raw]
    for signal in signals:
      signal.fit(X_train, y_train)
      final_signals.append(signal.get_score(X_test, testing_prediction))

    for p, percentile_level in enumerate(percentile_levels):
      all_high_confidence_points = [
          np.where(sign * signal >= np.percentile(sign *
                                                  signal, percentile_level))[0]
          for signal in final_signals
      ]

      if 0 in map(len, all_high_confidence_points):
        continue
      TP = [
          len(np.intersect1d(high_confidence_points, target_points)) /
          (1. * len(high_confidence_points))
          for high_confidence_points in all_high_confidence_points
      ]
      for i in range(len(all_signal_names)):
        all_TPs[i][p].append(TP[i])
    misclassifications.append(len(target_points) / (1. * len(X_test)))

  final_TPs = [[] for signal in all_signal_names]
  final_stderrs = [[] for signal in all_signal_names]
  for p, percentile_level in enumerate(percentile_levels):
    for i in range(len(all_signal_names)):
      final_TPs[i].append(np.mean(all_TPs[i][p]))
      final_stderrs[i].append(get_stderr(all_TPs[i][p]))

    if not skip_print:
      print("Precision at percentile", percentile_level)
      ss = ""
      for i, signal_name in enumerate(all_signal_names):
        ss += (signal_name + (": %.4f " % final_TPs[i][p]))
      print(ss)
      print()

  final_misclassification = np.mean(misclassifications)

  if not skip_print:
    print("Misclassification rate mean/std", np.mean(misclassifications),
          get_stderr(misclassifications))

  for i in range(len(all_signal_names)):
    final_TPs[i] = np.array(final_TPs[i])
    final_stderrs[i] = np.array(final_stderrs[i])

  plot_precision_curve(extra_plot_title, percentile_levels, all_signal_names,
                       final_TPs, final_stderrs, final_misclassification)
  return (all_signal_names, final_TPs, final_stderrs, final_misclassification)