Shared transforms between different interpretable models
Expand source code
'''Shared transforms between different interpretable models
'''
import numpy as np
class Winsorizer():
    """Clip every column of a 2-D array to per-column quantile limits (Winsorization, l -> l*).

    Warning: this class should not be used directly.

    Parameters
    ----------
    trim_quantile : float, default 0.0
        Fraction cut from each tail. ``train`` uses the ``trim_quantile * 100``
        and ``100 - trim_quantile * 100`` percentiles of each column as the
        lower and upper limits. With a value <= 0 the limits stay at
        -inf / +inf, so ``trim`` changes nothing.

    Attributes
    ----------
    winsor_lims : ndarray of shape (2, n_columns) or None
        Row 0 holds the lower limits, row 1 the upper limits.
        ``None`` until ``train`` has been called.
    """

    def __init__(self, trim_quantile=0.0):
        self.trim_quantile = trim_quantile
        self.winsor_lims = None

    def train(self, X):
        """Compute the per-column limits from ``X`` and store them in ``winsor_lims``.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_columns)
        """
        # Start from "no clipping": lower = -inf, upper = +inf for every column.
        self.winsor_lims = np.full([2, X.shape[1]], np.inf)
        self.winsor_lims[0, :] = -np.inf
        if self.trim_quantile > 0:
            lower_pct = self.trim_quantile * 100
            # One call yields a (2, n_columns) array: row 0 lower, row 1 upper.
            # Assigning into the pre-allocated array keeps the limits float64.
            self.winsor_lims[:] = np.percentile(
                X, [lower_pct, 100 - lower_pct], axis=0)

    def trim(self, X):
        """Return a new array with every column of ``X`` limited to the trained range.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_columns)

        Returns
        -------
        ndarray
            Values above the upper limit are replaced by the upper limit,
            values below the lower limit by the lower limit; everything else
            is passed through. ``X`` itself is not modified.
        """
        lower, upper = self.winsor_lims
        # The (n_columns,) limit rows broadcast against the rows of X.
        return np.where(X > upper, upper, np.where(X < lower, lower, X))
class FriedScale():
    """Performs scaling of linear variables according to Friedman et al. 2005 Sec 5

    Each variable is first Winsorized l->l*, then standardised as 0.4 x l* / std(l*)
    Warning: this class should not be used directly.

    Parameters
    ----------
    winsorizer : object with a ``trim(X)`` method, or None
        When given, ``trim`` is applied to the data before the standard
        deviation is taken in ``train`` and before scaling in ``scale``.

    Attributes
    ----------
    scale_multipliers : ndarray of shape (n_columns,) or None
        Per-column factors computed by ``train``; ``None`` before that.
    """

    def __init__(self, winsorizer=None):
        self.scale_multipliers = None
        self.winsorizer = winsorizer

    def train(self, X):
        """Compute the per-column multipliers from ``X``.

        Parameters
        ----------
        X : ndarray of shape (n_samples, n_columns)
        """
        X_trimmed = X if self.winsorizer is None else self.winsorizer.trim(X)

        scale_multipliers = np.ones(X.shape[1])
        for i_col in range(X.shape[1]):
            # Distinct values are counted on the untrimmed column.
            num_uniq_vals = len(np.unique(X[:, i_col]))
            if num_uniq_vals > 2:  # don't scale binary variables which are effectively already rules
                # The 1e-12 term keeps the division finite for a zero std.
                scale_multipliers[i_col] = 0.4 / (1.0e-12 + np.std(X_trimmed[:, i_col]))
        self.scale_multipliers = scale_multipliers

    def scale(self, X):
        """Return ``X`` (trimmed first when a winsorizer is set) times the trained multipliers."""
        if self.winsorizer is not None:
            X = self.winsorizer.trim(X)
        return X * self.scale_multipliers
Classes
class FriedScale (winsorizer=None)
-
Performs scaling of linear variables according to Friedman et al. 2005 Sec 5
Each variable is first Winsorized l->l*, then standardised as 0.4 x l* / std(l*). Warning: this class should not be used directly.
Expand source code
class FriedScale():
    """Performs scaling of linear variables according to Friedman et al. 2005 Sec 5

    Each variable is first Winsorized l->l*, then standardised as 0.4 x l* / std(l*)
    Warning: this class should not be used directly.

    ``winsorizer`` is either None or an object exposing ``trim(X)``; when set,
    ``trim`` is applied before the standard deviation is taken and before scaling.
    ``scale_multipliers`` is None until ``train`` has been called.
    """

    def __init__(self, winsorizer=None):
        self.scale_multipliers = None
        self.winsorizer = winsorizer

    def train(self, X):
        """Compute one multiplier per column of the 2-D array ``X``."""
        if self.winsorizer is None:
            X_trimmed = X
        else:
            X_trimmed = self.winsorizer.trim(X)

        scale_multipliers = np.ones(X.shape[1])
        for i_col in range(X.shape[1]):
            # Distinct values are counted on the untrimmed column.
            num_uniq_vals = len(np.unique(X[:, i_col]))
            if num_uniq_vals > 2:  # don't scale binary variables which are effectively already rules
                # The 1e-12 term keeps the division finite for a zero std.
                scale_multipliers[i_col] = 0.4 / (1.0e-12 + np.std(X_trimmed[:, i_col]))
        self.scale_multipliers = scale_multipliers

    def scale(self, X):
        """Return ``X`` (trimmed first when a winsorizer is set) times the trained multipliers."""
        if self.winsorizer is None:
            return X * self.scale_multipliers
        return self.winsorizer.trim(X) * self.scale_multipliers
Methods
def scale(self, X)
-
Expand source code
def scale(self, X):
    """Return ``X`` multiplied column-wise by ``self.scale_multipliers``.

    When ``self.winsorizer`` is set, ``X`` is first passed through its
    ``trim`` method.
    """
    if self.winsorizer is not None:
        X = self.winsorizer.trim(X)
    return X * self.scale_multipliers
def train(self, X)
-
Expand source code
def train(self, X):
    """Compute one multiplier per column of the 2-D array ``X`` and store
    them in ``self.scale_multipliers``.

    Columns with more than two distinct values get 0.4 / std of the column
    (taken after ``self.winsorizer.trim`` when a winsorizer is set); all
    other columns keep a multiplier of 1.
    """
    X_trimmed = X if self.winsorizer is None else self.winsorizer.trim(X)

    scale_multipliers = np.ones(X.shape[1])
    for i_col in range(X.shape[1]):
        # Distinct values are counted on the untrimmed column.
        num_uniq_vals = len(np.unique(X[:, i_col]))
        if num_uniq_vals > 2:  # don't scale binary variables which are effectively already rules
            # The 1e-12 term keeps the division finite for a zero std.
            scale_multipliers[i_col] = 0.4 / (1.0e-12 + np.std(X_trimmed[:, i_col]))
    self.scale_multipliers = scale_multipliers
class Winsorizer (trim_quantile=0.0)
-
Performs Winsorization l->l*
Warning: this class should not be used directly.
Expand source code
class Winsorizer():
    """Performs Winsorization l->l*: clips each column of a 2-D array to per-column quantile limits.

    Warning: this class should not be used directly.

    ``trim_quantile`` is the fraction cut from each tail; ``train`` stores the
    ``trim_quantile * 100`` and ``100 - trim_quantile * 100`` percentiles of
    every column in ``winsor_lims`` (row 0 lower, row 1 upper). With a value
    <= 0 the limits stay at -inf / +inf. ``winsor_lims`` is None until
    ``train`` has been called.
    """

    def __init__(self, trim_quantile=0.0):
        self.trim_quantile = trim_quantile
        self.winsor_lims = None

    def train(self, X):
        """Compute the per-column limits from the 2-D array ``X``."""
        # Start from "no clipping": lower = -inf, upper = +inf for every column.
        self.winsor_lims = np.full([2, X.shape[1]], np.inf)
        self.winsor_lims[0, :] = -np.inf
        if self.trim_quantile > 0:
            lower_pct = self.trim_quantile * 100
            # One call yields a (2, n_columns) array: row 0 lower, row 1 upper.
            self.winsor_lims[:] = np.percentile(
                X, [lower_pct, 100 - lower_pct], axis=0)

    def trim(self, X):
        """Return a new array with each column of ``X`` limited to the trained range."""
        lower, upper = self.winsor_lims
        # The (n_columns,) limit rows broadcast against the rows of X.
        return np.where(X > upper, upper, np.where(X < lower, lower, X))
Methods
def train(self, X)
-
Expand source code
def train(self, X):
    """Compute per-column Winsorization limits from the 2-D array ``X``.

    Stores a (2, n_columns) float array in ``self.winsor_lims``: row 0 holds
    the ``trim_quantile * 100`` percentile of each column and row 1 the
    ``100 - trim_quantile * 100`` percentile. When ``self.trim_quantile`` is
    not positive the limits stay at -inf / +inf.
    """
    # get winsor limits
    self.winsor_lims = np.full([2, X.shape[1]], np.inf)
    self.winsor_lims[0, :] = -np.inf
    if self.trim_quantile > 0:
        lower_pct = self.trim_quantile * 100
        # One call yields a (2, n_columns) array: row 0 lower, row 1 upper.
        self.winsor_lims[:] = np.percentile(
            X, [lower_pct, 100 - lower_pct], axis=0)
def trim(self, X)
-
Expand source code
def trim(self, X):
    """Return a new array with each column of the 2-D array ``X`` limited
    to the range stored in ``self.winsor_lims``.

    Values above the upper limit (row 1) are replaced by it, values below
    the lower limit (row 0) are replaced by that; all other values pass
    through. ``X`` itself is not modified.
    """
    lower, upper = self.winsor_lims
    # The (n_columns,) limit rows broadcast against the rows of X.
    return np.where(X > upper, upper, np.where(X < lower, lower, X))