Source code for psdr.opg

from __future__ import division, print_function

import numpy as np
from scipy.spatial.distance import cdist
from .subspace import SubspaceBasedDimensionReduction, ActiveSubspace
from .local_linear import local_linear_grads


__all__ = ['OuterProductGradient']


class OuterProductGradient(ActiveSubspace):
    r""" The Outer Product Gradient approach of Hardle and Stoker.

    Given pairs of inputs :math:`\mathbf{x}_j` and outputs :math:`f(\mathbf{x}_j)`,
    this method estimates the active subspace by (1) estimating gradients from these
    samples and then (2) using the eigendecomposition of the outer product of these
    approximate gradients.

    The gradients are estimated by fitting a linear model at each input point
    :math:`\mathbf{x}_j`, weighting the other points by a kernel :math:`k` that
    decreases with increasing distance; i.e., at :math:`\mathbf{x}_j` we construct
    the linear model :math:`\mathbf{a}^\top \mathbf{x} + \beta` by solving the
    weighted least-squares problem

    .. math::

        \min_{\mathbf{a}, \beta} \left\|
            \begin{bmatrix}
                \sqrt{k(\| \mathbf{x}_1 - \mathbf{x}_j\|)} & & & \\
                & \sqrt{k(\| \mathbf{x}_2 - \mathbf{x}_j\|)} & & \\
                & & \ddots & \\
                & & & \sqrt{k(\|\mathbf{x}_M - \mathbf{x}_j\|)}
            \end{bmatrix}
            \left[
            \begin{bmatrix}
                \mathbf{x}_1^\top - \mathbf{x}_j^\top & 1 \\
                \mathbf{x}_2^\top - \mathbf{x}_j^\top & 1 \\
                \vdots & \vdots \\
                \mathbf{x}_M^\top - \mathbf{x}_j^\top & 1
            \end{bmatrix}
            \begin{bmatrix} \mathbf{a} \\ \beta \end{bmatrix}
            -
            \begin{bmatrix}
                f(\mathbf{x}_1) \\ f(\mathbf{x}_2) \\ \vdots \\ f(\mathbf{x}_M)
            \end{bmatrix}
            \right]
        \right\|_2.

    Our implementation is a modification of the algorithm presented by Li [Li18]_.
    Gradients are estimated by calling :code:`psdr.local_linear_grads`, which uses
    a more numerically stable approach than the formulation above. Additionally,
    by default we use a per-location bandwidth based on perplexity.

    Parameters
    ----------
    standardize: bool
        If True, standardize the input X to have zero mean and identity covariance.
    perplexity: None or float
        Entropy target used for choosing the bandwidth.
    bandwidth: None, 'xia', or positive float
        If specified, sets a global bandwidth; 'xia' uses the heuristic suggested in [Li18]_.

    References
    ----------
    .. [HS89] W. Hardle and T. M. Stoker.
        Investigating Smooth Multiple Regression by the Method of Average Derivatives.
        J. Am. Stat. Assoc., Vol. 84, No. 408 (1989), pp. 986--995,
        DOI:10.1080/01621459.1989.10478863
    .. [Li18] Bing Li.
        Sufficient Dimension Reduction: Methods and Applications with R.
        CRC Press, 2018.
    """
    def __init__(self, standardize = True, perplexity = None, bandwidth = None):
        self.standardize = standardize
        self.perplexity = perplexity
        self.bandwidth = bandwidth

    def __str__(self):
        return "<Outer Product Gradient>"
    def fit(self, X, fX):
        r"""

        Parameters
        ----------
        X: array-like (M,m)
            Location of function samples
        fX: array-like (M,)
            Function samples where fX[j] = f(X[j])
        """
        X = np.atleast_2d(X)
        fX = np.atleast_1d(fX)
        if len(fX.shape) > 1:
            fX = fX.flatten()
        assert len(fX) == len(X), "Number of points X must match number of function samples fX"

        N = len(X)

        # Step 1: Standardize data
        if self.standardize:
            mean = np.mean(X, axis = 0)
            std = np.std(X, axis = 0)
            Z = (X - mean)/std
        else:
            Z = X

        # Step 2: Setup the kernel bandwidth
        if self.bandwidth == 'xia':
            # Bandwidth from Xia 2007, [Li18, eq. 11.5]
            bandwidth = 2.34*len(X)**(-1./(max(X.shape[1], 3) + 6))
        else:
            bandwidth = self.bandwidth

        # Step 3: Estimate gradients
        z_grads = local_linear_grads(Z, fX, perplexity = self.perplexity, bandwidth = bandwidth)

        # Step 4: Identify the active subspace
        ActiveSubspace.fit(self, z_grads)

        if self.standardize:
            # Undo the coordinate change introduced by standardization
            Dinv2 = np.diag(std**(-0.5))
            U = Dinv2.dot(self.U)
            Q, R = np.linalg.qr(U, mode = 'reduced')
            self._U = Q

        self._U = self._fix_subspace_signs_grads(self._U, z_grads)
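

# --- Illustrative usage sketch (not part of the psdr module) ---
# The demo below shows how OuterProductGradient.fit is intended to be called on
# sampled inputs X and outputs fX, per the signature above.  The test function,
# sample counts, and reliance on default perplexity/bandwidth settings are
# assumptions made for illustration only.

if __name__ == '__main__':
    np.random.seed(0)
    X = np.random.uniform(-1, 1, size = (200, 5))
    # A function that varies only along the direction [1, 1, 0, 0, 0]
    fX = np.array([(x[0] + x[1])**2 for x in X])

    opg = OuterProductGradient()
    opg.fit(X, fX)
    # The leading column of opg.U should roughly align with the dominant
    # active direction [1, 1, 0, 0, 0] (up to sign and normalization)
    print(opg.U[:, 0])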
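

# --- Illustrative gradient-estimation sketch (not psdr's implementation) ---
# This sketch re-implements the kernel-weighted local linear fit described in the
# class docstring: at each sample x_j, a linear model a^T x + beta is fit with
# weights sqrt(k(||x_i - x_j||)), and the slope a is taken as the gradient estimate.
# The Gaussian kernel, the global bandwidth, and the helper name
# opg_gradients_sketch are assumptions for illustration; psdr's local_linear_grads
# uses a more numerically stable solver and a perplexity-based per-location bandwidth.

def opg_gradients_sketch(X, fX, bandwidth = 0.5):
    """Estimate gradients at each row of X via kernel-weighted local linear fits."""
    X = np.atleast_2d(X)
    fX = np.atleast_1d(fX).flatten()
    M, m = X.shape
    grads = np.zeros((M, m))
    # Squared pairwise distances feeding the Gaussian kernel weights
    D2 = cdist(X, X, 'sqeuclidean')
    for j in range(M):
        # Square roots of the kernel weights k(||x_i - x_j||)
        w = np.exp(-D2[j] / (2 * bandwidth**2)) ** 0.5
        # Design matrix [x_i - x_j, 1]; the slope block recovers the gradient a
        A = np.hstack([X - X[j], np.ones((M, 1))])
        coef, _, _, _ = np.linalg.lstsq(w[:, None] * A, w * fX, rcond = None)
        grads[j] = coef[:m]
    return grads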