Source code for neuroptica.nonlinearities

'''This module contains a collection of physical and aphysical activation functions. Nonlinearities can be incorporated
into an optical neural network by using the Activation(nonlinearity) NetworkLayer.'''


import numpy as np

from neuroptica.settings import NP_COMPLEX


class Nonlinearity:
    '''
    Abstract base class shared by every activation function in this module. Child classes are expected to override
    both forward_pass() and backward_pass(); the versions defined here only raise NotImplementedError.
    '''

    def __init__(self, N):
        '''
        Record the size of the nonlinearity
        :param N: dimensionality of the nonlinear function
        '''
        self.N = N

    def forward_pass(self, X: np.ndarray) -> np.ndarray:
        '''
        Apply the nonlinearity to fields travelling in the forward direction
        :param X: input fields
        :return: transformed inputs
        '''
        raise NotImplementedError('forward_pass() must be overridden in child class!')

    def backward_pass(self, gamma: np.ndarray, Z: np.ndarray) -> np.ndarray:
        '''
        Send a backpropagated signal back through the nonlinearity
        :param gamma: backpropagated signal from the (l+1)th layer
        :param Z: fields recorded for the forward_pass() run
        :return: backpropagated fields delta_l
        '''
        raise NotImplementedError('backward_pass() must be overridden in child class!')

    def __repr__(self):
        # Use the runtime class name so subclasses print as themselves, e.g. "Sigmoid(N=4)"
        return '{}(N={})'.format(type(self).__name__, self.N)

class ComplexNonlinearity(Nonlinearity):
    '''
    Base class for a complex-valued nonlinearity. Child classes implement forward_pass() plus whichever derivative
    hooks (df_dZ, df_dRe/df_dIm, dRe_dRe/..., df_dr/df_dphi) are required by the chosen mode; backward_pass() then
    assembles the backpropagated field from those hooks.
    '''

    def __init__(self, N, holomorphic=False, mode="condensed"):
        '''
        Initialize the nonlinearity
        :param N: dimensionality of the nonlinear function
        :param holomorphic: whether the function is holomorphic
        :param mode: for nonholomorphic functions, can be "full", "condensed", or "polar". Full requires that you
        specify 4 derivatives for d{Re,Im}/d{Re,Im}, condensed requires only df/d{Re,Im}, and polar takes Z=re^iphi
        '''
        super().__init__(N)
        self.holomorphic = holomorphic  # Whether the function is holomorphic
        self.mode = mode  # Whether to fully expand to du/da or to use df/da

    def forward_pass(self, X: np.ndarray) -> np.ndarray:
        '''
        Transform the input fields in the forward direction
        :param X: input fields
        :return: transformed inputs
        '''
        raise NotImplementedError('forward_pass() must be overridden in child class!')

    def backward_pass(self, gamma: np.ndarray, Z: np.ndarray) -> np.ndarray:
        '''
        Backpropagate a signal through the layer
        :param gamma: backpropagated signal from the (l+1)th layer
        :param Z: fields at which the derivative hooks are evaluated
        :return: backpropagated fields delta_l
        :raises ValueError: if the function is not holomorphic and self.mode is not a recognized mode
        '''
        if self.holomorphic:
            # A holomorphic function has a single complex derivative, so the chain rule is a plain product
            return gamma * self.df_dZ(Z)

        if self.mode == "full":
            a, b = np.real(Z), np.imag(Z)
            return (np.real(gamma) * (self.dRe_dRe(a, b) - 1j * self.dRe_dIm(a, b))
                    + np.imag(gamma) * (-1 * self.dIm_dRe(a, b) + 1j * self.dIm_dIm(a, b)))

        if self.mode == "condensed":
            a, b = np.real(Z), np.imag(Z)
            return np.real(gamma * self.df_dRe(a, b)) - 1j * np.real(gamma * self.df_dIm(a, b))

        if self.mode == "polar":
            r, phi = np.abs(Z), np.angle(Z)
            return np.exp(-1j * phi) * \
                   (np.real(gamma * self.df_dr(r, phi)) - 1j / r * np.real(gamma * self.df_dphi(r, phi)))

        # Fail loudly instead of silently handing None to the previous layer
        raise ValueError(
            'Unknown mode "{}" for {}; expected "full", "condensed", or "polar"'.format(
                self.mode, type(self).__name__))

    def df_dZ(self, Z: np.ndarray) -> np.ndarray:
        '''Gives the total complex derivative of the (holomorphic) nonlinearity with respect to the input'''
        raise NotImplementedError

    def df_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the nonlinearity with respect to the real part alpha of the input'''
        raise NotImplementedError

    def df_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the nonlinearity with respect to the imaginary part beta of the input'''
        raise NotImplementedError

    def dRe_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the real part of the nonlinearity w.r.t. the real part of the input'''
        raise NotImplementedError

    def dRe_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the real part of the nonlinearity w.r.t. the imaginary part of the input'''
        raise NotImplementedError

    def dIm_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the imaginary part of the nonlinearity w.r.t. the real part of the input'''
        raise NotImplementedError

    def dIm_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the imaginary part of the nonlinearity w.r.t. the imaginary part of the input'''
        raise NotImplementedError

    def df_dr(self, r: np.ndarray, phi: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the nonlinearity with respect to the magnitude r of the input'''
        raise NotImplementedError

    def df_dphi(self, r: np.ndarray, phi: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the nonlinearity with respect to the angle phi of the input'''
        raise NotImplementedError
class SPMActivation(ComplexNonlinearity):
    '''
    Lossless SPM activation function: each field is multiplied by a unit-magnitude phase factor whose angle is
    proportional to |Z|^2, i.e. f(Z) = Z * exp(-1j * gain * |Z|^2)

    Parameters
    ---------------
        gain [ rad/(V^2/m^2) ] : The amount of phase shift per unit input "power"
    '''

    def __init__(self, N, gain):
        super().__init__(N, mode="condensed")
        self.gain = gain

    def _phase_factor(self, Z):
        '''Returns exp(-1j * gain * |Z|^2), the factor shared by the forward pass and both derivatives'''
        return np.exp(-1j * self.gain * np.square(np.abs(Z)))

    def forward_pass(self, Z: np.ndarray):
        return Z * self._phase_factor(Z)

    def df_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Derivative of f with respect to the real part a of the input Z = a + 1j * b'''
        k = self.gain
        polynomial = -2j * np.square(a) * k + 2 * a * b * k + 1
        return self._phase_factor(a + 1j * b) * polynomial

    def df_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Derivative of f with respect to the imaginary part b of the input Z = a + 1j * b'''
        k = self.gain
        polynomial = -2j * a * b * k + 2 * np.square(b) * k + 1j
        return self._phase_factor(a + 1j * b) * polynomial
class ElectroOpticActivation(ComplexNonlinearity):
    '''
    Electro-optic activation function with intensity modulation (remod).

    This activation can be configured either in terms of its physical parameters, detailed below, or directly in
    terms of the feedforward phase gain, g and the biasing phase, phi_b. Whichever of g and phi_b is not given
    explicitly is computed from the electro-optic parameters below.

    Physical parameters and units
    ------------------------------
        alpha: Amount of power tapped off to PD [unitless]
        responsivity: PD responsivity [Amps/Watt]
            NOTE(review): previously documented as Watts/amp; the gain formula needs current per unit optical
            power for the units to work out -- confirm
        area: Modal area [micron^2]
        V_pi: Modulator V_pi (voltage required for a pi phase shift) [Volts]
        V_bias: Modulator static bias [Volts]
        R: Transimpedance gain [Ohms]
        impedance: Characteristic impedance for computing optical power [Ohms]
    '''

    def __init__(self, N, alpha=0.1, responsivity=0.8, area=1.0, V_pi=10.0, V_bias=10.0, R=1e3,
                 impedance=120 * np.pi, g=None, phi_b=None):
        '''
        Initialize the activation
        :param N: dimensionality of the nonlinear function
        :param g: feedforward phase gain; if None it is derived from the physical parameters
        :param phi_b: phase bias; if None it is derived from V_bias and V_pi
        '''
        super().__init__(N, mode="condensed")

        self.alpha = alpha

        # Each override is honored on its own, so passing only one of g / phi_b is not silently discarded.
        # The 1e-12 factor converts the modal area from micron^2 to m^2.
        if g is not None:
            self.g = g
        else:
            self.g = np.pi * alpha * R * responsivity * area * 1e-12 / 2 / V_pi / impedance

        if phi_b is not None:
            self.phi_b = phi_b
        else:
            self.phi_b = np.pi * V_bias / V_pi

    def forward_pass(self, Z: np.ndarray):
        alpha, g, phi_b = self.alpha, self.g, self.phi_b
        return 1j * np.sqrt(1 - alpha) * np.exp(-1j * 0.5 * g * np.square(np.abs(Z)) - 1j * 0.5 * phi_b) * np.cos(
            0.5 * g * np.square(np.abs(Z)) + 0.5 * phi_b) * Z

    def df_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Derivative of the activation with respect to the real part a of the input Z = a + 1j * b'''
        alpha, g, phi_b = self.alpha, self.g, self.phi_b
        # Half of the intensity-dependent phase; it appears in both the sin and the cos term
        theta = 0.5 * a ** 2 * g + 0.5 * b ** 2 * g + 0.5 * phi_b
        prefactor = np.sqrt(1 - alpha) * np.exp((-0.5 * 1j) * g * (a - 1j * b) * (a + 1j * b) - (0.5 * 1j) * phi_b)
        return prefactor * (
                a * g * (b - 1j * a) * np.sin(theta)
                + (a ** 2 * g + 1j * a * b * g + 1j) * np.cos(theta))

    def df_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Derivative of the activation with respect to the imaginary part b of the input Z = a + 1j * b'''
        alpha, g, phi_b = self.alpha, self.g, self.phi_b
        theta = 0.5 * a ** 2 * g + 0.5 * b ** 2 * g + 0.5 * phi_b
        prefactor = np.sqrt(1 - alpha) * np.exp((-0.5 * 1j) * g * (a - 1j * b) * (a + 1j * b) - (0.5 * 1j) * phi_b)
        return prefactor * (
                b * g * (b - 1j * a) * np.sin(theta)
                + (a * b * g + 1j * b ** 2 * g - 1) * np.cos(theta))
class Abs(ComplexNonlinearity):
    '''
    Magnitude nonlinearity, mapping z -> |z|. Derivative hooks are provided for all three backpropagation modes, so
    the mode argument may be any of "full", "condensed", and "polar".
    '''

    def __init__(self, N, mode="polar"):
        super().__init__(N, holomorphic=False, mode=mode)

    def forward_pass(self, X: np.ndarray):
        return np.abs(X)

    # --- "full" mode: the output is purely real, so only the real-part derivatives are nonzero ---

    def dRe_dRe(self, a: np.ndarray, b: np.ndarray):
        magnitude = np.sqrt(a ** 2 + b ** 2)
        return a / magnitude

    def dRe_dIm(self, a: np.ndarray, b: np.ndarray):
        magnitude = np.sqrt(a ** 2 + b ** 2)
        return b / magnitude

    def dIm_dRe(self, a: np.ndarray, b: np.ndarray):
        # Multiplying by zero yields zeros with the same shape as the input
        return 0 * a

    def dIm_dIm(self, a: np.ndarray, b: np.ndarray):
        return 0 * b

    # --- "condensed" mode ---

    def df_dRe(self, a: np.ndarray, b: np.ndarray):
        magnitude = np.sqrt(a ** 2 + b ** 2)
        return a / magnitude

    def df_dIm(self, a: np.ndarray, b: np.ndarray):
        magnitude = np.sqrt(a ** 2 + b ** 2)
        return b / magnitude

    # --- "polar" mode: |z| = r, so the radial derivative is 1 and the angular derivative is 0 ---

    def df_dr(self, r: np.ndarray, phi: np.ndarray):
        return np.ones(r.shape, dtype=NP_COMPLEX)

    def df_dphi(self, r: np.ndarray, phi: np.ndarray):
        return 0 * phi
class AbsSquared(ComplexNonlinearity):
    '''Squared-magnitude nonlinearity z -> |z|^2, corresponding to power measurement by a photodetector.'''

    def __init__(self, N):
        # Backpropagation uses the polar hooks, where |z|^2 = r^2 depends on r only
        super().__init__(N, holomorphic=False, mode="polar")

    def forward_pass(self, X: np.ndarray):
        magnitude = np.abs(X)
        return magnitude ** 2

    def df_dr(self, r: np.ndarray, phi: np.ndarray):
        '''d(r^2)/dr = 2r'''
        return 2 * r

    def df_dphi(self, r: np.ndarray, phi: np.ndarray):
        '''The output does not depend on the angle; returns zeros shaped like phi'''
        return 0 * phi
class Sigmoid(Nonlinearity):
    '''Logistic sigmoid activation; maps z -> 1 / (1 + np.exp(-z))'''

    def forward_pass(self, X: np.ndarray):
        denominator = 1 + np.exp(-X)
        return 1 / denominator

    def backward_pass(self, gamma: np.ndarray, Z: np.ndarray):
        '''
        Scale the backpropagated signal by the sigmoid derivative sigma * (1 - sigma)
        :param gamma: backpropagated signal from the (l+1)th layer
        :param Z: fields at which the sigmoid (and hence its derivative) is re-evaluated
        :return: backpropagated fields
        '''
        activation = 1 / (1 + np.exp(-Z))
        slope = activation * (1 - activation)
        return slope * gamma
class SoftMax(Nonlinearity):
    '''
    Applies softmax to the inputs along axis 0. Do not use with categorical cross entropy, which implicitly
    includes this.
    '''

    @staticmethod
    def _softmax(X: np.ndarray) -> np.ndarray:
        '''
        Numerically stable softmax along axis 0
        :param X: input fields
        :return: exp(X) / sum(exp(X), axis=0), evaluated without overflowing for large inputs
        '''
        # Subtracting a per-column constant leaves the softmax unchanged mathematically, but keeps exp() from
        # overflowing to inf (and the ratio from becoming nan). The real part is used so that complex-valued
        # inputs are supported as well.
        shift = np.max(np.real(X), axis=0)
        exponentials = np.exp(X - shift)
        return exponentials / np.sum(exponentials, axis=0)

    def forward_pass(self, X: np.ndarray):
        '''
        Transform the input fields in the forward direction
        :param X: input fields
        :return: softmax of X taken along axis 0
        '''
        return self._softmax(X)

    def backward_pass(self, gamma: np.ndarray, Z: np.ndarray):
        '''
        Backpropagate a signal through the softmax
        :param gamma: backpropagated signal from the (l+1)th layer, same shape as Z
        :param Z: fields at which the softmax Jacobian is evaluated
        :return: backpropagated fields with dtype NP_COMPLEX
        '''
        softmax = self._softmax(Z)
        # For a single column s, the Jacobian is J = diag(s) - s s^T, so
        #   (J^T gamma)_j = s_j * gamma_j - s_j * sum_k(s_k * gamma_k)
        # which is evaluated for all columns at once rather than building a dense Jacobian per sample.
        weighted = softmax * gamma
        total_derivs = weighted - softmax * np.sum(weighted, axis=0)
        return total_derivs.astype(NP_COMPLEX)
class LinearMask(ComplexNonlinearity):
    '''Technically not a nonlinearity: apply a linear gain/loss to each element'''

    def __init__(self, N: int, mask=None):
        '''
        Initialize the mask
        :param N: dimensionality of the function
        :param mask: per-element gain/loss factors; when None, a mask of N ones is used
        '''
        super().__init__(N, holomorphic=True)
        self.mask = np.ones(N, dtype=NP_COMPLEX) if mask is None else np.array(mask, dtype=NP_COMPLEX)

    def forward_pass(self, X: np.ndarray):
        # Transposing puts the masked axis last so the mask broadcasts along it; transpose back afterwards
        masked_T = X.T * self.mask
        return masked_T.T

    def df_dZ(self, Z: np.ndarray):
        '''The derivative of a linear map is the mask itself, broadcast to the shape of Z'''
        unit = np.ones(Z.shape)
        derivative_T = unit.T * self.mask
        return derivative_T.T
class bpReLU(ComplexNonlinearity):
    '''
    Discontinuous ReLU that is treated as holomorphic for backpropagation (holomorphic=True)
        f(x_i) = alpha * x_i   if |x_i| <  cutoff
        f(x_i) = x_i           if |x_i| >= cutoff

    Arguments:
    ----------
        cutoff: value of input |x_i| above which to fully transmit, below which to attenuate
        alpha: attenuation factor applied to inputs below the cutoff
    '''

    def __init__(self, N, cutoff=1, alpha=0):
        super().__init__(N, holomorphic=True)
        self.cutoff = cutoff
        self.alpha = alpha

    def forward_pass(self, X: np.ndarray):
        magnitude = np.abs(X)
        transmitted = magnitude >= self.cutoff
        attenuated = magnitude < self.cutoff
        # Boolean masks act as 0/1 weights selecting the branch for each element
        return transmitted * X + attenuated * self.alpha * X

    def df_dZ(self, Z: np.ndarray):
        '''Slope of the piecewise-linear map: 1 at or above the cutoff, alpha below it'''
        magnitude = np.abs(Z)
        transmitted = magnitude >= self.cutoff
        attenuated = magnitude < self.cutoff
        return transmitted * 1 + attenuated * self.alpha * 1
class modReLU(ComplexNonlinearity):
    '''
    Continuous, but non-holomorphic and non-simply backpropagable ReLU of the form
        f(z) = (|z| - cutoff) * z / |z| if |z| >= cutoff (else 0)
    see: https://arxiv.org/pdf/1705.09792.pdf  (note, cutoff subtracted in this definition)

    Arguments:
    ----------
        cutoff: value of input |x_i| above which the input is transmitted (with its magnitude reduced by cutoff),
                and below which the output is 0
    '''

    def __init__(self, N, cutoff=1):
        super().__init__(N, holomorphic=False, mode="polar")
        self.cutoff = cutoff

    def forward_pass(self, X: np.ndarray):
        '''
        Transform the input fields in the forward direction
        :param X: input fields
        :return: fields with magnitude reduced by cutoff (0 where |X| < cutoff) and unchanged phase
        '''
        magnitude = np.abs(X)
        # Where |X| == 0 the phase X / |X| is undefined and a plain division would give nan; dividing by 1 there
        # instead yields the intended output of 0, since the numerator is already 0
        safe_magnitude = np.where(magnitude > 0, magnitude, 1)
        return (magnitude >= self.cutoff) * (magnitude - self.cutoff) * X / safe_magnitude

    def df_dr(self, r: np.ndarray, phi: np.ndarray):
        '''Derivative of (r - cutoff) * exp(1j * phi) with respect to r, zeroed below the cutoff'''
        return (r >= self.cutoff) * np.exp(1j * phi)

    def df_dphi(self, r: np.ndarray, phi: np.ndarray):
        '''Derivative of (r - cutoff) * exp(1j * phi) with respect to phi, zeroed below the cutoff'''
        return (r >= self.cutoff) * 1j * (r - self.cutoff) * np.exp(1j * phi)
class cReLU(ComplexNonlinearity):
    '''
    Non-holomorphic ReLU that rectifies the real and imaginary parts independently:
        f(z) = ReLU(Re{z}) + 1j * ReLU(Im{z})
    see: https://arxiv.org/pdf/1705.09792.pdf
    '''

    def __init__(self, N):
        super().__init__(N, holomorphic=False, mode="condensed")

    def forward_pass(self, X: np.ndarray):
        real_part, imag_part = np.real(X), np.imag(X)
        # Boolean masks zero out the non-positive components
        rectified_real = (real_part > 0) * real_part
        rectified_imag = 1j * (imag_part > 0) * imag_part
        return rectified_real + rectified_imag

    def df_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Derivative with respect to the real part: 1 where a > 0, otherwise 0 (returned as a boolean mask)'''
        return a > 0

    def df_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Derivative with respect to the imaginary part: 1j where b > 0, otherwise 0'''
        return 1j * (b > 0)
class zReLU(ComplexNonlinearity):
    '''
    Non-holomorphic ReLU that transmits only inputs with positive real and imaginary parts:
        f(z) = z if Re{z} > 0 and Im{z} > 0, else 0
    see: https://arxiv.org/pdf/1705.09792.pdf
    '''

    def __init__(self, N):
        super().__init__(N, holomorphic=False, mode="condensed")

    def forward_pass(self, X: np.ndarray):
        positive_real = np.real(X) > 0
        positive_imag = np.imag(X) > 0
        return positive_real * positive_imag * X

    def df_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Derivative with respect to the real part: 1 where both a > 0 and b > 0, otherwise 0'''
        positive_real = a > 0
        positive_imag = b > 0
        return positive_real * positive_imag

    def df_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Derivative with respect to the imaginary part: 1j where both a > 0 and b > 0, otherwise 0'''
        positive_real = a > 0
        positive_imag = b > 0
        return positive_real * positive_imag * 1j