Source code for neuroptica.nonlinearities

'''This module contains a collection of physical and aphysical activation functions. Nonlinearities can be incorporated
into an optical neural network by using the Activation(nonlinearity) NetworkLayer.'''


import numpy as np

from neuroptica.settings import NP_COMPLEX


class Nonlinearity:

    def __init__(self, N):
        '''
        Initialize the nonlinearity
        :param N: dimensionality of the nonlinear function
        '''
        self.N = N  # Dimensionality of the nonlinearity

    def forward_pass(self, X: np.ndarray) -> np.ndarray:
        '''
        Transform the input fields in the forward direction
        :param X: input fields
        :return: transformed inputs
        '''
        raise NotImplementedError('forward_pass() must be overridden in child class!')

    def backward_pass(self, gamma: np.ndarray, Z: np.ndarray) -> np.ndarray:
        '''
        Backpropagate a signal through the layer
        :param gamma: backpropagated signal from the (l+1)th layer
        :param Z: output fields from the forward_pass() run
        :return: backpropagated fields delta_l
        '''
        raise NotImplementedError('backward_pass() must be overridden in child class!')

    def __repr__(self):
        return type(self).__name__ + '(N={})'.format(self.N)


[docs]class ComplexNonlinearity(Nonlinearity):
    '''
    Base class for a complex-valued nonlinearity
    '''

[docs]    def __init__(self, N, holomorphic=False, mode="condensed"):
        '''
        Initialize the nonlinearity
        :param N: dimensionality of the nonlinear function
        :param holomorphic: whether the function is holomorphic
        :param mode: for nonholomorphic functions, can be "full", "condensed", or "polar". Full requires that you
        specify 4 derivatives for d{Re,Im}/d{Re,Im}, condensed requires only df/d{Re,Im}, and polar takes Z=re^iphi
        '''
        super().__init__(N)
        self.holomorphic = holomorphic  # Whether the function is holomorphic
        self.mode = mode  # Whether to fully expand to du/da or to use df/da

[docs]    def forward_pass(self, X: np.ndarray) -> np.ndarray:
        '''
        Transform the input fields in the forward direction
        :param X: input fields
        :return: transformed inputs
        '''
        raise NotImplementedError('forward_pass() must be overridden in child class!')

[docs]    def backward_pass(self, gamma: np.ndarray, Z: np.ndarray) -> np.ndarray:
        '''
        Backpropagate a signal through the layer
        :param gamma: backpropagated signal from the (l+1)th layer
        :param Z: output fields from the forward_pass() run
        :return: backpropagated fields delta_l
        '''
        # raise NotImplementedError('backward_pass() must be overridden in child class!')
        if self.holomorphic:
            return gamma * self.df_dZ(Z)

        else:

            if self.mode == "full":
                a, b = np.real(Z), np.imag(Z)
                return np.real(gamma) * (self.dRe_dRe(a, b) - 1j * self.dRe_dIm(a, b)) + \
                       np.imag(gamma) * (-1 * self.dIm_dRe(a, b) + 1j * self.dIm_dIm(a, b))

            elif self.mode == "condensed":
                a, b = np.real(Z), np.imag(Z)
                return np.real(gamma * self.df_dRe(a, b)) - 1j * np.real(gamma * self.df_dIm(a, b))

            elif self.mode == "polar":
                r, phi = np.abs(Z), np.angle(Z)
                return np.exp(-1j * phi) * \
                       (np.real(gamma * self.df_dr(r, phi)) - 1j / r * np.real(gamma * self.df_dphi(r, phi)))

[docs]    def df_dZ(self, Z: np.ndarray) -> np.ndarray:
        '''Gives the total complex derivative of the (holomorphic) nonlinearity with respect to the input'''
        raise NotImplementedError

[docs]    def df_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the nonlinearity with respect to the real part alpha of the input'''
        raise NotImplementedError

[docs]    def df_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the nonlinearity with respect to the imaginary part beta of the input'''
        raise NotImplementedError

[docs]    def dRe_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the real part of the nonlienarity w.r.t. the real part of the input'''
        raise NotImplementedError

[docs]    def dRe_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the real part of the nonlienarity w.r.t. the imaginary part of the input'''
        raise NotImplementedError

[docs]    def dIm_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the imaginary part of the nonlienarity w.r.t. the real part of the input'''
        raise NotImplementedError

[docs]    def dIm_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the imaginary part of the nonlienarity w.r.t. the imaginary part of the input'''
        raise NotImplementedError

[docs]    def df_dr(self, r: np.ndarray, phi: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the nonlinearity with respect to the magnitude r of the input'''
        raise NotImplementedError

[docs]    def df_dphi(self, r: np.ndarray, phi: np.ndarray) -> np.ndarray:
        '''Gives the derivative of the nonlinearity with respect to the angle phi of the input'''
        raise NotImplementedError


[docs]class SPMActivation(ComplexNonlinearity):
    '''
    Lossless SPM activation function

    Parameters
    ---------------
        phase_gain [ rad/(V^2/m^2) ] : The amount of phase shift per unit input "power"
    '''

[docs]    def __init__(self, N, gain):
        super().__init__(N, mode="condensed")
        self.gain = gain

[docs]    def forward_pass(self, Z: np.ndarray):
        gain = self.gain
        return Z * np.exp(-1j * gain * np.square(np.abs(Z)))

[docs]    def df_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        gain = self.gain
        Z = a + 1j * b
        return np.exp(-1j * gain * np.square(np.abs(Z))) * (-2j * np.square(a) * gain + 2 * a * b * gain + 1)

[docs]    def df_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        gain = self.gain
        Z = a + 1j * b
        return np.exp(-1j * gain * np.square(np.abs(Z))) * (-2j * a * b * gain + 2 * np.square(b) * gain + 1j)


[docs]class ElectroOpticActivation(ComplexNonlinearity):
    '''
    Electro-optic activation function with intensity modulation (remod).

    This activation can be configured either in terms of its physical parameters, detailed
    below, or directly in terms of the feedforward phase gain, g and the biasing phase, phi_b.

    If the electro-optic parameters below are specified g and phi_b are computed for the user.

    Physical parameters and units
    ------------------------------
        alpha: Amount of power tapped off to PD [unitless]
        responsivity: PD responsivity [Watts/amp]
        area: Modal area [micron^2]
        V_pi: Modulator V_pi (voltage required for a pi phase shift) [Volts]
        V_bias: Modulator static bias [Volts]
        R: Transimpedance gain [Ohms]
        impedance: Characteristic impedance for computing optical power [Ohms]
    '''

[docs]    def __init__(self, N, alpha=0.1, responsivity=0.8, area=1.0,
                 V_pi=10.0, V_bias=10.0, R=1e3, impedance=120 * np.pi,
                 g=None, phi_b=None):

        super().__init__(N, mode="condensed")

        self.alpha = alpha

        if g is not None and phi_b is not None:
            self.g = g
            self.phi_b = phi_b

        else:
            # Convert into "feedforward phase gain" and "phase bias" parameters
            self.g = np.pi * alpha * R * responsivity * area * 1e-12 / 2 / V_pi / impedance
            self.phi_b = np.pi * V_bias / V_pi

[docs]    def forward_pass(self, Z: np.ndarray):
        alpha, g, phi_b = self.alpha, self.g, self.phi_b
        return 1j * np.sqrt(1 - alpha) * np.exp(-1j * 0.5 * g * np.square(np.abs(Z)) - 1j * 0.5 * phi_b) * np.cos(
            0.5 * g * np.square(np.abs(Z)) + 0.5 * phi_b) * Z

[docs]    def df_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        alpha, g, phi_b = self.alpha, self.g, self.phi_b
        return np.sqrt(1 - alpha) * np.exp((-0.5 * 1j) * g * (a - 1j * b) * (a + 1j * b) - (0.5 * 1j) * phi_b) * (
                a * g * (b - 1j * a) * np.sin(0.5 * a ** 2 * g + 0.5 * b ** 2 * g + 0.5 * phi_b) + (
                a ** 2 * g + 1j * a * b * g + 1j) * np.cos(0.5 * a ** 2 * g + 0.5 * b ** 2 * g + 0.5 * phi_b))

[docs]    def df_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        alpha, g, phi_b = self.alpha, self.g, self.phi_b
        return np.sqrt(1 - alpha) * np.exp((-0.5 * 1j) * g * (a - 1j * b) * (a + 1j * b) - (0.5 * 1j) * phi_b) * (
                b * g * (b - 1j * a) * np.sin(0.5 * a ** 2 * g + 0.5 * b ** 2 * g + 0.5 * phi_b) + (
                a * b * g + 1j * b ** 2 * g - 1) * np.cos(0.5 * a ** 2 * g + 0.5 * b ** 2 * g + 0.5 * phi_b))


[docs]class Abs(ComplexNonlinearity):
    '''
    Represents a transformation z -> |z|. This can be called in any of "full", "condensed", and "polar" modes
    '''

[docs]    def __init__(self, N, mode="polar"):
        super().__init__(N, holomorphic=False, mode=mode)

[docs]    def forward_pass(self, X: np.ndarray):
        return np.abs(X)

[docs]    def dRe_dRe(self, a: np.ndarray, b: np.ndarray):
        return a / np.sqrt(a ** 2 + b ** 2)

[docs]    def dRe_dIm(self, a: np.ndarray, b: np.ndarray):
        return b / np.sqrt(a ** 2 + b ** 2)

[docs]    def dIm_dRe(self, a: np.ndarray, b: np.ndarray):
        return 0 * a

[docs]    def dIm_dIm(self, a: np.ndarray, b: np.ndarray):
        return 0 * b

[docs]    def df_dRe(self, a: np.ndarray, b: np.ndarray):
        return a / np.sqrt(a ** 2 + b ** 2)

[docs]    def df_dIm(self, a: np.ndarray, b: np.ndarray):
        return b / np.sqrt(a ** 2 + b ** 2)

[docs]    def df_dr(self, r: np.ndarray, phi: np.ndarray):
        return np.ones(r.shape, dtype=NP_COMPLEX)

[docs]    def df_dphi(self, r: np.ndarray, phi: np.ndarray):
        return 0 * phi


[docs]class AbsSquared(ComplexNonlinearity):
    '''Maps z -> |z|^2, corresponding to power measurement by a photodetector.'''

[docs]    def __init__(self, N):
        super().__init__(N, holomorphic=False, mode="polar")

[docs]    def forward_pass(self, X: np.ndarray):
        return np.abs(X) ** 2

[docs]    def df_dr(self, r: np.ndarray, phi: np.ndarray):
        return 2 * r

[docs]    def df_dphi(self, r: np.ndarray, phi: np.ndarray):
        return 0 * phi


[docs]class Sigmoid(Nonlinearity):
    '''Sigmoid activation; maps z -> 1 / (1 + np.exp(-z))'''

[docs]    def forward_pass(self, X: np.ndarray):
        return 1 / (1 + np.exp(-X))

[docs]    def backward_pass(self, gamma: np.ndarray, Z: np.ndarray):
        sigma = 1 / (1 + np.exp(-Z))
        return sigma * (1 - sigma) * gamma


[docs]class SoftMax(Nonlinearity):
    '''Applies softmax to the inputs. Do not use in with categorical cross entropy, which implicitly includes this.'''

[docs]    def forward_pass(self, X: np.ndarray):
        return np.exp(X) / np.sum(np.exp(X), axis=0)

[docs]    def backward_pass(self, gamma: np.ndarray, Z: np.ndarray):
        softmax = np.exp(Z) / np.sum(np.exp(Z), axis=0)

        n_features, n_samples = Z.shape
        total_derivs = np.zeros(Z.shape, dtype=NP_COMPLEX)

        for i in range(n_samples):
            s = softmax[:, i].reshape(-1, 1)
            jac = np.diagflat(s) - np.dot(s, s.T)
            total_derivs[:, i] = jac.T @ gamma[:, i]

        return total_derivs


[docs]class LinearMask(ComplexNonlinearity):
    '''Technically not a nonlinearity: apply a linear gain/loss to each element'''

[docs]    def __init__(self, N: int, mask=None):
        super().__init__(N, holomorphic=True)
        if mask is None:
            self.mask = np.ones(N, dtype=NP_COMPLEX)
        else:
            self.mask = np.array(mask, dtype=NP_COMPLEX)

[docs]    def forward_pass(self, X: np.ndarray):
        return (X.T * self.mask).T

[docs]    def df_dZ(self, Z: np.ndarray):
        z_broadcaster = np.ones(Z.shape)
        return (z_broadcaster.T * self.mask).T
        # return ((Z.T * self.mask) / Z.T).T


[docs]class bpReLU(ComplexNonlinearity):
    '''
    Discontinuous (but holomorphic and backpropable) ReLU
    f(x_i) = alpha * x_i   if |x_i| <   cutoff
    f(x_i) = x_i           if |x_i| >=   cutoff

    Arguments:
    ----------
        cutoff: value of input |x_i| above which to fully transmit, below which to attentuate
        alpha: attenuation factor f(x_i) = f
    '''

[docs]    def __init__(self, N, cutoff=1, alpha=0):
        super().__init__(N, holomorphic=True)
        self.cutoff = cutoff
        self.alpha = alpha

[docs]    def forward_pass(self, X: np.ndarray):
        return (np.abs(X) >= self.cutoff) * X + (np.abs(X) < self.cutoff) * self.alpha * X

[docs]    def df_dZ(self, Z: np.ndarray):
        return (np.abs(Z) >= self.cutoff) * 1 + (np.abs(Z) < self.cutoff) * self.alpha * 1


[docs]class modReLU(ComplexNonlinearity):
    '''
    Contintous, but non-holomorphic and non-simply backpropabable ReLU of the form
    f(z) = (|z| - cutoff) * z / |z| if |z| >= cutoff (else 0)
    see: https://arxiv.org/pdf/1705.09792.pdf  (note, cutoff subtracted in this definition)

    Arguments:
    ----------
        cutoff: value of input |x_i| above which to
    '''

[docs]    def __init__(self, N, cutoff=1):
        super().__init__(N, holomorphic=False, mode="polar")
        self.cutoff = cutoff

[docs]    def forward_pass(self, X: np.ndarray):
        return (np.abs(X) >= self.cutoff) * (np.abs(X) - self.cutoff) * X / np.abs(X)

[docs]    def df_dr(self, r: np.ndarray, phi: np.ndarray):
        return (r >= self.cutoff) * np.exp(1j * phi)

[docs]    def df_dphi(self, r: np.ndarray, phi: np.ndarray):
        return (r >= self.cutoff) * 1j * (r - self.cutoff) * np.exp(1j * phi)


[docs]class cReLU(ComplexNonlinearity):
    '''
    Contintous, but non-holomorphic and non-simply backpropabable ReLU of the form
    f(z) = ReLU(Re{z}) + 1j * ReLU(Im{z})
    see: https://arxiv.org/pdf/1705.09792.pdf
    '''

[docs]    def __init__(self, N):
        super().__init__(N, holomorphic=False, mode="condensed")

[docs]    def forward_pass(self, X: np.ndarray):
        X_re = np.real(X)
        X_im = np.imag(X)
        return (X_re > 0) * X_re + 1j * (X_im > 0) * X_im

[docs]    def df_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        return (a > 0)

[docs]    def df_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        return 1j * (b > 0)


[docs]class zReLU(ComplexNonlinearity):
    '''
    Contintous, but non-holomorphic and non-simply backpropabable ReLU of the form
    f(z) = z if Re{z} > 0 and Im{z} > 0, else 0
    see: https://arxiv.org/pdf/1705.09792.pdf
    '''

[docs]    def __init__(self, N):
        super().__init__(N, holomorphic=False, mode="condensed")

[docs]    def forward_pass(self, X: np.ndarray):
        X_re = np.real(X)
        X_im = np.imag(X)
        return (X_re > 0) * (X_im > 0) * X

[docs]    def df_dRe(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        return (a > 0) * (b > 0)

[docs]    def df_dIm(self, a: np.ndarray, b: np.ndarray) -> np.ndarray:
        return (a > 0) * (b > 0) * 1j