"""
# -*- coding: utf-8 -*-
#
# Copyright 2021 Michael Büsch <m@bues.ch>
#
# Licensed under the Apache License version 2.0
# or the MIT license, at your option.
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
"""

__all__ = [
    "Activation",
    "Sigmoid",
    "ReLU",
    "LReLU",
    "Tanh",
    "Softmax",
]

from typing import Optional, Tuple
from abc import ABC, abstractmethod
import numpy as np

def exp(x):
    """Exponential function with a clamped argument.

    The argument is limited to at most 64.0 (element-wise) before
    np.exp() is applied, so large inputs do not overflow.

    x: Scalar or array.
    Returns e raised to the power of min(x, 64.0).
    """
    upper_limit = 64.0
    clamped = np.minimum(x, upper_limit)
    return np.exp(clamped)

class Activation(ABC):
    """Abstract base class of the activation functions.

    Subclasses provide the forward function fn() and its derivative fn_d().
    The backward propagation step is implemented here on top of fn_d().
    """

    @abstractmethod
    def fn(self, z):
        """Forward activation function.

        z: Layer MAC result.
        Returns the activation of z.
        """

    @abstractmethod
    def fn_d(self, z):
        """Activation function derivative.

        z: Layer MAC result.
        Returns the derivative of the activation function at z.
        """

    def backward_prop(
            self,
            w: np.ndarray,
            da: np.ndarray,
            x: np.ndarray,
            z: np.ndarray,
            m: int,
            return_prev: bool,
    ) -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]:
        """
        Default implementation of neuron backward propagation.

        Inputs:
        w: The weights of this layer. Shape (inputs, outputs).
        da: Loss derivative w.r.t. this neuron's output. Shape (m, outputs).
        x: Layer input. Shape (m, inputs).
        z: Layer MAC result. Shape (m, outputs).
        m: Number of samples.
        return_prev: Return the loss derivative w.r.t. output of previous layer.

        Returns:
        tuple(dw, db, da_prev)
        dw: Loss derivative w.r.t. weights. Same shape as w.
        db: Loss derivative w.r.t. biases. Shape (1, outputs).
        da_prev: Loss derivative w.r.t. previous neuron's output (if return_prev) or None.
        """
        # Sanity check of the argument shapes.
        out_shape = (m, w.shape[1])
        assert da.shape == out_shape
        assert x.shape == (m, w.shape[0])
        assert z.shape == out_shape

        # Chain rule: Loss derivative w.r.t. Z.
        grad_z = da * self.fn_d(z)
        assert grad_z.shape == out_shape

        # Loss derivative w.r.t. weights and bias, averaged over all samples.
        inv_m = 1.0 / m
        dw = np.matmul(x.T, grad_z) * inv_m
        db = grad_z.sum(axis=0, keepdims=True) * inv_m
        assert dw.shape == w.shape
        assert db.shape == (1, w.shape[1])

        # Loss derivative w.r.t. output of previous layer (only on request).
        if return_prev:
            da_prev = np.matmul(grad_z, w.T)
        else:
            da_prev = None

        return (dw, db, da_prev)

class Sigmoid(Activation):
    """Logistic sigmoid activation function.
    """

    def fn(self, z):
        """Forward function: 1 / (1 + e^-z), evaluated element-wise.
        """
        denominator = 1.0 + exp(-z)
        return 1.0 / denominator

    def fn_d(self, z):
        """Derivative: s(z) * (1 - s(z)), where s is the forward function.
        """
        activated = self.fn(z)
        complement = 1.0 - activated
        return activated * complement

class ReLU(Activation):
    """Rectified linear unit (ReLU) activation function.
    """

    def fn(self, z):
        """Forward function: Element-wise maximum of 0 and z.
        """
        floor = 0.0
        return np.maximum(floor, z)

    def fn_d(self, z):
        """Derivative: 1.0 where z is greater than 0, otherwise 0.0.
        """
        is_positive = z > 0.0
        return is_positive.astype("float")

class LReLU(Activation):
    """Leaky ReLU activation function.

    Positive inputs are passed through unchanged.
    All other inputs are scaled by the slope alpha.
    """

    def __init__(self, alpha):
        """
        alpha: Slope that is applied to non-positive inputs.
        """
        self.alpha = alpha

    def fn(self, z):
        """Forward function: z where z > 0, otherwise alpha * z.
        """
        # Select explicitly on the sign of z instead of using
        # max(alpha * z, z), which is only correct for alpha <= 1.
        return np.where(z > 0.0, z, self.alpha * z)

    def fn_d(self, z):
        """Derivative: 1.0 where z > 0, otherwise alpha.
        """
        # Select explicitly on the sign of z instead of clamping
        # (z > 0) + alpha to 1, which is only correct for 0 <= alpha <= 1.
        return np.where(z > 0.0, 1.0, self.alpha)

class Tanh(Activation):
    """Tanh activation function.
    """

    def fn(self, z):
        """Forward function: Hyperbolic tangent of z, evaluated element-wise.
        """
        # np.tanh() does not overflow for large |z| and avoids the
        # cancellation of (e^z - e^-z) / (e^z + e^-z) for z close to zero.
        return np.tanh(z)

    def fn_d(self, z):
        """Derivative: 1 - tanh(z)^2.
        """
        tz = self.fn(z)
        return 1.0 - (tz * tz)

class Softmax(Activation):
    """Softmax activation function.

    Only the forward function is implemented.
    The derivative and the backward propagation raise NotImplementedError.
    """

    def fn(self, z):
        """Forward function: Row-wise softmax.

        z: Two dimensional array. One sample per row.
        Returns an array of the same shape as z.
        Each row is e^z divided by the sum of e^z over that row.
        """
        assert z.ndim == 2
        # Shift every row by its own maximum. This does not change the
        # result mathematically, but keeps the largest exponent at 0.
        # The per-row maximum (instead of the global maximum) is used,
        # so that a row with values far below the global maximum does not
        # underflow to all zeros, which would result in 0/0 = NaN.
        row_max = z.max(axis=1, keepdims=True)
        ze = exp(z - row_max)
        return ze / np.sum(ze, axis=1, keepdims=True)

    def fn_d(self, z):
        """Not available for Softmax. Always raises NotImplementedError.
        """
        raise NotImplementedError

    def backward_prop(self,
                      w: np.ndarray,
                      da: np.ndarray,
                      x: np.ndarray,
                      z: np.ndarray,
                      m: int,
                      return_prev: bool)\
                      -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]:
        """Backward propagation is not implemented for Softmax, yet.

        Always raises NotImplementedError.
        """
        # TODO: Implement the Softmax backward propagation.
        raise NotImplementedError

# vim: ts=4 sw=4 expandtab