# mlplib/activation.py
"""
# -*- coding: utf-8 -*-
#
# Copyright 2021 Michael Büsch <m@bues.ch>
#
# Licensed under the Apache License version 2.0
# or the MIT license, at your option.
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
"""

__all__ = [
    "Activation",
    "Sigmoid",
    "ReLU",
    "LReLU",
    "Tanh",
    "Softmax",
]

from typing import Optional, Tuple
from abc import ABC, abstractmethod
import numpy as np

def exp(x):
    # Exponential with the argument clipped, to avoid floating point
    # overflow in np.exp() for large inputs.
    return np.exp(np.minimum(x, 64.0))

class Activation(ABC):
    @abstractmethod
    def fn(self, z):
        """Forward activation function.
        """

    @abstractmethod
    def fn_d(self, z):
        """Activation function derivative.
        """

    def backward_prop(self,
                      w: np.ndarray,
                      da: np.ndarray,
                      x: np.ndarray,
                      z: np.ndarray,
                      m: int,
                      return_prev: bool)\
                      -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]:
        """
        Default implementation of neuron backward propagation.

        Inputs:
        w: The weights of this layer.
        da: Loss derivative w.r.t. this neuron's output.
        x: Layer input.
        z: Layer MAC result.
        m: Number of samples.
        return_prev: Return the loss derivative w.r.t. output of previous layer.

        Returns:
        tuple(dw, db, da_prev)
        dw: Loss derivative w.r.t. weights.
        db: Loss derivative w.r.t. biases.
        da_prev: Loss derivative w.r.t. previous neuron's output (if return_prev) or None.
        """
        assert da.shape == (m, w.shape[1])
        assert x.shape == (m, w.shape[0])
        assert z.shape == (m, w.shape[1])

        # Calculate the loss derivative w.r.t. Z.
        dz = da * self.fn_d(z)
        assert dz.shape == (m, w.shape[1])

        # Calculate the loss derivative w.r.t. weights and bias.
        m_reci = 1.0 / m
        dw = (x.T @ dz) * m_reci
        db = np.sum(dz, axis=0, keepdims=True) * m_reci
        assert dw.shape == w.shape
        assert db.shape == (1, w.shape[1])

        # Calculate loss derivative w.r.t. output of previous layer.
        da_prev = (dz @ w.T) if return_prev else None

        return (dw, db, da_prev)
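
    # Illustrative usage sketch (an addition for clarity; the loss gradient
    # helper and the learning rate below are placeholder names, not part of
    # this module):
    #
    #   act = Sigmoid()
    #   z = x @ w + b          # forward MAC: x is (m, n_in), w is (n_in, n_out)
    #   a = act.fn(z)          # layer output
    #   da = dloss_da(a, y)    # upstream loss gradient, shape (m, n_out)
    #   dw, db, da_prev = act.backward_prop(w, da, x, z, m, return_prev=True)
    #   w = w - learning_rate * dw
    #   b = b - learning_rate * db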

class Sigmoid(Activation):
    """Sigmoid activation function.
    """

    def fn(self, z):
        return 1.0 / (1.0 + exp(-z))

    def fn_d(self, z):
        sz = self.fn(z)
        return sz * (1.0 - sz)

class ReLU(Activation):
    """ReLU activation function.
    """

    def fn(self, z):
        return np.maximum(0.0, z)

    def fn_d(self, z):
        return (z > 0.0).astype("float")

class LReLU(Activation):
    """Leaky ReLU activation function.
    """

    def __init__(self, alpha):
        self.alpha = alpha

    def fn(self, z):
        return np.maximum(self.alpha * z, z)

    def fn_d(self, z):
        # Yields 1.0 where z > 0.0 and alpha elsewhere (for alpha <= 1.0).
        return np.minimum((z > 0.0).astype("float") + self.alpha, 1.0)

class Tanh(Activation):
    """Tanh activation function.
    """

    def fn(self, z):
        ez = exp(z)
        emz = exp(-z)
        return (ez - emz) / (ez + emz)

    def fn_d(self, z):
        tz = self.fn(z)
        return 1.0 - (tz * tz)

class Softmax(Activation):
    """Softmax activation function.
    """

    def fn(self, z):
        assert z.ndim == 2
        # Subtract the per-row maximum for numerical stability, so that no
        # row underflows to an all-zero exponential.
        ze = exp(z - z.max(axis=1, keepdims=True))
        return ze / np.sum(ze, axis=1, keepdims=True)

    def fn_d(self, z):
        # The softmax derivative is a full Jacobian per sample and does not
        # fit the elementwise fn_d() interface.
        raise NotImplementedError

    def backward_prop(self,
                      w: np.ndarray,
                      da: np.ndarray,
                      x: np.ndarray,
                      z: np.ndarray,
                      m: int,
                      return_prev: bool)\
                      -> Tuple[np.ndarray, np.ndarray, Optional[np.ndarray]]:
        # TODO: Softmax backward propagation is not implemented, yet.
        raise NotImplementedError
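        # One possible implementation sketch (an assumption, not necessarily
        # the intended approach): with a = softmax(z), the per-sample softmax
        # Jacobian gives dz = a * (da - sum(da * a, axis=1, keepdims=True)),
        # and dw, db, da_prev then follow as in Activation.backward_prop():
        #
        #   a = self.fn(z)
        #   dz = a * (da - np.sum(da * a, axis=1, keepdims=True))
        #   dw = (x.T @ dz) / m
        #   db = np.sum(dz, axis=0, keepdims=True) / m
        #   da_prev = (dz @ w.T) if return_prev else None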

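# Minimal self-test sketch (an illustrative addition, not part of the original
# module): run a Sigmoid layer forward and backward on random data and check
# the shapes of the returned gradients.
if __name__ == "__main__":
    rng = np.random.default_rng(42)
    m, n_in, n_out = 8, 4, 3
    x = rng.standard_normal((m, n_in))
    w = rng.standard_normal((n_in, n_out))
    b = np.zeros((1, n_out))
    act = Sigmoid()
    z = x @ w + b                      # forward MAC
    a = act.fn(z)                      # layer output
    da = a - 1.0                       # placeholder upstream loss gradient
    dw, db, da_prev = act.backward_prop(w, da, x, z, m, return_prev=True)
    assert dw.shape == w.shape
    assert db.shape == b.shape
    assert da_prev.shape == x.shape
    print("backward_prop() gradient shapes OK")
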
# vim: ts=4 sw=4 expandtab