path: root/mlplib/gradient_check.py
"""
# -*- coding: utf-8 -*-
#
# Copyright 2021 Michael Büsch <m@bues.ch>
#
# Licensed under the Apache License version 2.0
# or the MIT license, at your option.
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
"""

__all__ = [
    "gradient_check",
]

from mlplib.backward import BackpropGrads
from mlplib.forward import forward_prop
from mlplib.loss import Loss
from mlplib.parameters import Parameters
import numpy as np

def wb_params_to_vector(params: Parameters) -> np.ndarray:
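    """Flatten all layer weights and biases into a single 1-D vector
    (weights before biases for each layer, in row-major order)."""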
    ret = np.zeros((0,))
    for param in params:
        ret = np.concatenate((ret,
                              param.w.reshape((-1,)),
                              param.b.reshape((-1,))))
    return ret

def wb_vector_to_params(params: Parameters,
                        vect: np.ndarray) -> None:
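    """Write values from a flat vector back into the weight and bias
    matrices in place; the inverse of wb_params_to_vector()."""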
    offs = 0
    def v2m(m):
        nonlocal offs
        m.put(np.arange(0, m.size), vect[offs:offs+m.size])
        offs += m.size
    for param in params:
        v2m(param.w)
        v2m(param.b)

def wb_grads_to_vector(backprop_grads: BackpropGrads) -> np.ndarray:
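    """Flatten all backprop gradients (dw and db for each layer) into a
    single 1-D vector, in the same order as wb_params_to_vector()."""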
    ret = np.zeros((0,))
    for grads in backprop_grads:
        ret = np.concatenate((ret,
                              grads.dw.reshape((-1,)),
                              grads.db.reshape((-1,))))
    return ret

def grads_equal(gradients0: np.ndarray,
                gradients1: np.ndarray,
                threshold: float) -> bool:
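    """Return True if the two gradient vectors are approximately equal:
    the relative difference ||g0 - g1|| / (||g0|| + ||g1||) must not
    exceed threshold. (At least one of the vectors should be non-zero,
    otherwise the ratio is undefined.)"""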
    norm = np.linalg.norm
    diff = (norm(gradients0 - gradients1) /
            (norm(gradients0) + norm(gradients1)))
    return diff <= threshold

def gradient_check(x: np.ndarray,
                   y: np.ndarray,
                   params: Parameters,
                   loss: Loss,
                   backprop_grads: BackpropGrads,
                   epsilon: float = 1e-7,
                   threshold: float = 1e-7) -> bool:
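    """Verify backpropagation gradients with a numerical estimate.

    Every weight and bias is perturbed by +epsilon and -epsilon, the loss
    is recomputed with a full forward propagation for each perturbation,
    and the two-sided (central) difference
    (loss_pos - loss_neg) / (2 * epsilon) is taken as the estimated
    gradient. The estimates are then compared to the supplied backprop
    gradients with grads_equal(). Returns True if they match within
    threshold.

    This costs two forward propagations per parameter, so it is intended
    for debugging on small networks only.
    """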
    # Convert weights and biases to vector.
    wb_vect = wb_params_to_vector(params)
    nr_params = wb_vect.size
    assert wb_vect.shape == (nr_params,)

    est_grads = np.zeros((nr_params,))

    # For each weight and bias.
    for i in range(nr_params):
        pos_step = wb_vect.copy()
        neg_step = wb_vect.copy()
        pos_step[i] += epsilon
        neg_step[i] -= epsilon

        # Forward prop with positive epsilon
        wb_vector_to_params(params, pos_step)
        yh = forward_prop(x, params)
        loss_pos = loss.fn(yh, y)

        # Forward prop with negative epsilon
        wb_vector_to_params(params, neg_step)
        yh = forward_prop(x, params)
        loss_neg = loss.fn(yh, y)

        # Two-sided (central) difference estimate of d(loss)/d(parameter i).
        est_grads[i] = (loss_pos - loss_neg) / (2.0 * epsilon)

    # Restore original parameters.
    wb_vector_to_params(params, wb_vect)

    # Convert backprop gradients to vector.
    grads = wb_grads_to_vector(backprop_grads)
    assert grads.shape == (nr_params,)

    # Compare backprop gradients to estimated gradients.
    return grads_equal(grads, est_grads, threshold)
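
if __name__ == "__main__":
    # Minimal, self-contained demonstration sketch of the central-difference
    # estimate used by gradient_check(), applied to a toy quadratic loss
    # f(v) = 0.5 * sum(v**2), whose analytic gradient is simply v.
    # It only uses NumPy and grads_equal() above; it does not exercise
    # mlplib's forward_prop/Loss API.
    def _toy_loss(v):
        return 0.5 * float(np.sum(v ** 2))

    rng = np.random.default_rng(42)
    vec = rng.normal(size=10)
    eps = 1e-7
    analytic_grads = vec.copy()
    estimated_grads = np.zeros_like(vec)
    for i in range(vec.size):
        pos, neg = vec.copy(), vec.copy()
        pos[i] += eps
        neg[i] -= eps
        estimated_grads[i] = (_toy_loss(pos) - _toy_loss(neg)) / (2.0 * eps)
    print("toy gradient check passed:",
          grads_equal(analytic_grads, estimated_grads, 1e-7))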

# vim: ts=4 sw=4 expandtab