"""
# -*- coding: utf-8 -*-
#
# Copyright 2021 Michael Büsch <m@bues.ch>
#
# Licensed under the Apache License version 2.0
# or the MIT license, at your option.
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
"""
__all__ = [
    "gradient_check",
]

from mlplib.backward import BackpropGrads
from mlplib.forward import forward_prop
from mlplib.loss import Loss
from mlplib.parameters import Parameters

import numpy as np

def wb_params_to_vector(params: Parameters) -> np.ndarray:
    """Flatten all layer weights and biases into one 1-D vector."""
    ret = np.zeros((0,))
    for param in params:
        ret = np.concatenate((ret,
                              param.w.reshape((-1,)),
                              param.b.reshape((-1,))))
    return ret

def wb_vector_to_params(params: Parameters,
                        vect: np.ndarray) -> None:
    """Write a flat vector back into the layer weights and biases."""
    offs = 0
    def v2m(m):
        # Copy the next m.size vector elements into the matrix, in place.
        nonlocal offs
        m.put(np.arange(0, m.size), vect[offs:offs+m.size])
        offs += m.size
    for param in params:
        v2m(param.w)
        v2m(param.b)

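# Hedged illustration, not part of the original module:
# wb_params_to_vector() and wb_vector_to_params() are inverses, so a
# perturbed vector written back and re-read must come out unchanged.
# _FakeParam is a hypothetical stand-in exposing only the .w and .b
# ndarrays these helpers touch; it is not mlplib's real parameter type.
def _demo_wb_roundtrip() -> bool:
    class _FakeParam:
        def __init__(self, w, b):
            self.w = w
            self.b = b
    rng = np.random.default_rng(42)
    params = [_FakeParam(rng.normal(size=(3, 2)), rng.normal(size=(2,)))]
    vect = wb_params_to_vector(params) + 1.0  # perturb every element
    wb_vector_to_params(params, vect)
    return np.allclose(wb_params_to_vector(params), vect)
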
def wb_grads_to_vector(backprop_grads: BackpropGrads) -> np.ndarray:
    """Flatten all backpropagated gradients into one 1-D vector."""
    ret = np.zeros((0,))
    for grads in backprop_grads:
        ret = np.concatenate((ret,
                              grads.dw.reshape((-1,)),
                              grads.db.reshape((-1,))))
    return ret

def grads_equal(gradients0: np.ndarray,
                gradients1: np.ndarray,
                threshold: float) -> bool:
    """Compare two gradient vectors by their normalized distance:
    ||g0 - g1|| / (||g0|| + ||g1||) <= threshold
    """
    norm = np.linalg.norm
    diff = (norm(gradients0 - gradients1) /
            (norm(gradients0) + norm(gradients1)))
    return diff <= threshold

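# Hedged numeric illustration, not part of the original module: a tiny
# relative perturbation passes grads_equal() at the default 1e-7
# threshold, while a single flipped sign fails it by orders of magnitude.
def _demo_grads_equal() -> None:
    g0 = np.array([0.5, -1.25, 3.0])
    g1 = g0 * (1.0 + 1e-9)  # ~1e-9 relative error -> passes
    assert grads_equal(g0, g1, 1e-7)
    g_bad = g0.copy()
    g_bad[0] = -g_bad[0]    # one wrong sign -> relative error ~0.15
    assert not grads_equal(g0, g_bad, 1e-7)
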
def gradient_check(x: np.ndarray,
                   y: np.ndarray,
                   params: Parameters,
                   loss: Loss,
                   backprop_grads: BackpropGrads,
                   epsilon: float = 1e-7,
                   threshold: float = 1e-7) -> bool:
    """Check backpropagated gradients against a numerical estimate.

    Each parameter is perturbed by +/- epsilon and the loss gradient
    is estimated with a central difference. Returns True, if the
    backpropagated gradients match the estimate within threshold.
    """
    # Convert weights and biases to vector.
    wb_vect = wb_params_to_vector(params)
    nr_params = wb_vect.size
    assert wb_vect.shape == (nr_params,)
    est_grads = np.zeros((nr_params,))
    # For each weight and bias.
    for i in range(nr_params):
        pos_step = wb_vect.copy()
        neg_step = wb_vect.copy()
        pos_step[i] += epsilon
        neg_step[i] -= epsilon
        # Forward prop with positive epsilon.
        wb_vector_to_params(params, pos_step)
        yh = forward_prop(x, params)
        loss_pos = loss.fn(yh, y)
        # Forward prop with negative epsilon.
        wb_vector_to_params(params, neg_step)
        yh = forward_prop(x, params)
        loss_neg = loss.fn(yh, y)
        # Central difference: dL/dw_i ~ (L(w+eps) - L(w-eps)) / (2*eps)
        est_grads[i] = (loss_pos - loss_neg) / (2.0 * epsilon)
    # Restore original parameters.
    wb_vector_to_params(params, wb_vect)
    # Convert backprop gradients to vector.
    grads = wb_grads_to_vector(backprop_grads)
    assert grads.shape == (nr_params,)
    # Compare backprop gradients to estimated gradients.
    return grads_equal(grads, est_grads, threshold)

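# Self-contained sketch of the technique above, not part of the original
# module: estimate the gradient of a toy quadratic loss
# L(w) = 0.5 * ||w||^2 with the same central-difference recipe that
# gradient_check() applies per parameter, then compare it against the
# analytic gradient dL/dw = w via grads_equal(). Run this file directly
# to execute the demos.
if __name__ == "__main__":
    def toy_loss(w: np.ndarray) -> float:
        return 0.5 * float(np.sum(w * w))

    w = np.array([0.3, -1.7, 2.2])
    eps = 1e-7
    est = np.zeros_like(w)
    for i in range(w.size):
        pos, neg = w.copy(), w.copy()
        pos[i] += eps
        neg[i] -= eps
        est[i] = (toy_loss(pos) - toy_loss(neg)) / (2.0 * eps)
    assert grads_equal(w, est, 1e-7)  # analytic gradient is w itself
    assert _demo_wb_roundtrip()
    _demo_grads_equal()
    print("gradient check demos passed")
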
# vim: ts=4 sw=4 expandtab