"""
# -*- coding: utf-8 -*-
#
# Copyright 2021 Michael Büsch
#
# Licensed under the Apache License version 2.0
# or the MIT license, at your option.
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
"""

__all__ = [
    "gradient_check",
]

from mlplib.backward import BackpropGrads
from mlplib.forward import forward_prop
from mlplib.loss import Loss
from mlplib.parameters import Parameters
from mlplib.util import printlist
from typing import Callable
import numpy as np
import sys


def wb_params_to_vector(params: Parameters) -> np.ndarray:
    """Flatten all weights and biases of `params` into one 1-D vector.

    The entries of `params` are visited in iteration order. For each entry
    the flattened `w` array comes first, followed by the flattened `b` array.

    Returns:
        A new 1-D array holding every weight and bias value.
    """
    # The leading empty float vector keeps the result well defined (an empty
    # float vector) even if `params` yields no entries at all.
    parts = [np.zeros((0,))]
    for param in params:
        parts.append(param.w.reshape((-1,)))
        parts.append(param.b.reshape((-1,)))
    # Concatenate once instead of re-copying the growing vector per entry.
    return np.concatenate(parts)


def wb_vector_to_params(params: Parameters, vect: np.ndarray) -> None:
    """Write the values of `vect` back into `params`, in place.

    This is the inverse of `wb_params_to_vector`: consecutive slices of
    `vect` are stored into each entry's `w` array and then its `b` array,
    in the same order in which `wb_params_to_vector` emits them.

    Args:
        params: Parameters whose `w` and `b` arrays are overwritten.
        vect: 1-D vector of values in `wb_params_to_vector` layout.
    """
    offs = 0
    for param in params:
        for m in (param.w, param.b):
            # ndarray.put writes by flat index, so the array is modified
            # in place regardless of its shape.
            m.put(np.arange(0, m.size), vect[offs:offs + m.size])
            offs += m.size


def wb_grads_to_vector(backprop_grads: BackpropGrads) -> np.ndarray:
    """Flatten all gradients of `backprop_grads` into one 1-D vector.

    The entries are visited in iteration order. For each entry the flattened
    `dw` array comes first, followed by the flattened `db` array. This mirrors
    the layout produced by `wb_params_to_vector`.

    Returns:
        A new 1-D array holding every gradient value.
    """
    # The leading empty float vector keeps the result well defined even if
    # `backprop_grads` yields no entries at all.
    parts = [np.zeros((0,))]
    for grads in backprop_grads:
        parts.append(grads.dw.reshape((-1,)))
        parts.append(grads.db.reshape((-1,)))
    # Concatenate once instead of re-copying the growing vector per entry.
    return np.concatenate(parts)


def grads_equal(gradients0: np.ndarray,
                gradients1: np.ndarray,
                threshold: float) -> bool:
    """Check whether two gradient vectors are equal within a tolerance.

    The comparison uses the relative difference
    `|g0 - g1| / (|g0| + |g1|)`, where `|.|` is `np.linalg.norm`.

    Args:
        gradients0: First gradient vector.
        gradients1: Second gradient vector.
        threshold: Largest relative difference still considered equal.

    Returns:
        True if the relative difference is less than or equal to `threshold`.
    """
    norm = np.linalg.norm
    # Compute the difference first, so that incompatible shapes are always
    # reported, even when both vectors are all-zero.
    delta = norm(gradients0 - gradients1)
    denom = norm(gradients0) + norm(gradients1)
    if denom == 0.0:
        # Both norms are zero, so both vectors are all-zero and therefore
        # identical. Without this guard 0/0 would produce NaN (plus a
        # RuntimeWarning) and the vectors would be reported as unequal.
        return True
    return bool((delta / denom) <= threshold)


def gradient_check(x: np.ndarray,
                   y: np.ndarray,
                   params: Parameters,
                   loss: Loss,
                   backprop_grads: BackpropGrads,
                   epsilon: float = 1e-7,
                   threshold: float = 1e-7) -> bool:
    """Verify backprop gradients against numerically estimated gradients.

    Every single weight and bias is perturbed by `+epsilon` and `-epsilon`.
    The loss is evaluated for both perturbations and the gradient is
    estimated with the central difference
    `(loss_pos - loss_neg) / (2 * epsilon)`.
    The estimated gradients are then compared to `backprop_grads` with
    `grads_equal`.

    `params` is modified temporarily while the check runs. The original
    values are written back before returning, also if an exception is raised.

    Args:
        x: Input that is passed to `forward_prop`.
        y: Reference value that is passed to `loss.fn` together with the
            output of `forward_prop`.
        params: Parameters (weights and biases) to check.
        loss: Loss object. Its `fn` is called as `loss.fn(yh, y)`.
        backprop_grads: Gradients to verify.
        epsilon: Size of the perturbation applied to each parameter.
        threshold: Largest relative difference still considered equal.

    Returns:
        True if the backprop gradients match the estimated gradients.
    """
    # Convert weights and biases to vector.
    wb_vect = wb_params_to_vector(params)
    nr_params = wb_vect.size
    assert wb_vect.shape == (nr_params,)

    est_grads = np.zeros((nr_params,))

    # One working copy is perturbed and reset per element,
    # instead of allocating two full copies per parameter.
    work = wb_vect.copy()
    try:
        # For each weight and bias.
        for i in range(nr_params):
            orig = wb_vect[i]

            # Forward prop with positive epsilon
            work[i] = orig + epsilon
            wb_vector_to_params(params, work)
            yh = forward_prop(x, params)
            loss_pos = loss.fn(yh, y)

            # Forward prop with negative epsilon
            work[i] = orig - epsilon
            wb_vector_to_params(params, work)
            yh = forward_prop(x, params)
            loss_neg = loss.fn(yh, y)

            # Undo the perturbation before moving on to the next element.
            work[i] = orig

            est_grads[i] = (loss_pos - loss_neg) / (2.0 * epsilon)
    finally:
        # Restore original parameters.
        # This must also happen if forward prop or the loss function raised,
        # otherwise the caller would be left with perturbed parameters.
        wb_vector_to_params(params, wb_vect)

    # Convert backprop gradients to vector.
    grads = wb_grads_to_vector(backprop_grads)
    assert grads.shape == (nr_params,)

    # Compare backprop gradients to estimated gradients.
    return grads_equal(grads, est_grads, threshold)

# vim: ts=4 sw=4 expandtab