% Backpropagation (BP) for the discriminator. The code below derives the gradient of the loss with respect to each parameter.
% Changing the network structure (e.g. the activation functions) requires changing this BP code; changing only the number of weights or biases does not.
% To update w and b we need the partial derivatives of the final loss with respect to w and b; the residual is an intermediate result computed while deriving those partial derivatives.
function nn = nnbp_d(nn, y_h, y)
% NNBP_D  Backward pass for the discriminator network.
%
%   nn = nnbp_d(nn, y_h, y) stores, for each layer i = 2..nn.layers_count:
%     nn.layers{i}.d  - the residual: partial derivative of the loss with
%                       respect to that layer's pre-activation value z
%     nn.layers{i}.dw - gradient of the loss with respect to the weights
%     nn.layers{i}.db - gradient of the loss with respect to the bias
%
%   Inputs:
%     nn  - network struct; reads layers_count, layers{i}.w, layers{i}.z
%           and layers{i}.a
%     y_h - network output, passed to delta_sigmoid_cross_entropy
%     y   - target values, passed to delta_sigmoid_cross_entropy
%
%   NOTE(review): dividing by size(d, 1) and averaging along dimension 1
%   suggests rows index the samples of a batch - confirm against caller.

    last = nn.layers_count;

    % Output layer: the residual comes straight from the loss derivative.
    nn.layers{last}.d = delta_sigmoid_cross_entropy(y_h, y);

    % Pass 1: walk backwards through the hidden layers (chain rule).
    % The residual of layer k is the residual of layer k+1 pushed back
    % through that layer's weights, gated element-wise by the ReLU
    % derivative evaluated at layer k's pre-activation value.
    for k = last-1:-1:2
        upstream   = nn.layers{k+1};
        propagated = upstream.d * upstream.w';
        relu_grad  = delta_relu(nn.layers{k}.z);
        nn.layers{k}.d = propagated .* relu_grad;
    end

    % Pass 2: with every residual known, form the parameter gradients.
    for k = 2:last
        residual    = nn.layers{k}.d;
        prev_output = nn.layers{k-1}.a;
        n_rows      = size(residual, 1);

        % Weight gradient: previous layer's activations times the
        % residual, divided by the number of rows in the residual.
        nn.layers{k}.dw = (prev_output' * residual) / n_rows;
        % Bias gradient: mean of the residual along dimension 1.
        nn.layers{k}.db = mean(residual, 1);
    end
end