253 for (
size_t j = 0; j < (size_t) reset_gradient.
GetNcols(); j++) {
254 for (
size_t i = 0;
i < (size_t) reset_gradient.
GetNrows();
i++) {
255 reset_gradient(
i,j) = 1 - reset_gradient(
i,j);
259 Hadamard(reset_gradient, state_gradients_backward);
261 tmpMul.
Mult(reset_gradient, weights_candidate_state);
262 Hadamard(tmpMul, precStateActivations);
264 reset_gradient = tmpMul;
268 for (
size_t j = 0; j < (size_t) update_gradient.
GetNcols(); j++) {
269 for (
size_t i = 0;
i < (size_t) update_gradient.
GetNrows();
i++) {
270 update_gradient(
i,j) = update_gradient(
i,j) - fCandidate(
i,j);
274 Hadamard(update_gradient, state_gradients_backward);
278 for (
size_t j = 0; j < (size_t) candidate_gradient.
GetNcols(); j++) {
279 for (
size_t i = 0;
i < (size_t) candidate_gradient.
GetNrows();
i++) {
280 candidate_gradient(
i,j) = 1 - candidate_gradient(
i,j);
284 Hadamard(candidate_gradient, state_gradients_backward);
291 state_gradients_backward = term;
294 term = precStateActivations;
298 var.
Mult(term, weights_update_state);
300 state_gradients_backward += term;
304 for (
size_t j = 0; j < (size_t) term.
GetNcols(); j++) {
305 for (
size_t i = 0;
i < (size_t) term.
GetNrows();
i++) {
306 term(
i,j) = - term(
i,j);
311 var.
Mult(term, weights_update_state);
313 state_gradients_backward += term;
317 for (
size_t j = 0; j < (size_t) term.
GetNcols(); j++) {
318 for (
size_t i = 0;
i < (size_t) term.
GetNrows();
i++) {
319 term(
i,j) = 1 - term(
i,j);
324 var.
Mult(term, weights_candidate_state);
327 state_gradients_backward += term;
331 for (
size_t j = 0; j < (size_t) term.
GetNcols(); j++) {
332 for (
size_t i = 0;
i < (size_t) term.
GetNrows();
i++) {
333 term(
i,j) = 1 - term(
i,j);
338 var.
Mult(term, weights_candidate_state);
339 Hadamard(var, precStateActivations);
341 term.
Mult(var, weights_reset_state);
342 state_gradients_backward += term;
346 tmpInp.
Mult(reset_gradient, weights_reset);
347 input_gradient = tmpInp;
348 tmpInp.
Mult(update_gradient, weights_update);
349 input_gradient += tmpInp;
350 tmpInp.
Mult(candidate_gradient, weights_candidate);
351 input_gradient += tmpInp;
355 reset_weight_gradients.TMult(reset_gradient,
input);
356 reset_weight_gradients += tmp;
357 tmp = update_weight_gradients;
358 update_weight_gradients.TMult(update_gradient,
input);
359 update_weight_gradients += tmp;
360 tmp = candidate_weight_gradients;
361 candidate_weight_gradients.TMult(candidate_gradient,
input);
362 candidate_weight_gradients += tmp;
366 reset_state_weight_gradients.TMult(reset_gradient, precStateActivations);
367 reset_state_weight_gradients += tmp1;
368 tmp1 = update_state_weight_gradients;
369 update_state_weight_gradients.
TMult(update_gradient, precStateActivations);
370 update_state_weight_gradients += tmp1;
371 tmp1 = candidate_state_weight_gradients;
373 Hadamard(tmp2, precStateActivations);
374 candidate_state_weight_gradients.TMult(candidate_gradient, tmp2);
375 candidate_state_weight_gradients += tmp1;
378 for (
size_t j = 0; j < (size_t) du.GetNcols(); j++) {
379 Scalar_t sum_reset = 0.0, sum_update = 0.0, sum_candidate = 0.0;
381 for (
size_t i = 0;
i < (size_t) du.GetNrows();
i++) {
382 sum_reset += reset_gradient(
i,j);
383 sum_update += update_gradient(
i,j);
384 sum_candidate += candidate_gradient(
i,j);
386 reset_bias_gradients(j,0) += sum_reset;
387 update_bias_gradients(j,0) += sum_update;
388 candidate_bias_gradients(j,0) += sum_candidate;
391 return input_gradient;