% NOTE(review): @article normally requires a `journal` field; none is recorded
% for this entry -- confirm the venue (or a more fitting entry type) and add it.
@article{4625,
  author   = {Poggio, Tomaso and Liao, Qianli and Xu, Mengjia},
  title    = {Implicit dynamic regularization in deep networks},
  year     = {2020},
  month    = aug,
  abstract = {Square loss has been observed to perform well in classification tasks, at least as well as crossentropy. However, a theoretical justification is lacking. Here we develop a theoretical analysis for the square loss that complements the existing asymptotic analysis for the exponential loss.},
}