@article{4185,
  title = {Theory I: Deep networks and the curse of dimensionality},
  journal = {Bulletin of the Polish Academy of Sciences: Technical Sciences},
  volume = {66},
  year = {2018},
  abstract = {We review recent work characterizing the classes of functions for which deep learning can be exponentially better than shallow learning. Deep convolutional networks are a special case satisfying these conditions, though weight sharing is not the main reason for their exponential advantage.},
  keywords = {convolutional neural networks, deep and shallow networks, deep learning, function approximation},
  author = {Tomaso Poggio and Qianli Liao}
}

@article{2557,
  title = {Why and when can deep-but not shallow-networks avoid the curse of dimensionality: A review},
  journal = {International Journal of Automation and Computing},
  year = {2017},
  month = {03/2017},
  pages = {1--17},
  abstract = {The paper reviews and extends an emerging body of theoretical results on deep learning, including the conditions under which it can be exponentially better than shallow learning. A class of deep convolutional networks represents an important special case of these conditions, though weight sharing is not the main reason for their exponential advantage. Implications of a few key theorems are discussed, together with new results, open problems and conjectures.},
  keywords = {convolutional neural networks, deep and shallow networks, deep learning, function approximation, Machine Learning, Neural Networks},
  doi = {10.1007/s11633-017-1054-2},
  url = {http://link.springer.com/article/10.1007/s11633-017-1054-2},
  author = {Tomaso Poggio and Hrushikesh Mhaskar and Lorenzo Rosasco and Brando Miranda and Qianli Liao}
}

@article{3662,
  title = {Deep vs. shallow networks: An approximation theory perspective},
  journal = {Analysis and Applications},
  volume = {14},
  year = {2016},
  month = {01/2016},
  pages = {829--848},
  abstract = {The paper briefly reviews several recent results on hierarchical architectures for learning from examples that may formally explain the conditions under which Deep Convolutional Neural Networks perform much better in function approximation problems than shallow, one-hidden-layer architectures. The paper announces new results for a non-smooth activation function, the ReLU function, used in present-day neural networks, as well as for Gaussian networks. We propose a new definition of relative dimension to encapsulate different notions of sparsity of a function class that can possibly be exploited by deep networks but not by shallow ones to drastically reduce the complexity required for approximation and learning.},
  keywords = {blessed representation, deep and shallow networks, Gaussian networks, ReLU networks},
  issn = {0219-5305},
  doi = {10.1142/S0219530516400042},
  url = {http://www.worldscientific.com/doi/abs/10.1142/S0219530516400042},
  author = {Hrushikesh Mhaskar and Tomaso Poggio}
}