@article{3155,
  title = {Fisher-Rao Metric, Geometry, and Complexity of Neural Networks},
  year = {2017},
  month = {11},
  abstract = {
  We study the relationship between geometry and capacity measures for deep neural networks from an invariance viewpoint. We introduce a new notion of capacity, the Fisher-Rao norm, that possesses desirable invariance properties and is motivated by information geometry. We discover an analytical characterization of the new capacity measure, through which we establish norm-comparison inequalities and further show that the new measure serves as an umbrella for several existing norm-based complexity measures. We discuss upper bounds on the generalization error induced by the proposed measure. Extensive numerical experiments on CIFAR-10 support our theoretical findings. Our theoretical analysis rests on a key structural lemma about partial derivatives of multi-layer rectifier networks.
  },
  keywords = {capacity control, deep learning, Fisher-Rao metric, generalization error, information geometry, invariance, natural gradient, ReLU activation, statistical learning theory},
  url = {https://arxiv.org/abs/1711.01530},
  author = {Liang, Tengyuan and Poggio, Tomaso and Rakhlin, Alexander and Stokes, James}
}