@article{3155,
  title         = {{Fisher-Rao} Metric, Geometry, and Complexity of Neural Networks},
  author        = {Liang, Tengyuan and Poggio, Tomaso and Rakhlin, Alexander and Stokes, James},
  year          = {2017},
  month         = nov,
  abstract      = {We study the relationship between geometry and capacity measures for deep neural networks from an invariance viewpoint. We introduce a new notion of capacity---the Fisher-Rao norm---that possesses desirable invariance properties and is motivated by Information Geometry. We discover an analytical characterization of the new capacity measure, through which we establish norm-comparison inequalities and further show that the new measure serves as an umbrella for several existing norm-based complexity measures. We discuss upper bounds on the generalization error induced by the proposed measure. Extensive numerical experiments on CIFAR-10 support our theoretical findings. Our theoretical analysis rests on a key structural lemma about partial derivatives of multi-layer rectifier networks.},
  keywords      = {capacity control, deep learning, Fisher-Rao metric, generalization error, information geometry, invariance, natural gradient, ReLU activation, statistical learning theory},
  eprint        = {1711.01530},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1711.01530},
}