@article{3155,
  title         = {{Fisher-Rao} Metric, Geometry, and Complexity of Neural Networks},
  author        = {Liang, Tengyuan and Poggio, Tomaso and Rakhlin, Alexander and Stokes, James},
  year          = {2017},
  month         = nov,
  abstract      = {We study the relationship between geometry and capacity measures for deep neural networks from an invariance viewpoint. We introduce a new notion of capacity---the Fisher-Rao norm---that possesses desirable invariance properties and is motivated by Information Geometry. We discover an analytical characterization of the new capacity measure, through which we establish norm-comparison inequalities and further show that the new measure serves as an umbrella for several existing norm-based complexity measures. We discuss upper bounds on the generalization error induced by the proposed measure. Extensive numerical experiments on CIFAR-10 support our theoretical findings. Our theoretical analysis rests on a key structural lemma about partial derivatives of multi-layer rectifier networks.},
  keywords      = {capacity control, deep learning, Fisher-Rao metric, generalization error, information geometry, invariance, natural gradient, ReLU activation, statistical learning theory},
  eprint        = {1711.01530},
  archiveprefix = {arXiv},
  primaryclass  = {cs.LG},
  url           = {https://arxiv.org/abs/1711.01530},
}