% arXiv preprint introducing the ThreeDWorld (TDW) simulation platform.
% Names use "Last, First" form so the compound surname "De Freitas" is parsed correctly.
@article{4632,
	title = {{ThreeDWorld}: A Platform for Interactive Multi-Modal Physical Simulation},
	journal = {arXiv},
	year = {2020},
	month = jul,
	type = {Preprint},
	abstract = {<p>We introduce ThreeDWorld (TDW), a platform for interactive multi-modal physical simulation. With TDW, users can simulate high-fidelity sensory data and physical interactions between mobile agents and objects in a wide variety of rich 3D environments. TDW has several unique properties: 1) realtime near photo-realistic image rendering quality; 2) a library of objects and environments with materials for high-quality rendering, and routines enabling user customization of the asset library; 3) generative procedures for efficiently building classes of new environments; 4) high-fidelity audio rendering; 5) believable and realistic physical interactions for a wide variety of material types, including cloths, liquid, and deformable objects; 6) a range of "avatar" types that serve as embodiments of AI agents, with the option for user avatar customization; and 7) support for human interactions with VR devices. TDW also provides a rich API enabling multiple agents to interact within a simulation and return a range of sensor and physics data representing the state of the world. We present initial experiments enabled by the platform around emerging research directions in computer vision, machine learning, and cognitive science, including multi-modal physical scene understanding, multi-agent interactions, models that "learn like a child", and attention studies in humans and neural networks. The simulation platform will be made publicly available.</p>
},
	eprint = {2007.04954},
	archiveprefix = {arXiv},
	url = {https://arxiv.org/abs/2007.04954},
	author = {Gan, Chuang and Schwartz, Jeremy and Alter, Seth and Schrimpf, Martin and Traer, James and De Freitas, Julian and Kubilius, Jonas and Bhandwaldar, Abhishek and Haber, Nick and Sano, Megumi and Kim, Kuno and Wang, Elias and Mrowca, Damian and Lingelbach, Michael and Curtis, Aidan and Feigelis, Kevin and Bear, Daniel and Gutfreund, Dan and Cox, David and DiCarlo, James J. and McDermott, Josh H. and Tenenbaum, Joshua B. and Yamins, Daniel L K}
}
% Project web page for ThreeDWorld (TDW); typed as @misc because there is no journal.
% Names use "Last, First" form so the compound surname "De Freitas" is parsed correctly.
@misc{4633,
	title = {{ThreeDWorld} ({TDW}): A High-Fidelity, Multi-Modal Platform for Interactive Physical Simulation},
	year = {2020},
	month = jul,
	abstract = {<h3>TDW is a 3D virtual world simulation platform, utilizing state-of-the-art video game engine technology</h3>

<p>A TDW simulation consists of two components: a) the <strong>Build</strong>, a compiled executable running on the Unity3D Engine, which is responsible for image rendering, audio synthesis and physics simulations; and b) the <strong>Controller</strong>, an external Python interface to communicate with the build.</p>

<p>Researchers write Controllers that send commands to the Build, which executes those commands and returns a broad range of data types representing the state of the virtual world.</p>

<p>TDW provides researchers with:</p>

<ul>
	<li>A general, flexible design that does not impose constraints on the types of use-cases it can support, nor force any particular metaphor on the user.</li>
	<li>Support for multiple modalities -- visual rendering with near-photoreal image quality, coupled with superior audio rendering fidelity.</li>
	<li>A comprehensive, highly extensible and thoroughly documented command and control Python API.</li>
	<li>Multiple paradigms for object interaction, capable of generating physically-realistic behavior.</li>
</ul>

<p>TDW is being used on a daily basis in multiple labs, supporting research that sits at the nexus of neuroscience, cognitive science and artificial intelligence.</p>

<p>Find out more about ThreeDWorld on the project website using the link below.</p>
},
	url = {http://www.threedworld.org/},
	author = {Schwartz, Jeremy and Alter, Seth and DiCarlo, James J. and McDermott, Josh H. and Tenenbaum, Joshua B. and Yamins, Daniel L K and Gutfreund, Dan and Gan, Chuang and Traer, James and Kubilius, Jonas and Schrimpf, Martin and Bhandwaldar, Abhishek and De Freitas, Julian and Mrowca, Damian and Lingelbach, Michael and Sano, Megumi and Bear, Daniel and Kim, Kuno and Haber, Nick and Fan, Chaofei}
}
% Conference paper describing CORnet-S; typed as @inproceedings so the author list is rendered
% (standard styles ignore authors on @proceedings).
% NOTE(review): booktitle is inferred from the Vancouver 2019 venue (NeurIPS 2019) -- confirm.
@inproceedings{4379,
	title = {Brain-Like Object Recognition with High-Performing Shallow Recurrent {ANNs}},
	booktitle = {Advances in Neural Information Processing Systems 32},
	year = {2019},
	month = oct,
	address = {Vancouver, Canada},
	abstract = {<p>Deep convolutional artificial neural networks (ANNs) are the leading class of candidate models of the mechanisms of visual processing in the primate ventral stream. While initially inspired by brain anatomy, over the past years, these ANNs have evolved from a simple eight-layer architecture in AlexNet to extremely deep and branching architectures, demonstrating increasingly better object categorization performance, yet bringing into question how brain-like they still are. In particular, typical deep models from the machine learning community are often hard to map onto the brain{\textquoteright}s anatomy due to their vast number of layers and missing biologically-important connections, such as recurrence. Here we demonstrate that better anatomical alignment to the brain and high performance on machine learning as well as neuroscience measures do not have to be in contradiction. We developed CORnet-S, a shallow ANN with four anatomically mapped areas and recurrent connectivity, guided by Brain-Score, a new large-scale composite of neural and behavioral benchmarks for quantifying the functional fidelity of models of the primate ventral visual stream. Despite being significantly shallower than most models, CORnet-S is the top model on Brain-Score and outperforms similarly compact models on ImageNet. Moreover, our extensive analyses of CORnet-S circuitry variants reveal that recurrence is the main predictive factor of both Brain-Score and ImageNet top-1 performance. Finally, we report that the temporal evolution of the CORnet-S "IT" neural population resembles the actual monkey IT population dynamics. Taken together, these results establish CORnet-S, a compact, recurrent ANN, as the current best model of the primate ventral visual stream.</p>
},
	author = {Kubilius, Jonas and Schrimpf, Martin and Kar, Kohitij and Rajalingham, Rishi and Hong, Ha and Majaj, Najib J. and Issa, Elias B. and Bashivan, Pouya and Prescott-Roy, Jonathan and Schmidt, Kailyn and Nayebi, Aran and Bear, Daniel and Yamins, Daniel L K and DiCarlo, James J.}
}
% bioRxiv preprint introducing Brain-Score.
% The names previously stored in the editor field are co-authors, so they are
% appended to the author list in their original order.
@article{4294,
	title = {{Brain-Score}: Which Artificial Neural Network for Object Recognition is most Brain-Like?},
	journal = {bioRxiv preprint},
	year = {2018},
	abstract = {<p>The internal representations of early deep artificial neural networks (ANNs) were found to be remarkably similar to the internal neural representations measured experimentally in the primate brain. Here we ask, as deep ANNs have continued to evolve, are they becoming more or less brain-like? ANNs that are most functionally similar to the brain will contain mechanisms that are most like those used by the brain. We therefore developed <em>Brain-Score</em> {\textendash} a composite of multiple neural and behavioral benchmarks that score any ANN on how similar it is to the brain{\textquoteright}s mechanisms for core object recognition {\textendash} and we deployed it to evaluate a wide range of state-of-the-art deep ANNs. Using this scoring system, we here report that: (1) DenseNet-169, CORnet-S and ResNet-101 are the most brain-like ANNs. (2) There remains considerable variability in neural and behavioral responses that is not predicted by any ANN, suggesting that no ANN model has yet captured all the relevant mechanisms. (3) Extending prior work, we found that gains in ANN ImageNet performance led to gains on Brain-Score. However, correlation weakened at <em>>=</em> 70\% top-1 ImageNet performance, suggesting that additional guidance from neuroscience is needed to make further advances in capturing brain mechanisms. (4) We uncovered smaller (i.e. less complex) ANNs that are more brain-like than many of the best-performing ImageNet models, which suggests the opportunity to simplify ANNs to better understand the ventral stream. The scoring system used here is far from complete. However, we propose that evaluating and tracking model-benchmark correspondences through a Brain-Score that is regularly updated with new brain data is an exciting opportunity: experimental benchmarks can be used to guide machine network evolution, and machine networks are mechanistic hypotheses of the brain{\textquoteright}s network and thus drive next experiments.
To facilitate both of these, we release Brain-Score.org: a platform that hosts the neural and behavioral benchmarks, where ANNs for visual processing can be submitted to receive a Brain-Score and their rank relative to other models, and where new experimental data can be naturally incorporated.</p>
},
	keywords = {computational neuroscience, deep learning, Neural Networks, object recognition, ventral stream},
	doi = {10.1101/407007},
	url = {https://www.biorxiv.org/content/10.1101/407007v1},
	author = {Schrimpf, Martin and Kubilius, Jonas and Hong, Ha and Majaj, Najib J. and Rajalingham, Rishi and Issa, Elias B. and Kar, Kohitij and Bashivan, Pouya and Prescott-Roy, Jonathan and Schmidt, Kailyn and Yamins, Daniel L K and DiCarlo, James J.}
}
% Journal article (Neuron, volume 98) on task-optimized neural networks for auditory modeling.
% NOTE(review): number and pages were added from the published record -- confirm against the DOI.
@article{3573,
	title = {A task-optimized neural network replicates human auditory behavior, predicts brain responses, and reveals a cortical processing hierarchy},
	journal = {Neuron},
	volume = {98},
	number = {3},
	pages = {630--644.e16},
	year = {2018},
	month = apr,
	abstract = {<p>A core goal of auditory neuroscience is to build quantitative models that predict cortical responses to natural sounds. Reasoning that a complete model of auditory cortex must solve ecologically relevant tasks, we optimized hierarchical neural networks for speech and music recognition. The best-performing network contained separate music and speech pathways following early shared processing, potentially replicating human cortical organization. The network performed both tasks as well as humans and exhibited human-like errors despite not being optimized to do so, suggesting common constraints on network and human performance. The network predicted fMRI voxel responses substantially better than traditional spectrotemporal filter models throughout auditory cortex. It also provided a quantitative signature of cortical representational hierarchy{\textemdash}primary and non-primary responses were best predicted by intermediate and late network layers, respectively. The results suggest that task optimization provides a powerful set of tools for modeling sensory systems.</p>
},
	keywords = {auditory cortex, convolutional neural network, deep learning, deep neural network, encoding models, fMRI, Hierarchy, human auditory cortex, natural sounds, word recognition},
	doi = {10.1016/j.neuron.2018.03.044},
	url = {https://www.sciencedirect.com/science/article/pii/S0896627318302502},
	author = {Kell, Alexander J. E. and Yamins, Daniel L K and Shook, Erica N and Norman-Haignere, Sam V and McDermott, Josh H.}
}
% Code release for the Neural Representation Benchmark; typed as @misc because there is no journal.
% The url field repeats the project address given at the end of the abstract.
@misc{2881,
	title = {Neural Representation Benchmark [code]},
	year = {2013},
	abstract = {<p>A key requirement for the development of effective learning representations is their evaluation and comparison to representations we know to be effective. In natural sensory domains, the community has viewed the brain as a source of inspiration and as an implicit benchmark for success. However, it has not been possible to test representational learning algorithms directly against the representations contained in neural systems. Here, we propose a new benchmark for visual representations on which we have directly tested the neural representation in multiple visual cortical areas in macaque (utilizing data from [Majaj et al., 2012]), and on which any computer vision algorithm that produces a feature space can be tested. The benchmark measures the effectiveness of the neural or machine representation by computing the classification loss on the ordered eigendecomposition of a kernel matrix [Montavon et al., 2011]. In our analysis we find that the neural representation in visual area IT is superior to visual area V4. In our analysis of representational learning algorithms, we find that three-layer models approach the representational performance of V4 and the algorithm in [Le et al., 2012] surpasses the performance of V4. Impressively, we find that a recent supervised algorithm [Krizhevsky et al., 2012] achieves performance comparable to that of IT for an intermediate level of image variation difficulty, and surpasses IT at a higher difficulty level. We believe this result represents a major milestone: it is the first learning algorithm we have found that exceeds our current estimate of IT representation performance. We hope that this benchmark will assist the community in matching the representational performance of visual cortex and will serve as an initial rallying point for further correspondence between representations derived in brains and machines.</p>

<p>For more information and to download code, etc. please visit the project website - http://dicarlolab.mit.edu/neuralbenchmark</p>
},
	url = {http://dicarlolab.mit.edu/neuralbenchmark},
	author = {Cadieu, Charles F. and Hong, Ha and Yamins, Daniel L K and Pinto, Nicolas and Majaj, Najib J. and DiCarlo, James J.}
}