@article{5284,
  author   = {Dapello, Joel and Kar, Kohitij and Schrimpf, Martin and Geary, Robert and Ferguson, Michael and Cox, David D. and DiCarlo, James J.},
  title    = {Aligning Model and Macaque Inferior Temporal Cortex Representations Improves Model-to-Human Behavioral Alignment and Adversarial Robustness},
  journal  = {bioRxiv},
  year     = {2022},
  month    = jul,
  abstract = {While some state-of-the-art artificial neural network systems in computer vision are strikingly accurate models of the corresponding primate visual processing, there are still many discrepancies between these models and the behavior of primates on object recognition tasks. Many current models suffer from extreme sensitivity to adversarial attacks and often do not align well with the image-by-image behavioral error patterns observed in humans. Previous research has provided strong evidence that primate object recognition behavior can be very accurately predicted by neural population activity in the inferior temporal (IT) cortex, a brain area in the late stages of the visual processing hierarchy. Therefore, here we directly test whether making the late stage representations of models more similar to that of macaque IT produces new models that exhibit more robust, primate-like behavior. We conducted chronic, large-scale multi-electrode recordings across the IT cortex in six non-human primates (rhesus macaques). We then use these data to fine-tune (end-to-end) the model {\textquotedblleft}IT{\textquotedblright} representations such that they are more aligned with the biological IT representations, while preserving accuracy on object recognition tasks. We generate a cohort of models with a range of IT similarity scores validated on held-out animals across two image sets with distinct statistics. Across a battery of optimization conditions, we observed a strong correlation between the models{\textquoteright} IT-likeness and alignment with human behavior, as well as an increase in its adversarial robustness. We further assessed the limitations of this approach and find that the improvements in behavioral alignment and adversarial robustness generalize across different image statistics, but not to object categories outside of those covered in our IT training set. Taken together, our results demonstrate that building models that are more aligned with the primate brain leads to more robust and human-like behavior, and call for larger neural data-sets to further augment these gains.}
}

@article{5066,
  author   = {Kar, Kohitij and Schrimpf, Martin and Schmidt, Kailyn and DiCarlo, James J.},
  title    = {Chemogenetic suppression of macaque {V4} neurons produces retinotopically specific deficits in downstream {IT} neural activity patterns and core object recognition behavior},
  journal  = {Journal of Vision},
  volume   = {21},
  number   = {9},
  pages    = {2489},
  year     = {2021},
  month    = sep,
  doi      = {10.1167/jov.21.9.2489},
  url      = {https://jov.arvojournals.org/article.aspx?articleid=2777218},
  abstract = {Distributed activity patterns across multiple brain areas (e.g., V4, IT) enable primates to accurately identify visual objects. To strengthen our inferences about the causal role of underlying brain circuits, it is necessary to develop targeted neural perturbation strategies that enable discrimination amongst competing models. To probe the role of area V4 in core object recognition, we expressed inhibitory DREADDs in neurons within a 5x5 mm subregion of V4 cortex via multiple viral injections (AAV8-hSyn-hM4Di-mCherry; two macaques). To assay for successful neural suppression, we recorded from a multi-electrode array implanted over the transfected V4. We also recorded from multi-electrode arrays in the IT cortex (the primary feedforward target of V4), while simultaneously measuring the monkeys{\textquoteright} behavior during object discrimination tasks. We found that systemic (intramuscular) injection of the DREADDs activator (CNO) produced reversible reductions ($\sim$20\%) in image-evoked V4 responses compared to the control condition (saline injections). Monkeys showed significant behavioral performance deficits upon CNO injections (compared to saline), which were larger when the object position overlapped with the RF estimates of the transfected V4 neurons. This is consistent with the hypothesis that the suppressed V4 neurons are critical to this behavior. Furthermore, we observed commensurate deficits in the linearly-decoded estimates of object identity from the IT population activity (post-CNO). To model the perturbed brain circuitry, we used a primate brain-mapped artificial neural network (ANN) model (CORnet-S) that supports object recognition. We {\textquotedblleft}lesioned{\textquotedblright} the model{\textquoteright}s corresponding V4 subregion by modifying its weights such that the responses matched a subset of our experimental V4 measurements (post-CNO). Indeed, the lesioned model better predicted the measured (held-out) V4 and IT responses (post-CNO), compared to the model{\textquoteright}s non-lesioned version, validating our approach. In the future, our approach allows us to discriminate amongst competing mechanistic brain models, while the data provides constraints to guide more accurate alternatives.}
}

@inproceedings{4532,
  author    = {Kar, Kohitij and DiCarlo, James J.},
  title     = {Evidence that recurrent pathways between the prefrontal and inferior temporal cortex is critical during core object recognition},
  booktitle = {COSYNE},
  year      = {2020},
  month     = feb,
  address   = {Denver, Colorado, USA}
}

@article{4659,
  author   = {Kar, Kohitij and DiCarlo, James J.},
  title    = {Fast Recurrent Processing via Ventrolateral Prefrontal Cortex Is Needed by the Primate Ventral Stream for Robust Core Visual Object Recognition},
  journal  = {Neuron},
  year     = {2020},
  month    = oct,
  issn     = {0896-6273},
  doi      = {10.1016/j.neuron.2020.09.035},
  url      = {https://linkinghub.elsevier.com/retrieve/pii/S0896627320307595},
  abstract = {Distributed neural population spiking patterns in macaque inferior temporal (IT) cortex that support core object recognition require additional time to develop for specific, {\textquoteleft}{\textquoteleft}late-solved{\textquoteright}{\textquoteright} images. This suggests the necessity of recurrent processing in these computations. Which brain circuits are responsible for computing and transmitting these putative recurrent signals to IT? To test whether the ventrolateral prefrontal cortex (vlPFC) is a critical recurrent node in this system, here, we pharmacologically inactivated parts of vlPFC and simultaneously measured IT activity while monkeys performed object discrimination tasks. vlPFC inactivation deteriorated the quality of late-phase ($>$150 ms from image onset) IT population code and produced commensurate behavioral deficits for late-solved images. Finally, silencing vlPFC caused the monkeys{\textquoteright} IT activity and behavior to become more like those produced by feedforward-only ventral stream models. Together with prior work, these results implicate fast recurrent processing through vlPFC as critical to producing behaviorally sufficient object representations in IT.}
}

@article{4600,
  author   = {Rajalingham, Rishi and Kar, Kohitij and Sanghavi, Sachi and Dehaene, Stanislas and DiCarlo, James J.},
  title    = {The inferior temporal cortex is a potential cortical precursor of orthographic processing in untrained monkeys},
  journal  = {Nature Communications},
  volume   = {11},
  year     = {2020},
  month    = aug,
  doi      = {10.1038/s41467-020-17714-3},
  url      = {http://www.nature.com/articles/s41467-020-17714-3},
  abstract = {The ability to recognize written letter strings is foundational to human reading, but the underlying neuronal mechanisms remain largely unknown. Recent behavioral research in baboons suggests that non-human primates may provide an opportunity to investigate this question. We recorded the activity of hundreds of neurons in V4 and the inferior temporal cortex (IT) while na{\"\i}ve macaque monkeys passively viewed images of letters, English words and non-word strings, and tested the capacity of those neuronal representations to support a battery of orthographic processing tasks. We found that simple linear read-outs of IT (but not V4) population responses achieved high performance on all tested tasks, even matching the performance and error patterns of baboons on word classification. These results show that the IT cortex of untrained primates can serve as a precursor of orthographic processing, suggesting that the acquisition of reading in humans relies on the recycling of a brain network evolved for other visual functions.}
}

@inproceedings{4379,
  author        = {Kubilius, Jonas and Schrimpf, Martin and Kar, Kohitij and Rajalingham, Rishi and Hong, Ha and Majaj, Najib J. and Issa, Elias B. and Bashivan, Pouya and Prescott-Roy, Jonathan and Schmidt, Kailyn and Nayebi, Aran and Bear, Daniel and Yamins, Daniel L. K. and DiCarlo, James J.},
  title         = {Brain-Like Object Recognition with High-Performing Shallow Recurrent {ANNs}},
  booktitle     = {Advances in Neural Information Processing Systems ({NeurIPS})},
  year          = {2019},
  month         = oct,
  address       = {Vancouver, Canada},
  internal-note = {Entry type changed from @proceedings (a paper with authors, not a whole proceedings volume); booktitle inferred from the venue/date (NeurIPS 2019, Vancouver) -- verify against the published record.},
  abstract      = {Deep convolutional artificial neural networks (ANNs) are the leading class of candidate models of the mechanisms of visual processing in the primate ventral stream. While initially inspired by brain anatomy, over the past years, these ANNs have evolved from a simple eight-layer architecture in AlexNet to extremely deep and branching architectures, demonstrating increasingly better object categorization performance, yet bringing into question how brain-like they still are. In particular, typical deep models from the machine learning community are often hard to map onto the brain{\textquoteright}s anatomy due to their vast number of layers and missing biologically-important connections, such as recurrence. Here we demonstrate that better anatomical alignment to the brain and high performance on machine learning as well as neuroscience measures do not have to be in contradiction. We developed CORnet-S, a shallow ANN with four anatomically mapped areas and recurrent connectivity, guided by Brain-Score, a new large-scale composite of neural and behavioral benchmarks for quantifying the functional fidelity of models of the primate ventral visual stream. Despite being significantly shallower than most models, CORnet-S is the top model on Brain-Score and outperforms similarly compact models on ImageNet. Moreover, our extensive analyses of CORnet-S circuitry variants reveal that recurrence is the main predictive factor of both Brain-Score and ImageNet top-1 performance. Finally, we report that the temporal evolution of the CORnet-S {\textquotedblleft}IT{\textquotedblright} neural population resembles the actual monkey IT population dynamics. Taken together, these results establish CORnet-S, a compact, recurrent ANN, as the current best model of the primate ventral visual stream.}
}

@article{4141,
  author   = {Kar, Kohitij and Kubilius, Jonas and Schmidt, Kailyn and Issa, Elias B. and DiCarlo, James J.},
  title    = {Evidence that recurrent circuits are critical to the ventral stream{\textquoteright}s execution of core object recognition behavior},
  journal  = {Nature Neuroscience},
  year     = {2019},
  month    = apr,
  doi      = {10.1038/s41593-019-0392-5},
  url      = {https://www.nature.com/articles/s41593-019-0392-5},
  abstract = {Non-recurrent deep convolutional neural networks (DCNNs) are currently the best models of core object recognition; a behavior supported by the densely recurrent primate ventral stream, culminating in the inferior temporal (IT) cortex. Are these recurrent circuits critical to the ventral stream{\textquoteright}s execution of this behavior? We reasoned that, if recurrence is critical, then primates should outperform feedforward-only DCNNs for some images, and that these images should require additional processing time beyond the feedforward IT response. Here we first used behavioral methods to discover hundreds of these {\textquotedblleft}challenge{\textquotedblright} images. Second, using large-scale IT electrophysiology in animals performing core recognition tasks, we observed that behaviorally-sufficient, linearly-decodable object identity solutions emerged $\sim$30 ms (on average) later in IT for challenge images compared to DCNN and primate performance-matched {\textquotedblleft}control{\textquotedblright} images. We observed these same late solutions even during passive viewing. Third, consistent with a failure of feedforward computations, the behaviorally-critical late-phase IT population response patterns evoked by the challenge images were poorly predicted by DCNN activations. Interestingly, very deep CNNs as well as not-so-deep but recurrent CNNs better predicted these late IT responses, suggesting a functional equivalence between additional nonlinear transformations and recurrence. Our results argue that automatically-evoked recurrent circuits are critical even for rapid object identification. By precisely comparing current DCNNs, primate behavior and IT population dynamics, we provide guidance for future recurrent model development.}
}

@inproceedings{4531,
  author    = {Kar, Kohitij and DiCarlo, James J.},
  title     = {Evidence that recurrent pathways between the prefrontal and inferior temporal cortex is critical during core object recognition},
  booktitle = {Society for Neuroscience},
  year      = {2019},
  month     = oct,
  address   = {Chicago, IL, USA}
}

@article{4143,
  author   = {Bashivan, Pouya and Kar, Kohitij and DiCarlo, James J.},
  title    = {Neural Population Control via Deep Image Synthesis},
  journal  = {Science},
  volume   = {364},
  number   = {6439},
  pages    = {eaav9436},
  year     = {2019},
  month    = may,
  doi      = {10.1126/science.aav9436},
  url      = {https://science.sciencemag.org/content/364/6439/eaav9436},
  abstract = {Particular deep artificial neural networks (ANNs) are today{\textquoteright}s most accurate models of the primate brain{\textquoteright}s ventral visual stream. Here we report that, using an ANN-driven image synthesis method, new luminous power patterns (i.e. images) can be applied to the primate retinae to predictably push the spiking activity of targeted V4 neural sites beyond naturally occurring levels. More importantly, this method, while not yet perfect, achieves unprecedented independent control of the activity state of entire populations of V4 neural sites, even those with overlapping receptive fields. These results show how the knowledge embedded in today{\textquoteright}s ANN models might be used to noninvasively set desired internal brain states at neuron-level resolution, and suggest that more accurate ANN models would produce even more accurate control.}
}

@article{4294,
  author        = {Schrimpf, Martin and Kubilius, Jonas and Hong, Ha and Majaj, Najib J. and Rajalingham, Rishi and Issa, Elias B. and Kar, Kohitij and Bashivan, Pouya and Prescott-Roy, Jonathan and Schmidt, Kailyn and Yamins, Daniel L. K. and DiCarlo, James J.},
  title         = {{Brain-Score}: Which Artificial Neural Network for Object Recognition is most Brain-Like?},
  journal       = {bioRxiv},
  year          = {2018},
  keywords      = {computational neuroscience, deep learning, Neural Networks, object recognition, ventral stream},
  doi           = {10.1101/407007},
  url           = {https://www.biorxiv.org/content/10.1101/407007v1},
  internal-note = {Original record split the author list across author (2 names) and editor (10 names); all twelve are co-authors of the preprint, so the lists were merged and the editor field dropped -- verify ordering against the preprint.},
  abstract      = {The internal representations of early deep artificial neural networks (ANNs) were found to be remarkably similar to the internal neural representations measured experimentally in the primate brain. Here we ask, as deep ANNs have continued to evolve, are they becoming more or less brain-like? ANNs that are most functionally similar to the brain will contain mechanisms that are most like those used by the brain. We therefore developed Brain-Score {\textendash} a composite of multiple neural and behavioral benchmarks that score any ANN on how similar it is to the brain{\textquoteright}s mechanisms for core object recognition {\textendash} and we deployed it to evaluate a wide range of state-of-the-art deep ANNs. Using this scoring system, we here report that: (1) DenseNet-169, CORnet-S and ResNet-101 are the most brain-like ANNs. (2) There remains considerable variability in neural and behavioral responses that is not predicted by any ANN, suggesting that no ANN model has yet captured all the relevant mechanisms. (3) Extending prior work, we found that gains in ANN ImageNet performance led to gains on Brain-Score. However, correlation weakened at $\geq$ 70\% top-1 ImageNet performance, suggesting that additional guidance from neuroscience is needed to make further advances in capturing brain mechanisms. (4) We uncovered smaller (i.e. less complex) ANNs that are more brain-like than many of the best-performing ImageNet models, which suggests the opportunity to simplify ANNs to better understand the ventral stream. The scoring system used here is far from complete. However, we propose that evaluating and tracking model-benchmark correspondences through a Brain-Score that is regularly updated with new brain data is an exciting opportunity: experimental benchmarks can be used to guide machine network evolution, and machine networks are mechanistic hypotheses of the brain{\textquoteright}s network and thus drive next experiments. To facilitate both of these, we release Brain-Score.org: a platform that hosts the neural and behavioral benchmarks, where ANNs for visual processing can be submitted to receive a Brain-Score and their rank relative to other models, and where new experimental data can be naturally incorporated.}
}

@article{3881,
  author        = {Arend, Luke and Han, Yena and Schrimpf, Martin and Bashivan, Pouya and Kar, Kohitij and Poggio, Tomaso and DiCarlo, James J. and Boix, Xavier},
  title         = {Single units in a deep neural network functionally correspond with neurons in the brain: preliminary results},
  year          = {2018},
  month         = nov,
  internal-note = {No journal/venue in the source record (required for @article); appears to be a preprint or technical report -- verify and fill in.},
  abstract      = {Deep neural networks have been shown to predict neural responses in higher visual cortex. The mapping from the model to a neuron in the brain occurs through a linear combination of many units in the model, leaving open the question of whether there also exists a correspondence at the level of individual neurons. Here we show that there exist many one-to-one mappings between single units in a deep neural network model and neurons in the brain. We show that this correspondence at the single-unit level is ubiquitous among state-of-the-art deep neural networks, and grows more pronounced for models with higher performance on a large-scale visual recognition task. Comparing matched populations{\textemdash}in the brain and in a model{\textemdash}we demonstrate a further correspondence at the level of the population code: stimulus category can be partially decoded from real neural responses using a classifier trained purely on a matched population of artificial units in a model. This provides a new point of investigation for phenomena which require fine-grained mappings between deep neural networks and the brain.}
}