@proceedings {4389, title = {Untangling in Invariant Speech Recognition}, year = {2019}, month = {11/2019}, address = {Vancouver, Canada}, abstract = {

Encouraged by the success of deep convolutional neural networks on a variety of visual tasks, much theoretical and experimental work has been aimed at understanding and interpreting how vision networks operate. At the same time, deep neural networks have also achieved impressive performance in audio processing applications, both as sub-components of larger systems and as complete end-to-end systems by themselves. Despite their empirical successes, comparatively little is understood about how these audio models accomplish these tasks. In this work, we employ a recently developed statistical mechanical theory that connects geometric properties of network representations and the separability of classes to probe how information is untangled within neural networks trained to recognize speech. We observe that speaker-specific nuisance variations are discarded by the network{\textquoteright}s hierarchy, whereas task-relevant properties such as words and phonemes are untangled in later layers. Higher-level concepts such as parts of speech and context dependence also emerge in the later layers of the network. Finally, we find that the deep representations carry out significant temporal untangling by efficiently extracting task-relevant features at each time step of the computation. Taken together, these findings shed light on how deep auditory models process their time-dependent input signals to carry out invariant speech recognition, and show how different concepts emerge through the layers of the network.

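[Annotation: the paper's untangling measure comes from a mean-field manifold capacity theory; a much simpler proxy a reader can run is a cross-validated linear probe per layer, checking whether word identity becomes linearly decodable in deeper layers while speaker identity does not. The sketch below is a minimal illustration on synthetic placeholder activations and labels, not the paper's networks, data, or actual analysis.]

# Minimal linear-probe sketch of "untangling": per-layer decodability of
# task labels vs. nuisance labels. All data here are random placeholders.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

def probe_accuracy(features, labels):
    # Cross-validated accuracy of a linear classifier on one layer's features.
    clf = LogisticRegression(max_iter=1000)
    return cross_val_score(clf, features, labels, cv=5).mean()

rng = np.random.default_rng(0)
# Hypothetical stand-ins: layer name -> [n_samples, n_features] activations.
layer_activations = {f"layer{i}": rng.normal(size=(200, 64)) for i in range(4)}
word_labels = rng.integers(0, 10, size=200)     # task-relevant labels
speaker_labels = rng.integers(0, 5, size=200)   # nuisance labels

for name, feats in layer_activations.items():
    print(name,
          "word:", round(probe_accuracy(feats, word_labels), 3),
          "speaker:", round(probe_accuracy(feats, speaker_labels), 3))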
}, author = {Cory Stephenson and Jenelle Feather and Suchismita Padhy and Oguz Elibol and Hanlin Tang and Josh H. McDermott and SueYeon Chung} } @article {3764, title = {Recurrent computations for visual pattern completion}, journal = {Proceedings of the National Academy of Sciences}, year = {2018}, month = {08/2018}, abstract = {

Making inferences from partial information constitutes a critical aspect of cognition. During visual perception, pattern completion enables recognition of poorly visible or occluded objects. We combined psychophysics, physiology, and computational models to test the hypothesis that pattern completion is implemented by recurrent computations and present three pieces of evidence that are consistent with this hypothesis. First, subjects robustly recognized objects even when they were rendered {\textless}15\% visible, but recognition was largely impaired when processing was interrupted by backward masking. Second, invasive physiological responses along the human ventral cortex exhibited visually selective responses to partially visible objects that were delayed compared with whole objects, suggesting the need for additional computations. These physiological delays were correlated with the effects of backward masking. Third, state-of-the-art feed-forward computational architectures were not robust to partial visibility. However, recognition performance was recovered when the model was augmented with attractor-based recurrent connectivity. The recurrent model was able to predict which images of heavily occluded objects were easier or harder for humans to recognize, could capture the effect of introducing a backward mask on recognition behavior, and was consistent with the physiological delays along the human ventral visual stream. These results provide a strong argument of plausibility for the role of recurrent computations in making visual inferences from partial information.

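[Annotation: to make the attractor idea concrete, here is a minimal sketch of pattern completion in a Hopfield-style attractor network. This is a textbook toy assuming nothing about the paper's actual architecture; the stored patterns, occlusion level, and settling schedule are illustrative placeholders.]

# Toy attractor-based pattern completion: a partial cue settles onto the
# nearest stored pattern through recurrent dynamics.
import numpy as np

rng = np.random.default_rng(1)
patterns = rng.choice([-1, 1], size=(3, 100))     # three stored binary patterns
W = patterns.T @ patterns / patterns.shape[1]     # Hebbian weight matrix
np.fill_diagonal(W, 0)                            # no self-connections

occluded = patterns[0].copy()
occluded[25:] = 0                                 # "occlude" 75% of the units

state = occluded.astype(float)
for _ in range(20):                               # recurrent settling steps
    state = np.sign(W @ state)
    state[state == 0] = 1

print("overlap with original pattern:", (state == patterns[0]).mean())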
}, keywords = {Artificial Intelligence, computational neuroscience, Machine Learning, pattern completion, Visual object recognition}, issn = {0027-8424}, doi = {10.1073/pnas.1719397115}, url = {http://www.pnas.org/lookup/doi/10.1073/pnas.1719397115}, author = {Hanlin Tang and Martin Schrimpf and William Lotter and Charlotte Moerman and Ana Paredes and Josue Ortega Caro and Walter Hardesty and David Cox and Gabriel Kreiman} } @inbook {2126, title = {Recognition of occluded objects}, booktitle = {Computational and Cognitive Neuroscience of Vision}, year = {2017}, publisher = {Springer Singapore}, organization = {Springer Singapore}, isbn = {978-981-10-0211-3}, url = {http://www.springer.com/us/book/9789811002113}, author = {Hanlin Tang and Gabriel Kreiman and Qi Zhao} } @article {2261, title = {Cascade of neural processing orchestrates cognitive control in human frontal cortex [code]}, year = {2016}, publisher = {eLife}, abstract = {

Code and data used to create the figures of Tang et al. (2016). The results from this work show that a dynamic and hierarchical sequence of steps in human frontal cortex orchestrates cognitive control.

Used in conjunction with this mirrored CBMM Dataset entry

}, url = {http://klab.tch.harvard.edu/resources/tangetal_stroop_2016.html}, author = {Hanlin Tang and Hsiang-Yu Yu and Chien-Chen Chou and Nathan E. Crone and Joseph Madsen and WS Anderson and Gabriel Kreiman} } @article {2262, title = {Cascade of neural processing orchestrates cognitive control in human frontal cortex [dataset]}, year = {2016}, publisher = {eLife}, abstract = {

Code and data used to create the figures of Tang et al. (2016). The results from this work show that a dynamic and hierarchical sequence of steps in human frontal cortex orchestrates cognitive control.

Used in conjunction with this mirrored CBMM Code entry

}, url = {http://klab.tch.harvard.edu/resources/tangetal_stroop_2016.html}, author = {Hanlin Tang and Hsiang-Yu Yu and Chien-Chen Chou and Nathan E. Crone and Joseph Madsen and WS Anderson and Gabriel Kreiman} } @article {1847, title = {Cascade of neural processing orchestrates cognitive control in human frontal cortex}, journal = {eLife}, year = {2016}, month = {02/2016}, abstract = {
Rapid and flexible interpretation of conflicting sensory inputs in the context of current goals is a critical component of cognitive control that is orchestrated by frontal cortex. The relative roles of distinct subregions within frontal cortex are poorly understood. To examine the dynamics underlying cognitive control across frontal regions, we took advantage of the spatiotemporal resolution of intracranial recordings in epilepsy patients while subjects resolved color-word conflict. We observed differential activity preceding the behavioral responses to conflict trials throughout frontal cortex; this activity was correlated with behavioral reaction times. These signals emerged first in anterior cingulate cortex (ACC) before dorsolateral prefrontal cortex (dlPFC), followed by medial frontal cortex (mFC) and then by orbitofrontal cortex (OFC). These results dissociate the frontal subregions based on their dynamics, and suggest a temporal hierarchy for cognitive control in human cortex.
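[Annotation: a minimal sketch of how such a temporal hierarchy can be quantified, estimating a response-onset latency per region as the first sustained threshold crossing above baseline. The threshold-and-run criterion is a common convention, not necessarily the paper's exact method; the traces and onset delays below are synthetic.]

# Onset-latency estimation per region from synthetic single-trial traces.
import numpy as np

rng = np.random.default_rng(4)
fs = 1000.0
t = np.arange(-0.2, 0.8, 1 / fs)                  # time relative to stimulus, s

def onset_latency(trace, times, baseline_mask, n_sd=3.0, n_consec=10):
    # First post-stimulus sample starting a run of n_consec samples that all
    # exceed baseline mean + n_sd standard deviations.
    mu, sd = trace[baseline_mask].mean(), trace[baseline_mask].std()
    above = (times > 0) & (trace > mu + n_sd * sd)
    run = np.convolve(above.astype(int), np.ones(n_consec, dtype=int), mode="valid")
    idx = np.flatnonzero(run == n_consec)
    return times[idx[0]] if idx.size else np.nan

baseline = t < 0
for region, delay in [("ACC", 0.15), ("dlPFC", 0.25), ("mFC", 0.35), ("OFC", 0.45)]:
    trace = rng.normal(size=t.size)
    trace[t > delay] += 5.0                       # synthetic response after `delay`
    print(region, "onset ~", round(onset_latency(trace, t, baseline), 3), "s")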
}, doi = {10.7554/eLife.12352}, url = {http://dx.doi.org/10.7554/eLife.12352}, author = {Hanlin Tang and Hsiang-Yu Yu and Chien-Chen Chou and Nathan E. Crone and Joseph Madsen and WS Anderson and Gabriel Kreiman} } @proceedings {2121, title = {A machine learning approach to predict episodic memory formation}, year = {2016}, pages = {539-544}, address = {Princeton, NJ}, doi = {10.1109/CISS.2016.7460560}, url = {http://ieeexplore.ieee.org/xpl/articleDetails.jsp?arnumber=7460560\&newsearch=true\&queryText=A\%20machine\%20learning\%20approach\%20to\%20predict\%20episodic\%20memory\%20formation}, author = {Hanlin Tang and Jedediah Singer and Matias J. Ison and Gnel Pivazyan and Melissa Romaine and Elizabeth Meller and Victoria Perron and Marlise Arellano and Gabriel Kreiman and Adrianna Boulin and Rosa Frias and James Carroll and Sarah Dowcett} } @article {2565, title = {Predicting episodic memory formation for movie events}, journal = {Scientific Reports}, year = {2016}, month = {10/2016}, abstract = {

Episodic memories are long lasting and full of detail, yet imperfect and malleable. We quantitatively evaluated recollection of short audiovisual segments from movies as a proxy to real-life memory formation in 161 subjects at 15 minutes up to a year after encoding. Memories were reproducible within and across individuals, showed the typical decay with time elapsed between encoding and testing, were fallible yet accurate, and were insensitive to low-level stimulus manipulations but sensitive to high-level stimulus properties. Remarkably, memorability was also high for single movie frames, even one year post-encoding. To evaluate what determines the efficacy of long-term memory formation, we developed an extensive set of content annotations that included actions, emotional valence, visual cues and auditory cues. These annotations enabled us to document the content properties that showed a stronger correlation with recognition memory and to build a machine-learning computational model that accounted for episodic memory formation in single events for group averages and individual subjects with an accuracy of up to 80\%. These results provide initial steps towards the development of a quantitative computational theory capable of explaining the subjective filtering steps that lead to how humans learn and consolidate memories.

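[Annotation: a minimal sketch of the modeling approach described above, a classifier mapping per-event content annotations to remembered/forgotten outcomes. The feature names and random data are hypothetical stand-ins; the paper's annotation set, model details, and reported accuracy are not reproduced here.]

# Toy remembered-vs-forgotten classifier over illustrative content annotations.
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

rng = np.random.default_rng(2)
n_events = 300
X = np.column_stack([
    rng.integers(0, 2, n_events),   # contains_action (hypothetical feature)
    rng.uniform(-1, 1, n_events),   # emotional_valence (hypothetical feature)
    rng.integers(0, 2, n_events),   # contains_faces (hypothetical feature)
    rng.uniform(0, 1, n_events),    # visual_salience (hypothetical feature)
])
y = rng.integers(0, 2, n_events)    # remembered (1) vs. forgotten (0)

model = RandomForestClassifier(n_estimators=200, random_state=0)
print("cross-validated accuracy:", cross_val_score(model, X, y, cv=5).mean())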
}, doi = {10.1038/srep30175}, url = {http://www.nature.com/articles/srep30175}, author = {Hanlin Tang and Jedediah Singer and Matias J. Ison and Gnel Pivazyan and Melissa Romaine and Rosa Frias and Elizabeth Meller and Adrianna Boulin and James Carroll and Victoria Perron and Sarah Dowcett and Marlise Arellano and Gabriel Kreiman} } @article {2885, title = {Predicting episodic memory formation for movie events [code]}, year = {2016}, abstract = {

Episodic memories are long lasting and full of detail, yet imperfect and malleable. We quantitatively evaluated recollection of short audiovisual segments from movies as a proxy to real-life memory formation in 161 subjects at 15 minutes up to a year after encoding. Memories were reproducible within and across individuals, showed the typical decay with time elapsed between encoding and testing, were fallible yet accurate, and were insensitive to low-level stimulus manipulations but sensitive to high-level stimulus properties. Remarkably, memorability was also high for single movie frames, even one year post-encoding. To evaluate what determines the efficacy of long-term memory formation, we developed an extensive set of content annotations that included actions, emotional valence, visual cues and auditory cues. These annotations enabled us to document the content properties that showed a stronger correlation with recognition memory and to build a machine-learning computational model that accounted for episodic memory formation in single events for group averages and individual subjects with an accuracy of up to 80\%. These results provide initial steps towards the development of a quantitative computational theory capable of explaining the subjective filtering steps that lead to how humans learn and consolidate memories.


For more information and to download datasets, please visit the project website: http://klab.tch.harvard.edu/resources/Tangetal_episodicmemory_2016.html


The corresponding publication can be found here.


The corresponding dataset entry can be found here.

}, author = {Hanlin Tang and Jedediah Singer and Matias J. Ison and Gnel Pivazyan and Melissa Romaine and Rosa Frias and Elizabeth Meller and Adrianna Boulin and James Carroll and Victoria Perron and Sarah Dowcett and Marlise Arellano and Gabriel Kreiman} } @article {2886, title = {Predicting episodic memory formation for movie events [dataset]}, year = {2016}, abstract = {

Episodic memories are long lasting and full of detail, yet imperfect and malleable. We quantitatively evaluated recollection of short audiovisual segments from movies as a proxy to real-life memory formation in 161 subjects at 15 minutes up to a year after encoding. Memories were reproducible within and across individuals, showed the typical decay with time elapsed between encoding and testing, were fallible yet accurate, and were insensitive to low-level stimulus manipulations but sensitive to high-level stimulus properties. Remarkably, memorability was also high for single movie frames, even one year post-encoding. To evaluate what determines the efficacy of long-term memory formation, we developed an extensive set of content annotations that included actions, emotional valence, visual cues and auditory cues. These annotations enabled us to document the content properties that showed a stronger correlation with recognition memory and to build a machine-learning computational model that accounted for episodic memory formation in single events for group averages and individual subjects with an accuracy of up to 80\%. These results provide initial steps towards the development of a quantitative computational theory capable of explaining the subjective filtering steps that lead to how humans learn and consolidate memories.


For more information and to download datasets, please visit the project website: http://klab.tch.harvard.edu/resources/Tangetal_episodicmemory_2016.html


The corresponding publication can be found here.


The corresponding code entry can be found here.

}, author = {Hanlin Tang and Jedediah Singer and Matias J. Ison and Gnel Pivazyan and Melissa Romaine and Rosa Frias and Elizabeth Meller and Adrianna Boulin and James Carroll and Victoria Perron and Sarah Dowcett and Marlise Arellano and Gabriel Kreiman} } @article {1155, title = {Decrease in gamma-band activity tracks sequence learning}, journal = {Frontiers in Systems Neuroscience}, volume = {8}, year = {2015}, month = {01/21/2015}, abstract = {

Learning novel sequences constitutes an example of declarative memory formation, involving conscious recall of temporal events. Performance in sequence learning tasks improves with repetition and involves forming temporal associations over scales of seconds to minutes. To further understand the neural circuits underlying declarative sequence learning over trials, we tracked changes in intracranial field potentials (IFPs) recorded from 1142 electrodes implanted throughout temporal and frontal cortical areas in 14 human subjects, while they learned the temporal order of multiple sequences of images over trials through repeated recall. We observed an increase in power in the gamma frequency band (30{\textendash}100 Hz) in the recall phase, particularly in areas within the temporal lobe including the parahippocampal gyrus. The degree of this gamma power enhancement decreased over trials with improved sequence recall. Modulation of gamma power was directly correlated with the improvement in recall performance. When presenting new sequences, gamma power was reset to high values and decreased again after learning. These observations suggest that signals in the gamma frequency band may play a more prominent role during the early steps of the learning process rather than during the maintenance of memory traces.

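[Annotation: a minimal sketch of extracting gamma-band (30-100 Hz) power from a field-potential trace via band-pass filtering and a Hilbert envelope, a standard approach for this kind of measure; the sampling rate and signal below are synthetic placeholders, not the study's recording pipeline.]

# Gamma-band power extraction from a synthetic intracranial field potential.
import numpy as np
from scipy.signal import butter, filtfilt, hilbert

fs = 1000.0                                        # assumed sampling rate, Hz
t = np.arange(0, 2.0, 1 / fs)
ifp = np.random.default_rng(3).normal(size=t.size) # stand-in for a recording

# Fourth-order Butterworth band-pass at 30-100 Hz (normalized to Nyquist).
b, a = butter(4, [30 / (fs / 2), 100 / (fs / 2)], btype="band")
gamma = filtfilt(b, a, ifp)                        # zero-phase filtering
power = np.abs(hilbert(gamma)) ** 2                # instantaneous power envelope

print("mean gamma power:", power.mean())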
}, doi = {10.3389/fnsys.2014.00222}, url = {http://journal.frontiersin.org/article/10.3389/fnsys.2014.00222/abstract}, author = {Radhika Madhavan and Daniel Millman and Hanlin Tang and Nathan E. Crone and Fredrick A. Lenz and Travis S. Tierney and Joseph Madsen and Gabriel Kreiman and WS Anderson} } @article {456, title = {A role for recurrent processing in object completion: neurophysiological, psychophysical and computational evidence.}, number = {009}, year = {2014}, month = {04/2014}, abstract = {

Recognition of objects from partial information presents a significant challenge for theories of vision because it requires spatial integration and extrapolation from prior knowledge. We combined neurophysiological recordings in human cortex with psychophysical measurements and computational modeling to investigate the mechanisms involved in object completion. We recorded intracranial field potentials from 1,699 electrodes in 18 epilepsy patients to measure the timing and selectivity of responses along human visual cortex to whole and partial objects. Responses along the ventral visual stream remained selective despite showing only 9{\textendash}25\% of the object. However, these visually selective signals emerged ~100 ms later for partial versus whole objects. The processing delays were particularly pronounced in higher visual areas within the ventral stream, suggesting the involvement of additional recurrent processing. In separate psychophysics experiments, disrupting this recurrent computation with a backward mask at ~75 ms significantly impaired recognition of partial, but not whole, objects. Additionally, computational modeling shows that the performance of a purely bottom-up architecture is impaired by heavy occlusion and that this effect can be partially rescued via the incorporation of top-down connections. These results provide spatiotemporal constraints on theories of object recognition that involve recurrent processing to recognize objects from partial information.

}, author = {Hanlin Tang and Calin Buia and Joseph Madsen and WS Anderson and Gabriel Kreiman} } @article {217, title = {Spatiotemporal Dynamics Underlying Object Completion in Human Ventral Visual Cortex}, journal = {Neuron}, volume = {83}, year = {2014}, month = {08/06/2014}, pages = {736-748}, abstract = {

Natural vision often involves recognizing objects from partial information. Recognition of objects from parts presents a significant challenge for theories of vision because it requires spatial integration and extrapolation from prior knowledge. Here we recorded intracranial field potentials of 113 visually selective electrodes from epilepsy patients in response to whole and partial objects. Responses along the ventral visual stream, particularly the Inferior Occipital and Fusiform Gyri, remained selective despite showing only 9-25\% of the object areas. However, these visually selective signals emerged ~100 ms later for partial versus whole objects. These processing delays were particularly pronounced in higher visual areas within the ventral stream. This latency difference persisted when controlling for changes in contrast, signal amplitude, and the strength of selectivity. These results argue against a purely feed-forward explanation of recognition from partial information, and provide spatiotemporal constraints on theories of object recognition that involve recurrent processing.

}, keywords = {Circuits for Intelligence, vision}, issn = {0896-6273}, doi = {10.1016/j.neuron.2014.06.017}, url = {http://linkinghub.elsevier.com/retrieve/pii/S089662731400539X}, author = {Hanlin Tang and Calin Buia and Radhika Madhavan and Nathan E. Crone and Joseph Madsen and WS Anderson and Gabriel Kreiman} }