@article{4787,
  author   = {Tomov, Momchil S. and Schulz, Eric and Gershman, Samuel J.},
  title    = {Multi-Task Reinforcement Learning in Humans},
  journal  = {Nature Human Behaviour},
  year     = {2021},
  month    = jan,
  doi      = {10.1038/s41562-020-01035-y},
  url      = {http://www.nature.com/articles/s41562-020-01035-y},
  abstract = {The ability to transfer knowledge across tasks and generalize to novel ones is an important hallmark of human intelligence. Yet not much is known about human multitask reinforcement learning. We study participants{\textquoteright} behaviour in a two-step decision-making task with multiple features and changing reward functions. We compare their behaviour with two algorithms for multitask reinforcement learning, one that maps previous policies and encountered features to new reward functions and one that approximates value functions across tasks, as well as to standard model-based and model-free algorithms. Across three exploratory experiments and a large preregistered confirmatory experiment, our results provide evidence that participants who are able to learn the task use a strategy that maps previously learned policies to novel scenarios. These results enrich our understanding of human reinforcement learning in complex environments with changing task demands.},
}

@article{4501,
  author   = {Dasgupta, Ishita and Schulz, Eric and Tenenbaum, Joshua B. and Gershman, Samuel J.},
  title    = {A Theory of Learning to Infer},
  journal  = {Psychological Review},
  volume   = {127},
  year     = {2020},
  month    = apr,
  pages    = {412--441},
  issn     = {0033-295X},
  doi      = {10.1037/rev0000178},
  url      = {http://doi.apa.org/getdoi.cfm?doi=10.1037/rev0000178},
  abstract = {Bayesian theories of cognition assume that people can integrate probabilities rationally. However, several empirical findings contradict this proposition: human probabilistic inferences are prone to systematic deviations from optimality. Puzzlingly, these deviations sometimes go in opposite directions. Whereas some studies suggest that people underreact to prior probabilities (base rate neglect), other studies find that people underreact to the likelihood of the data (conservatism). We argue that these deviations arise because the human brain does not rely solely on a general-purpose mechanism for approximating Bayesian inference that is invariant across queries. Instead, the brain is equipped with a recognition model that maps queries to probability distributions. The parameters of this recognition model are optimized to get the output as close as possible, on average, to the true posterior. Because of our limited computational resources, the recognition model will allocate its resources so as to be more accurate for high probability queries than for low probability queries. By adapting to the query distribution, the recognition model learns to infer. We show that this theory can explain why and when people underreact to the data or the prior, and a new experiment demonstrates that these two forms of underreaction can be systematically controlled by manipulating the query distribution. The theory also explains a range of related phenomena: memory effects, belief bias, and the structure of response variability in probabilistic reasoning. We also discuss how the theory can be integrated with prior sampling-based accounts of approximate inference.},
}

@article{3440,
  author   = {Schulz, Eric and Tenenbaum, Joshua B. and Duvenaud, David and Speekenbrink, Maarten and Gershman, Samuel J.},
  title    = {Compositional Inductive Biases in Function Learning},
  journal  = {Cognitive Psychology},
  volume   = {99},
  year     = {2017},
  month    = dec,
  pages    = {44--79},
  issn     = {1095-5623},
  doi      = {10.1016/j.cogpsych.2017.11.002},
  url      = {https://www.sciencedirect.com/science/article/pii/S0010028517301743?via\%3Dihub},
  abstract = {How do people recognize and learn about complex functional structure? Taking inspiration from other areas of cognitive science, we propose that this is achieved by harnessing compositionality: complex structure is decomposed into simpler building blocks. We formalize this idea within the framework of Bayesian regression using a grammar over Gaussian process kernels, and compare this approach with other structure learning approaches. Participants consistently chose compositional (over non-compositional) extrapolations and interpolations of functions. Experiments designed to elicit priors over functional patterns revealed an inductive bias for compositional structure. Compositional functions were perceived as subjectively more predictable than non-compositional functions, and exhibited other signatures of predictability, such as enhanced memorability and reduced numerosity. Taken together, these results support the view that the human intuitive theory of functions is inherently compositional.},
}