@conference {5322, title = {Zero-shot linear combinations of grounded social interactions with Linear Social MDPs}, booktitle = {Proceedings of the 37th AAAI Conference on Artificial Intelligence (AAAI)}, year = {2023}, month = {02/2023}, abstract = {

Humans and animals engage in rich social interactions. It is often theorized that a relatively small number of basic social interactions give rise to the full range of behavior observed. But no computational theory explaining how social interactions combine has been proposed before. We do so here. We take a model, the Social MDP, which is able to express a range of social interactions, and extend it to represent linear combinations of social interactions. Practically, for robotics applications, such models can now express not just that an agent should help another agent, but goal-centric social interactions. Perhaps an agent is helping someone get dressed, but preventing them from falling, and is happy to exchange stories in the meantime. How an agent responds socially should depend on what it thinks the other agent is doing at that point in time. To encode this notion, we take linear combinations of social interactions as defined in Social MDPs, and compute the weights on those combinations on the fly depending on the estimated goals of other agents. This new model, the Linear Social MDP, enables zero-shot reasoning about complex social interactions, provides a mathematical basis for the long-standing intuition that social interactions should compose, and leads to interesting new behaviors that we validate using human observers. Complex social interactions are part of the future of intelligent agents, and having principled mathematical models built on a foundation like MDPs will make it possible to bring social interactions to every robotic application.
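A minimal sketch of this goal-dependent weighting, in Python; every name here (the reward terms, the belief over goals, the mixing rule) is an illustrative assumption for exposition, not the paper's implementation:

```python
# Hypothetical sketch: an agent's social reward as a linear combination of
# per-interaction reward terms, weighted on the fly by the current belief
# about which goal the other agent is pursuing. All names are illustrative.

def social_reward(state, action, goal_posterior, social_terms):
    """goal_posterior: dict goal -> estimated probability (updated online).
    social_terms:      dict goal -> reward function R(state, action)."""
    return sum(p * social_terms[g](state, action)
               for g, p in goal_posterior.items())

# Example: the other agent may be getting dressed or about to fall;
# helping and protecting are separate terms, mixed by the belief state.
terms = {
    "dressing": lambda s, a: 1.0 if a == "assist_dressing" else 0.0,
    "falling":  lambda s, a: 5.0 if a == "catch" else 0.0,
}
belief = {"dressing": 0.7, "falling": 0.3}
print(social_reward(None, "catch", belief, terms))  # 0.7*0.0 + 0.3*5.0 = 1.5
```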

}, author = {Ravi Tejwani and Yen-Ling Kuo and Tianmin Shu and Bennett Stankovits and Dan Gutfreund and Joshua B. Tenenbaum and Boris Katz and Andrei Barbu} } @article {5050, title = {Incorporating Rich Social Interactions Into MDPs}, year = {2022}, abstract = {

Much of what we do as humans is engage socially with other agents, a skill that robots must also eventually possess. We demonstrate that a rich theory of social interactions originating from microsociology and economics can be formalized by extending a nested MDP where agents reason about arbitrary functions of each other{\textquoteright}s hidden rewards. This extended Social MDP allows us to encode the five basic interactions that underlie microsociology: cooperation, conflict, coercion, competition, and exchange. The result is a robotic agent capable of executing social interactions zero-shot in new environments; like humans, it can engage socially in novel ways even without a single example of that social interaction. Moreover, the judgments of these Social MDPs align closely with those of humans when considering which social interaction is taking place in an environment. This method both sheds light on the nature of social interactions, by providing concrete mathematical definitions, and brings rich social interactions into a mathematical framework that has proven to be natural for robotics: MDPs.
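As a rough, hedged illustration of agents reasoning about functions of each other's estimated rewards, consider the Python sketch below; the coefficient chi and the additive form are assumptions chosen for exposition, not the paper's exact definitions:

```python
# Illustrative sketch only: extend agent i's goal reward with a term over
# an *estimate* of agent j's reward. The sign of chi loosely separates
# cooperative from adversarial interactions; this is not the paper's code.

def extended_reward(r_goal_i, est_r_j, chi):
    """chi > 0: i benefits when j succeeds (cooperation/exchange-like);
    chi < 0: i benefits when j fails (conflict/competition-like);
    chi = 0: recovers an ordinary, non-social MDP."""
    return r_goal_i + chi * est_r_j

print(extended_reward(1.0, 2.0, chi=0.5))   # 2.0 (cooperative)
print(extended_reward(1.0, 2.0, chi=-0.5))  # 0.0 (adversarial)
```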

}, author = {Ravi Tejwani and Yen-Ling Kuo and Tianmin Shu and Bennett Stankovits and Dan Gutfreund and Joshua B. Tenenbaum and Boris Katz and Andrei Barbu} } @article {5065, title = {AGENT: A Benchmark for Core Psychological Reasoning}, year = {2021}, month = {07/2021}, author = {Tianmin Shu and Abhishek Bhandwaldar and Chuang Gan and Kevin A Smith and Shari Liu and Dan Gutfreund and Elizabeth S Spelke and Joshua B. Tenenbaum and Tomer D. Ullman} } @article {4632, title = {ThreeDWorld: A Platform for Interactive Multi-Modal Physical Simulation}, journal = {arXiv}, year = {2020}, month = {07/2020}, type = {Preprint}, abstract = {

We introduce ThreeDWorld (TDW), a platform for interactive multi-modal physical simulation. With TDW, users can simulate high-fidelity sensory data and physical interactions between mobile agents and objects in a wide variety of rich 3D environments. TDW has several unique properties: 1) real-time near photo-realistic image rendering quality; 2) a library of objects and environments with materials for high-quality rendering, and routines enabling user customization of the asset library; 3) generative procedures for efficiently building classes of new environments; 4) high-fidelity audio rendering; 5) believable and realistic physical interactions for a wide variety of material types, including cloths, liquids, and deformable objects; 6) a range of "avatar" types that serve as embodiments of AI agents, with the option for user avatar customization; and 7) support for human interactions with VR devices. TDW also provides a rich API enabling multiple agents to interact within a simulation and return a range of sensor and physics data representing the state of the world. We present initial experiments enabled by the platform around emerging research directions in computer vision, machine learning, and cognitive science, including multi-modal physical scene understanding, multi-agent interactions, models that "learn like a child", and attention studies in humans and neural networks. The simulation platform will be made publicly available.

}, url = {https://arxiv.org/abs/2007.04954}, author = {Chuang Gan and Jeremy Schwartz and Seth Alter and Martin Schrimpf and James Traer and Julian De Freitas and Jonas Kubilius and Abhishek Bhandwaldar and Nick Haber and Megumi Sano and Kuno Kim and Elias Wang and Damian Mrowca and Michael Lingelbach and Aidan Curtis and Kevin Feigelis and Daniel Bear and Dan Gutfreund and David Cox and James J. DiCarlo and Josh H. McDermott and Joshua B. Tenenbaum and Daniel L K Yamins} } @article {4633, title = {ThreeDWorld (TDW): A High-Fidelity, Multi-Modal Platform for Interactive Physical Simulation}, year = {2020}, month = {07/2020}, abstract = {

TDW is a 3D virtual world simulation platform that utilizes state-of-the-art video game engine technology.

A TDW simulation consists of two components: a) the Build, a compiled executable running on the Unity3D Engine, which is responsible for image rendering, audio synthesis and physics simulations; and b) the Controller, an external Python interface to communicate with the build.

Researchers write Controllers that send commands to the Build, which executes those commands and returns a broad range of data types representing the state of the virtual world.
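As a concrete illustration of this Controller/Build loop, here is a minimal controller sketch; it follows TDW's documented pattern of JSON-style commands sent via communicate(), but exact command names and fields vary across TDW versions, so treat it as an assumption-laden example rather than canonical usage:

```python
# Minimal TDW controller sketch (command names may differ by version).
from tdw.controller import Controller
from tdw.tdw_utils import TDWUtils

c = Controller()  # launches/connects to the Build executable

# Send commands to the Build; it executes them and returns output data
# representing the new state of the virtual world.
resp = c.communicate([TDWUtils.create_empty_room(12, 12),
                      {"$type": "set_screen_size", "width": 512, "height": 512}])

c.communicate({"$type": "terminate"})  # shut down the Build
```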

TDW provides researchers with: real-time near photo-realistic rendering; a library of objects and environments with high-quality materials; generative procedures for building new scenes; high-fidelity audio rendering; realistic physics for a wide range of material types, including cloths, liquids, and deformable objects; customizable agent "avatars"; and support for human interaction through VR devices.

TDW is used daily in multiple labs, supporting research at the nexus of neuroscience, cognitive science, and artificial intelligence.

Find out more about ThreeDWorld on the project website using the link below.

}, url = {http://www.threedworld.org/}, author = {Jeremy Schwartz and Seth Alter and James J. DiCarlo and Josh H. McDermott and Joshua B. Tenenbaum and Daniel L K Yamins and Dan Gutfreund and Chuang Gan and James Traer and Jonas Kubilius and Martin Schrimpf and Abhishek Bhandwaldar and Julian De Freitas and Damian Mrowca and Michael Lingelbach and Megumi Sano and Daniel Bear and Kuno Kim and Nick Haber and Chaofei Fan} } @proceedings {4388, title = {ObjectNet: A large-scale bias-controlled dataset for pushing the limits of object recognition models}, year = {2019}, month = {11/2019}, address = {Vancouver, Canada}, abstract = {

We collect a large real-world test set, ObjectNet, for object recognition with controls where object backgrounds, rotations, and imaging viewpoints are random. Most scientific experiments have controls, confounds which are removed from the data, to ensure that subjects cannot perform a task by exploiting trivial correlations in the data. Historically, large machine learning and computer vision datasets have lacked such controls. This has resulted in models that must be fine-tuned for new datasets and perform better on datasets than in real-world applications. When tested on ObjectNet, object detectors show a 40-45\% drop in performance with respect to their performance on other benchmarks, due to the controls for biases. Controls make ObjectNet robust to fine-tuning, showing only small performance increases. We develop a highly automated platform that enables gathering datasets with controls by crowdsourcing image capturing and annotation. ObjectNet is the same size as the ImageNet test set (50,000 images) and by design does not come paired with a training set, in order to encourage generalization. The dataset is both easier than ImageNet (objects are largely centered and unoccluded) and harder (due to the controls). Although we focus on object recognition here, data with controls can be gathered at scale using automated tools throughout machine learning to generate datasets that exercise models in new ways, thus providing valuable feedback to researchers. This work opens up new avenues for research in generalizable, robust, and more human-like computer vision, and in creating datasets where results are predictive of real-world performance.
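Because ObjectNet is deliberately test-only, evaluation reduces to running an existing pretrained model over its images; the sketch below assumes a torchvision ImageNet classifier and a placeholder local path and class mapping, none of which is ObjectNet's official tooling:

```python
# Hedged sketch: score a pretrained ImageNet model on ObjectNet images.
# The file path and the ObjectNet->ImageNet class mapping are placeholder
# assumptions; ObjectNet itself provides no training set to fine-tune on.

import torch
from torchvision import models
from PIL import Image

weights = models.ResNet50_Weights.IMAGENET1K_V2
model = models.resnet50(weights=weights).eval()
preprocess = weights.transforms()

img = preprocess(Image.open("objectnet/images/chair/0001.png").convert("RGB"))
with torch.no_grad():
    pred = model(img.unsqueeze(0)).argmax(dim=1).item()
print(weights.meta["categories"][pred])  # ImageNet label; map to ObjectNet class
```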

}, author = {Andrei Barbu and David Mayo and Julian Alverio and William Luo and Christopher Wang and Dan Gutfreund and Joshua B. Tenenbaum and Boris Katz} }