@conference {4109, title = {Grounding language acquisition by training semantic parsers using captioned videos}, booktitle = {Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing (EMNLP 2018)}, year = {2018}, month = {10/2018}, address = {Brussels, Belgium}, abstract = {

We develop a semantic parser that is trained in a grounded setting using pairs of videos captioned with sentences. This setting is both data-efficient, requiring little annotation, and similar to the experience of children where they observe their environment and listen to speakers. The semantic parser recovers the meaning of English sentences despite not having access to any annotated sentences. It does so despite the ambiguity inherent in vision, where a sentence may refer to any combination of objects, object properties, relations or actions taken by any agent in a video. For this task, we collected a new dataset for grounded language acquisition. Learning a grounded semantic parser {\textemdash} turning sentences into logical forms using captioned videos {\textemdash} can significantly expand the range of data that parsers can be trained on, lower the effort of training a semantic parser, and ultimately lead to a better understanding of child language acquisition.

}, isbn = {978-1-948087-84-1}, url = {http://aclweb.org/anthology/D18-1285}, author = {Candace Ross and Andrei Barbu and Yevgeni Berzak and Battushig Myanganbayar and Boris Katz} } @article {3963, title = {Partially Occluded Hands: A challenging new dataset for single-image hand pose estimation}, year = {2018}, month = {12/2018}, abstract = {

Recognizing the pose of hands matters most when hands are interacting with other objects. To understand how well both machines and humans perform on single-image 2D hand-pose reconstruction from RGB images, we collected a challenging dataset of hands interacting with 148 objects. We used a novel methodology that provides the same hand in the same pose both with the object present and occluding the hand and without the object occluding the hand. Additionally, we collected a wide range of grasps for each object, designing the data collection methodology to ensure this diversity. Using this dataset, we measured the performance of two state-of-the-art hand-pose recognition methods, showing that both are extremely brittle when faced with even light occlusion from an object. This is not evident in previous datasets because they often avoid hand-object occlusions and because they are collected from videos where hands are often between objects and mostly unoccluded. We annotated a subset of the dataset and used it to show that humans are robust with respect to occlusion, and also to characterize human hand perception, the space of grasps that seem to be considered, and the accuracy of reconstructing occluded portions of hands. We expect that such data will be of interest both to the vision community, for developing more robust hand-pose algorithms, and to the robotic grasp planning community, for learning such grasps. The dataset is available at occludedhands.com.

}, keywords = {dataset, Partial occlusion, RGB hand-pose reconstruction}, author = {Battushig Myanganbayar and Cristina Mata and Gil Dekel and Boris Katz and Guy Ben-Yosef and Andrei Barbu} } @conference {3964, title = {Partially Occluded Hands: A challenging new dataset for single-image hand pose estimation}, booktitle = {The 14th Asian Conference on Computer Vision (ACCV 2018)}, year = {2018}, month = {12/2018}, abstract = {

Recognizing the pose of hands matters most when hands are interacting with other objects. To understand how well both machines and humans perform on single-image 2D hand-pose reconstruction from RGB images, we collected a challenging dataset of hands interacting with 148 objects. We used a novel methodology that provides the same hand in the same pose both with the object present and occluding the hand and without the object occluding the hand. Additionally, we collected a wide range of grasps for each object, designing the data collection methodology to ensure this diversity. Using this dataset, we measured the performance of two state-of-the-art hand-pose recognition methods, showing that both are extremely brittle when faced with even light occlusion from an object. This is not evident in previous datasets because they often avoid hand-object occlusions and because they are collected from videos where hands are often between objects and mostly unoccluded. We annotated a subset of the dataset and used it to show that humans are robust with respect to occlusion, and also to characterize human hand perception, the space of grasps that seem to be considered, and the accuracy of reconstructing occluded portions of hands. We expect that such data will be of interest both to the vision community, for developing more robust hand-pose algorithms, and to the robotic grasp planning community, for learning such grasps. The dataset is available at occludedhands.com.

}, keywords = {dataset, Partial occlusion, RGB hand-pose reconstruction}, url = {http://accv2018.net/}, author = {Battushig Myanganbayar and Cristina Mata and Gil Dekel and Boris Katz and Guy Ben-Yosef and Andrei Barbu} }