% LAVA corpus release record (dataset landing page, not a journal paper).
% Typed as misc because no journal exists for this record; the citation key
% is kept as-is so existing citations continue to resolve. The companion
% EMNLP 2015 paper is carried in the note field rather than in the abstract.
@misc{1885,
  author   = {Berzak, Yevgeni and Barbu, Andrei and Harari, Daniel and Katz, Boris and Ullman, Shimon},
  title    = {Language and Vision Ambiguities ({LAVA}) Corpus},
  year     = {2016},
  month    = jan,
  url      = {http://web.mit.edu/lavacorpus/},
  note     = {Reference: Yevgeni Berzak, Andrei Barbu, Daniel Harari, Boris Katz, and Shimon Ullman (2015). Do You See What I Mean? Visual Resolution of Linguistic Ambiguities. Conference on Empirical Methods in Natural Language Processing (EMNLP), Lisbon, Portugal.},
  abstract = {Ambiguity is one of the defining characteristics of human languages, and language understanding crucially relies on the ability to obtain unambiguous representations of linguistic content. While some ambiguities can be resolved using intra-linguistic contextual cues, the disambiguation of many linguistic constructions requires integration of world knowledge and perceptual information obtained from other modalities. In this work, we focus on the problem of grounding language in the visual modality, and introduce a novel task for visual and linguistic understanding which requires resolving linguistic ambiguities by utilizing the visual context of the utterance.

    To address this challenge, we release the Language and Vision Ambiguities (LAVA) corpus. LAVA contains ambiguous sentences coupled with visual scenes that depict the different interpretations of each sentence. The sentences in the corpus are annotated with syntactic and semantic parses, and cover a wide range of linguistic ambiguities, including PP and VP attachment, conjunctions, logical forms, anaphora and ellipsis. In addition to the sentence disambiguation challenge, the corpus will support a variety of related tasks which use natural language as a medium for expressing visual understanding.},
}