@article {390, title = {Predicting Saliency Beyond Pixels}, year = {2014}, month = {01/2014}, abstract = {

A large body of previous models for predicting where people look in natural scenes has focused on pixel-level image attributes. To bridge the semantic gap between the predictive power of computational saliency models and human behavior, we propose a new saliency architecture that incorporates information at three layers: pixel-level image attributes, object-level attributes, and semantic-level attributes. Object- and semantic-level information is frequently ignored, or only a few sample object categories are discussed, where scaling to a large number of object categories is neither feasible nor neurally plausible. To address this problem, this work constructs a principled vocabulary of basic attributes to describe object- and semantic-level information, thus avoiding the restriction to a limited number of object categories. We build a new dataset of 700 images with eye-tracking data from 15 viewers and annotation data for 5551 segmented objects with fine contours and 12 semantic attributes. Experimental results demonstrate the importance of object- and semantic-level information in the prediction of visual attention.
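The three-layer architecture can be thought of as fusing per-location feature maps computed at the pixel, object, and semantic levels. The Python sketch below is only a schematic illustration under that assumption; the function name, the linear weighting, and the min-max normalization are illustrative choices and not the fusion rule used in the paper.

import numpy as np

def combine_saliency_layers(pixel_map, object_map, semantic_map,
                            weights=(1.0, 1.0, 1.0)):
    # Each input is an H x W array scoring one level of information.
    # Normalize each map to [0, 1], then fuse with a simple weighted sum
    # (illustrative only; it does not reproduce the paper's model).
    def normalize(m):
        m = np.asarray(m, dtype=float)
        rng = m.max() - m.min()
        return (m - m.min()) / rng if rng > 0 else np.zeros_like(m)

    maps = [normalize(m) for m in (pixel_map, object_map, semantic_map)]
    fused = sum(w * m for w, m in zip(weights, maps))
    return normalize(fused)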

}, url = {http://www.ece.nus.edu.sg/stfpage/eleqiz/predicting.html}, author = {Juan Xu and Ming Jiang and Shuo Wang and Mohan Kankanhalli and Qi Zhao} } @article {437, title = {VIP: A unifying framework for eye-gaze research}, year = {2013}, abstract = {

We have collected the first fixation dataset that captures all three VIP factors.

The images were selected from the NUSEF dataset, which contains both neutral and affective images. Out of the 758 NUSEF images, 150 were randomly selected. 75 subjects were recruited from a mixture of undergraduate, postgraduate and working adult populations. Male and female subjects were recruited separately to ensure an even gender distribution. They were tasked to view the 150 images in either a free-viewing (i.e. without an assigned task) or an anomaly-detection setting. Each image was displayed for 5 seconds, followed by 2 seconds of viewing a gray screen. The images were displayed in random order. Eye-gaze data were recorded at 120 Hz with the SMI RED 250, a binocular infrared-based remote eye-tracking device. The subjects were seated at a distance of 50 centimeters from a 22-inch LCD monitor with a 1680x1050 resolution. This setup is similar to others used in eye-gaze research.
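For converting fixation coordinates into degrees of visual angle, the stated geometry implies roughly 31 pixels per degree. The Python sketch below shows the calculation; treating the 22-inch figure as the screen diagonal and taking the panel aspect ratio from the 1680x1050 resolution are assumptions, not values stated above.

import math

# Assumptions (not stated in the dataset description): the 22-inch figure is
# the screen diagonal and the physical aspect ratio matches 1680x1050 (16:10).
diagonal_in = 22.0
res_w, res_h = 1680, 1050
viewing_distance_cm = 50.0

aspect = res_w / res_h
width_in = diagonal_in * aspect / math.hypot(aspect, 1.0)
width_cm = width_in * 2.54                                       # ~47.4 cm
px_per_cm = res_w / width_cm                                     # ~35 px/cm

# Extent of one degree of visual angle on the screen at 50 cm.
deg_cm = 2 * viewing_distance_cm * math.tan(math.radians(0.5))   # ~0.87 cm
px_per_deg = px_per_cm * deg_cm                                  # ~31 px/deg

print(f"approximately {px_per_deg:.0f} pixels per degree of visual angle")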

Before the start of the viewing experiment, the subjects also provided their demographic data: gender, age group, ethnicity, religion, field of study/work, highest education qualification, income group, expenditure group and nationality. Three personality-type questions were posed based on Jung's psychological types. The recorded eye-gaze data were preprocessed with the SMI SDK to extract the fixations from the preferred eye chosen by each subject.
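The fixation extraction in this dataset was performed with the SMI SDK. Purely as an illustration of what such a step involves, the sketch below implements a minimal dispersion-threshold (I-DT style) fixation detector for 120 Hz gaze samples; the function and its thresholds are illustrative assumptions and do not reproduce the SDK's algorithm or parameters.

def _dispersion(window):
    # Dispersion of a gaze window: horizontal extent plus vertical extent.
    xs, ys = zip(*window)
    return (max(xs) - min(xs)) + (max(ys) - min(ys))

def detect_fixations(samples, max_dispersion_px=30, min_duration_ms=100,
                     sample_rate_hz=120):
    # samples: list of (x, y) gaze positions in pixels at sample_rate_hz.
    # Returns (start_index, end_index, centroid_x, centroid_y) tuples.
    min_len = max(1, int(min_duration_ms * sample_rate_hz / 1000))
    fixations, start = [], 0
    while start + min_len <= len(samples):
        end = start + min_len
        if _dispersion(samples[start:end]) <= max_dispersion_px:
            # Grow the window while the dispersion stays under threshold.
            while end < len(samples) and \
                    _dispersion(samples[start:end + 1]) <= max_dispersion_px:
                end += 1
            xs, ys = zip(*samples[start:end])
            fixations.append((start, end, sum(xs) / len(xs), sum(ys) / len(ys)))
            start = end
        else:
            start += 1
    return fixations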

Download and copyright

The VIP dataset can be downloaded as a single zip file. The VIP dataset is available for research purposes only. By downloading or using the dataset, you are deemed to agree to its terms and conditions.

If you are using this dataset, please cite:

A Unifying Framework for Computational Eye-Gaze Research.
Keng-Teck Ma, Terence Sim and Mohan Kankanhalli.
4th International Workshop on Human Behavior Understanding. Barcelona, Spain. 2013.

}, url = {http://mmas.comp.nus.edu.sg/VIP.html}, author = {Keng-Teck Ma and Terence Sim and Mohan Kankanhalli} }