@article {390, title = {Predicting Saliency Beyond Pixels}, year = {2014}, month = {01/2014}, abstract = {

A large body of previous models for predicting where people look in natural scenes has focused on pixel-level image attributes. To bridge the semantic gap between the predictive power of computational saliency models and human behavior, we propose a new saliency architecture that incorporates information at three layers: pixel-level image attributes, object-level attributes, and semantic-level attributes. Object- and semantic-level information is frequently ignored, or only a few sample object categories are discussed, where scaling to a large number of object categories is neither feasible nor neurally plausible. To address this problem, this work constructs a principled vocabulary of basic attributes to describe object- and semantic-level information, thus avoiding the restriction to a limited number of object categories. We build a new dataset of 700 images with eye-tracking data from 15 viewers and annotation data for 5551 segmented objects with fine contours and 12 semantic attributes. Experimental results demonstrate the importance of object- and semantic-level information in the prediction of visual attention.
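The three-layer architecture can be thought of as fusing per-location feature maps computed at the pixel, object, and semantic levels. The Python sketch below is only a schematic illustration under that assumption; the function name, the linear weighting, and the min-max normalization are illustrative choices and not the fusion rule used in the paper.

import numpy as np

def combine_saliency_layers(pixel_map, object_map, semantic_map,
                            weights=(1.0, 1.0, 1.0)):
    # Each input is an H x W array scoring one level of information.
    # Normalize each map to [0, 1], then fuse with a simple weighted sum
    # (illustrative only; it does not reproduce the paper's model).
    def normalize(m):
        m = np.asarray(m, dtype=float)
        rng = m.max() - m.min()
        return (m - m.min()) / rng if rng > 0 else np.zeros_like(m)

    maps = [normalize(m) for m in (pixel_map, object_map, semantic_map)]
    fused = sum(w * m for w, m in zip(weights, maps))
    return normalize(fused)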

}, url = {http://www.ece.nus.edu.sg/stfpage/eleqiz/predicting.html}, author = {Juan Xu and Ming Jiang and Shuo Wang and Mohan Kankanhalli and Qi Zhao} } @article {437, title = {VIP: A unifying framework for eye-gaze research}, year = {2013}, abstract = {

We have collected the first fixation dataset that captures all three VIP factors.

The images were selected from the NUSEF dataset, which contains both neutral and affective images. Out of the 758 NUSEF images, 150 were randomly selected. 75 subjects were recruited from a mixture of undergraduate, postgraduate and working adult populations. Male and female subjects were recruited separately to ensure an even gender distribution. They were tasked to view the 150 images in either a free-viewing (i.e. without an assigned task) or an anomaly-detection setting. Each image was displayed for 5 seconds, followed by 2 seconds of viewing a gray screen. The images were displayed in random order. Eye-gaze data were recorded at 120 Hz with the SMI RED 250, a binocular infrared-based remote eye-tracking device. The subjects were seated at a distance of 50 centimeters from a 22-inch LCD monitor with a 1680x1050 resolution. This setup is similar to others used in eye-gaze research.
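For converting fixation coordinates into degrees of visual angle, the stated geometry implies roughly 31 pixels per degree. The Python sketch below shows the calculation; treating the 22-inch figure as the screen diagonal and taking the panel aspect ratio from the 1680x1050 resolution are assumptions, not values stated above.

import math

# Assumptions (not stated in the dataset description): the 22-inch figure is
# the screen diagonal and the physical aspect ratio matches 1680x1050 (16:10).
diagonal_in = 22.0
res_w, res_h = 1680, 1050
viewing_distance_cm = 50.0

aspect = res_w / res_h
width_in = diagonal_in * aspect / math.hypot(aspect, 1.0)
width_cm = width_in * 2.54                                       # ~47.4 cm
px_per_cm = res_w / width_cm                                     # ~35 px/cm

# Extent of one degree of visual angle on the screen at 50 cm.
deg_cm = 2 * viewing_distance_cm * math.tan(math.radians(0.5))   # ~0.87 cm
px_per_deg = px_per_cm * deg_cm                                  # ~31 px/deg

print(f"approximately {px_per_deg:.0f} pixels per degree of visual angle")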

Before the start of the viewing experiment, the subjects also provided their demographic data: gender, age group, ethnicity, religion, field of study/work, highest education qualification, income group, expenditure group and nationality. Three personality-type questions were posed based on Jung's psychological types. The recorded eye-gaze data were preprocessed with the SMI SDK to extract the fixations from the preferred eye chosen by each subject.
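The fixation extraction in this dataset was performed with the SMI SDK. Purely as an illustration of what such a step involves, the sketch below implements a minimal dispersion-threshold (I-DT style) fixation detector for 120 Hz gaze samples; the function and its thresholds are illustrative assumptions and do not reproduce the SDK's algorithm or parameters.

def _dispersion(window):
    # Dispersion of a gaze window: horizontal extent plus vertical extent.
    xs, ys = zip(*window)
    return (max(xs) - min(xs)) + (max(ys) - min(ys))

def detect_fixations(samples, max_dispersion_px=30, min_duration_ms=100,
                     sample_rate_hz=120):
    # samples: list of (x, y) gaze positions in pixels at sample_rate_hz.
    # Returns (start_index, end_index, centroid_x, centroid_y) tuples.
    min_len = max(1, int(min_duration_ms * sample_rate_hz / 1000))
    fixations, start = [], 0
    while start + min_len <= len(samples):
        end = start + min_len
        if _dispersion(samples[start:end]) <= max_dispersion_px:
            # Grow the window while the dispersion stays under threshold.
            while end < len(samples) and \
                    _dispersion(samples[start:end + 1]) <= max_dispersion_px:
                end += 1
            xs, ys = zip(*samples[start:end])
            fixations.append((start, end, sum(xs) / len(xs), sum(ys) / len(ys)))
            start = end
        else:
            start += 1
    return fixations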

Download and copyright

The VIP dataset can be downloaded as a single zip file. The VIP dataset is available for research purposes only. By downloading or using the dataset, you are deemed to agree to its terms and conditions.

If you are using this dataset, please cite:

A Unifying Framework for Computational Eye-Gaze Research.
Keng-Teck Ma, Terence Sim and Mohan Kankanhalli.
4th International Workshop on Human Behavior Understanding. Barcelona, Spain. 2013.

}, url = {http://mmas.comp.nus.edu.sg/VIP.html}, author = {Keng-Teck Ma and Terence Sim and Mohan Kankanhalli} }