@inproceedings{e795e3d4686b4062919cc8ea5822de02,
  title         = {Context-Aware Captions from Context-Agnostic Supervision},
  abstract      = {We introduce an inference technique to produce discriminative context-aware image captions (captions that describe differences between images or visual concepts) using only generic context-agnostic training data (captions that describe a concept or an image in isolation). For example, given images and captions of ``siamese cat'' and ``tiger cat'', we generate language that describes the ``siamese cat'' in a way that distinguishes it from ``tiger cat''. Our key novelty is that we show how to do joint inference over a language model that is context-agnostic and a listener which distinguishes closely-related concepts. We first apply our technique to a justification task, namely to describe why an image contains a particular fine-grained category as opposed to another closely-related category of the CUB-200-2011 dataset. We then study discriminative image captioning to generate language that uniquely refers to one of two semantically-similar images in the COCO dataset. Evaluations with discriminative ground truth for justification and human studies for discriminative image captioning reveal that our approach outperforms baseline generative and speaker-listener approaches for discrimination.},
  author        = {Vedantam, Ramakrishna and Bengio, Samy and Murphy, Kevin and Parikh, Devi and Chechik, Gal},
  note          = {Publisher Copyright: {\textcopyright} 2017 IEEE.; 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017 ; Conference date: 21-07-2017 Through 26-07-2017},
  year          = {2017},
  month         = nov,
  day           = {6},
  internal-note = {NOTE(review): month/day (Nov 6) is the IEEE Xplore publication date, not the conference date (July 21--26 per note) -- confirm which is intended},
  doi           = {10.1109/CVPR.2017.120},
  language      = {English},
  series        = {Proceedings - 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017},
  publisher     = {Institute of Electrical and Electronics Engineers Inc.},
  pages         = {1070--1079},
  booktitle     = {Proceedings - 30th IEEE Conference on Computer Vision and Pattern Recognition, CVPR 2017},
  address       = {United States},
}