@inproceedings{ddbdfb9e6464422eb6fe0e72ff2094de,
title = "MedRAT: Unpaired Medical Report Generation via Auxiliary Tasks",
abstract = "Medical report generation from X-ray images is a challenging task, particularly in an unpaired setting where paired image-report data is unavailable for training. To address this challenge, we propose a novel model that leverages the available information in two distinct datasets, one comprising reports and the other consisting of images. The core idea of our model revolves around the notion that combining auto-encoding report generation with multi-modal (report-image) alignment can offer a solution. However, the challenge persists regarding how to achieve this alignment when pair correspondence is absent. Our proposed solution involves the use of auxiliary tasks, particularly contrastive learning and classification, to position related images and reports in close proximity to each other. This approach differs from previous methods that rely on pre-processing steps, such as using external information stored in a knowledge graph. Our model, named MedRAT, surpasses previous state-of-the-art methods, demonstrating the feasibility of generating comprehensive medical reports without the need for paired data or external tools. Our code is publicly available (https://github.com/eladhi/MedRAT).",
author = "Elad Hirsch and Gefen Dawidowicz and Ayellet Tal",
note = "Publisher Copyright: {\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2025.; 18th European Conference on Computer Vision, ECCV 2024 ; Conference date: 29-09-2024 Through 04-10-2024",
year = "2025",
doi = "https://doi.org/10.1007/978-3-031-73209-6_2",
language = "الإنجليزيّة",
isbn = "9783031732089",
series = "Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)",
publisher = "Springer Science and Business Media Deutschland GmbH",
pages = "18--35",
editor = "Ale{\v s} Leonardis and Elisa Ricci and Stefan Roth and Olga Russakovsky and Torsten Sattler and G{\"u}l Varol",
booktitle = "Computer Vision – ECCV 2024 - 18th European Conference, Proceedings",
address = "ألمانيا",
}