% Conference paper (IIAI-AAI 2022) on estimating comic speakers from the
% reading order of frames and texts.
% Review notes:
%   - The exported "series" field repeated the booktitle verbatim and was
%     dropped so styles that print series do not render the title twice.
%   - Names are stored in unambiguous "Last, First" form.
%   - "address" holds a country as exported; the publisher's city is not
%     given in this record -- TODO confirm and replace if known.
@inproceedings{c179cebb5e594497b678800ce45d28c0,
  author    = {Omori, Yuga and Nagamizo, Kota and Ikeda, Daisuke},
  title     = {Algorithms for estimation of comic speakers considering reading order of frames and texts},
  booktitle = {Proceedings - 2022 12th International Congress on Advanced Applied Informatics, {IIAI-AAI} 2022},
  editor    = {Matsuo, Tokuro and Takamatsu, Kunihiko and Ono, Yuichi},
  pages     = {367--372},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2022},
  doi       = {10.1109/IIAIAAI55812.2022.00080},
  language  = {English},
  keywords  = {Comic, Multimodal, Speaker estimation},
  abstract  = {Machine learning methods in recent years have focused on multimodal input and cross-modal tasks, and they are used as approaches to problems in various domains. Associating comic texts and characters using these approaches is informative for commercial activities such as speech synthesis and automatic translation of texts. In this study, we address the task of associating a text with a speaker in comics. It is challenging to correspond between them because these are not self-evidently attached, and few studies have attempted. These previous studies have less considered the continuity of comics such as narrative flow or contextual information. We assume that considering the continuity of comics is effective for speaker estimation. This paper proposes algorithms for estimating the reading order of frames or texts, and it also proposes methods for estimating speakers based on these orders. As a result, our proposed method improves accuracy compared to previous methods. Consideration of the frame order is an effective clue to the comic speaker estimation.},
  note      = {Publisher Copyright: {\textcopyright} 2022 IEEE.; 12th International Congress on Advanced Applied Informatics, IIAI-AAI 2022 ; Conference date: 02-07-2022 Through 07-07-2022},
}