% Conference paper from MLSP 2015. The language and address fields are stored in English (ASCII) so the entry also works with classic 8-bit BibTeX.
@inproceedings{2bae92f659014f9cb71bda75752a5885,
  author    = {Adi, Yossi and Keshet, Joseph and Goldrick, Matthew},
  title     = {Vowel duration measurement using deep neural networks},
  booktitle = {2015 {IEEE} International Workshop on Machine Learning for Signal Processing - Proceedings of {MLSP} 2015},
  editor    = {Erdogmus, Deniz and Kozat, Serdar and Larsen, Jan and Akcakaya, Murat},
  series    = {{IEEE} International Workshop on Machine Learning for Signal Processing, {MLSP}},
  publisher = {IEEE Computer Society},
  address   = {United States},
  year      = {2015},
  month     = nov,
  day       = {10},
  doi       = {10.1109/MLSP.2015.7324331},
  language  = {English},
  keywords  = {Forced alignment, convolution neural networks, deep belief networks, hidden Markov models, vowel duration measurement},
  abstract  = {Vowel durations are most often utilized in studies addressing specific issues in phonetics. Thus far this has been hampered by a reliance on subjective, labor-intensive manual annotation. Our goal is to build an algorithm for automatic accurate measurement of vowel duration, where the input to the algorithm is a speech segment contains one vowel preceded and followed by consonants (CVC). Our algorithm is based on a deep neural network trained at the frame level on manually annotated data from a phonetic study. Specifically, we try two deep-network architectures: convolutional neural network (CNN), and deep belief network (DBN), and compare their accuracy to an HMM-based forced aligner. Results suggest that CNN is better than DBN, and both CNN and HMM-based forced aligner are comparable in their results, but neither of them yielded the same predictions as models fit to manually annotated data.},
  note      = {Publisher Copyright: {\textcopyright} 2015 IEEE.; 25th IEEE International Workshop on Machine Learning for Signal Processing, MLSP 2015 ; Conference date: 17-09-2015 Through 20-09-2015},
}