Andras, Kocsor

% Journal article: segmental speech recognition with the OASIS recognizer,
% evaluated on Hungarian telephone speech (Acta Cybernetica 16(4), 2004).
@article{toth2004telephone,
  author   = {T{\'o}th, L{\'a}szl{\'o} and Kocsor, Andr{\'a}s and Gosztolya, G{\'a}bor},
  title    = {Telephone Speech Recognition via the Combination of Knowledge Sources in
              a Segmental Speech Model},
  journal  = {Acta Cybernetica},
  volume   = {16},
  number   = {4},
  pages    = {643--657},
  year     = {2004},
  abstract = {The currently dominant speech recognition methodology, Hidden Markov Modeling,
treats speech as a stochastic random process with very simple mathematical
properties. The simplistic assumptions of the model, and especially that
of the independence of the observation vectors have been criticized by
many in the literature, and alternative solutions have been proposed. One
such alternative is segmental modeling, and the OASIS recognizer we have
been working on in the recent years belongs to this category. In this paper
we go one step further and suggest that we should consider speech recognition
as a knowledge source combination problem. We offer a generalized algorithmic
framework for this approach and show that both hidden Markov and segmental
modeling are a special case of this decoding scheme. In the second part
of the paper we describe the current components of the OASIS system and
evaluate its performance on a very difficult recognition task, the phonetically
balanced sentences of the MTBA Hungarian Telephone Speech Database. Our
results show that OASIS outperforms a traditional HMM system in phoneme
classification and achieves practically the same recognition scores at
the sentence level.},
}