@article{kocsor2000comparative,
  author   = {Kocsor, Andr{\'a}s and T{\'o}th, L{\'a}szl{\'o} and Kuba, Jr., Andr{\'a}s and Kov{\'a}cs, Korn{\'e}l and Jelasity, M{\'a}rk and Gyim{\'o}thy, Tibor and Csirik, J{\'a}nos},
  title    = {A Comparative Study of Several Feature Space Transformation and Learning Methods for Phoneme Classification},
  journal  = {International Journal of Speech Technology},
  year     = {2000},
  volume   = {2},
  number   = {3-4},
  pages    = {263--276},
  month    = dec,
  abstract = {This paper examines the applicability of some learning techniques for speech recognition, more precisely, for the classification of phonemes represented by a particular segment model. The methods compared were the IB1 algorithm (TiMBL), ID3 tree learning (C4.5), oblique tree learning (OC1), artificial neural nets (ANN), and Gaussian mixture modeling (GMM), and, as a reference, a hidden Markov model (HMM) recognizer was also trained on the same corpus. Before feeding them into the learners, the segmental features were additionally transformed using either linear discriminant analysis (LDA), principal component analysis (PCA), or independent component analysis (ICA). Each learner was tested with each transformation in order to find the best combination. Furthermore, we experimented with several feature sets, such as filter-bank energies, mel-frequency cepstral coefficients (MFCC), and gravity centers. We found LDA helped all the learners, in several cases quite considerably. PCA was beneficial only for some of the algorithms, and ICA improved the results quite rarely and was bad for certain learning methods. From the learning viewpoint, ANN was the most effective and attained the same results independently of the transformation applied. GMM behaved worse, which shows the advantages of discriminative over generative learning. TiMBL produced reasonable results; C4.5 and OC1 could not compete, no matter what transformation was tried.},
}