Source code for kaldi.lat.align

from . import _phone_align_lattice as _pal
from . import _word_align_lattice as _wal
from . import _word_align_lattice_lexicon as _wall

from ._phone_align_lattice import *
from ._word_align_lattice import *
from ._word_align_lattice_lexicon import *

from .. import fstext as _fst
from ..util import io as _io


def phone_align_lattice(lat, tmodel, opts):
    """Aligns phone labels with transition-ids in a compact lattice.

    The output lattice has arcs that correspond exactly to sequences of
    phones, so arc boundaries coincide with phone boundaries.

    Args:
        lat (CompactLatticeVectorFst): The input lattice.
        tmodel (TransitionModel): The transition model.
        opts (PhoneAlignLatticeOptions): The phone alignment options.

    Returns:
        A tuple of the success flag and the output lattice. The flag is True
        if the operation was successful, False if some kind of problem was
        detected, e.g. transition-id sequences in the lattice were
        incompatible with the model.

    Note:
        A False flag does not necessarily mean the output lattice is bad.
        The input lattice may simply have been "forced out" with partial
        words because no final state was reached during decoding; in that
        case the output might still be usable.

    Note:
        If `opts.remove_epsilon == True` and
        `opts.replace_output_symbols == False`, an arc may carry more than
        one phone, but arc boundaries will still correspond to boundaries
        between phones.

    Note:
        If `opts.replace_output_symbols == False`, it is possible to have
        arcs with words on them but no transition-ids at all.

    See Also:
        :meth:`kaldi.lat.functions.convert_lattice_to_phones`
    """
    ok, raw_lattice = _pal._phone_align_lattice(lat, tmodel, opts)
    # Re-wrap the low-level result in the Python-level lattice type.
    aligned = _fst.CompactLatticeVectorFst(raw_lattice)
    return ok, aligned
def word_align_lattice(lat, tmodel, info, max_states):
    """Aligns word labels with transition-ids in a compact lattice.

    After alignment each arc carries the transition-ids that belong to the
    word on that arc. The lattice may contain epsilon arcs for optional
    silences.

    Args:
        lat (CompactLatticeVectorFst): The input lattice.
        tmodel (TransitionModel): The transition model.
        info (WordBoundaryInfo): The word boundary information.
        max_states (int): Maximum number of states allowed in the output
            lattice. If `max_states > 0` and the output would have more than
            `max_states` states, the computation is aborted, the success
            flag is False and the output lattice is empty.

    Returns:
        A tuple of the success flag and the output lattice. The flag is True
        if the operation was successful, False if some kind of problem was
        detected, e.g. transition-id sequences in the lattice were
        incompatible with the word boundary information.

    Note:
        Silence inside words and empty words (words with no phones) are not
        expected. A word is expected to start with a wbegin_phone, end with
        a wend_phone, and possibly have winternal_phones in between (or to
        consist of a single wbegin_and_end_phone).

    Note:
        A False flag does not necessarily mean the output lattice is bad.
        The input lattice may simply have been "forced out" with partial
        words because no final state was reached during decoding; in that
        case the output might still be usable.
    """
    ok, raw_lattice = _wal._word_align_lattice(lat, tmodel, info, max_states)
    # Re-wrap the low-level result in the Python-level lattice type.
    aligned = _fst.CompactLatticeVectorFst(raw_lattice)
    return ok, aligned
def word_align_lattice_lexicon(lat, tmodel, lexicon_info, opts):
    """Aligns word labels with transition-ids, guided by a lexicon.

    After alignment each arc carries the transition-ids that belong to the
    word on that arc. The lattice may contain epsilon arcs for optional
    silences.

    Args:
        lat (CompactLatticeVectorFst): The input lattice.
        tmodel (TransitionModel): The transition model.
        lexicon_info (WordAlignLatticeLexiconInfo): The lexicon information.
        opts (WordAlignLatticeLexiconOpts): The word alignment options.

    Returns:
        A tuple of the success flag and the output lattice. The flag is True
        if the operation was successful, False if some kind of problem was
        detected, e.g. transition-id sequences in the lattice were
        incompatible with the lexicon information.

    Note:
        A False flag does not necessarily mean the output lattice is bad.
        The input lattice may simply have been "forced out" with partial
        words because no final state was reached during decoding; in that
        case the output might still be usable.
    """
    ok, raw_lattice = _wall._word_align_lattice_lexicon(
        lat, tmodel, lexicon_info, opts)
    # Re-wrap the low-level result in the Python-level lattice type.
    aligned = _fst.CompactLatticeVectorFst(raw_lattice)
    return ok, aligned
def read_lexicon_for_word_align(rxfilename):
    """Reads a lexicon in the special format required for word alignment.

    Every line holds a series of integers (at least two per line)::

        <old-word-id> <new-word-id> [<phone-id-1> [<phone-id-2> ... ] ]

    Here, <old-word-id> is the word-id that appears in the lattice before
    alignment, and <new-word-id> is the word-id that should appear in the
    lattice after alignment. This is mainly useful when the lattice may have
    no symbol for the optional-silence arcs (so <old-word-id> would equal
    zero), but we want it to be output with a symbol on those arcs (so
    <new-word-id> would be nonzero). If the silence should not be added to
    the lattice, both <old-word-id> and <new-word-id> may be zero.

    Args:
        rxfilename (str): Extended filename for reading the lexicon.

    Returns:
        List[List[int]]: The lexicon in the format required for word
        alignment.

    Raises:
        IOError: If the input is opened in binary mode; the lexicon is
            expected to be text.
        ValueError: If reading the lexicon fails.
    """
    with _io.xopen(rxfilename) as lexicon_input:
        if not lexicon_input.binary:
            return _wall._read_lexicon_for_word_align(lexicon_input.stream())
        # Only the text format is accepted for the lexicon.
        raise IOError("Not expecting binary file for lexicon.")
# Public API: every name currently visible in this module (including those
# pulled in by the star imports above), except names that start with an
# underscore or end with 'Base'.
__all__ = [
    symbol for symbol in dir()
    if not (symbol.startswith('_') or symbol.endswith('Base'))
]