Source code for kaldi.asr

"""
This module provides a number of speech recognizers with an easy to use API.

Note that in Kaldi, therefore in PyKaldi, there is no single "canonical"
decoder, or a fixed interface that decoders must satisfy. Same is true for the
models. The decoders and models provided by Kaldi/PyKaldi can be mixed and
matched to construct specialized speech recognizers. The speech recognizers in
this module cover only the most "typical" combinations.
"""

from __future__ import division

from . import cudamatrix as _cumatrix
from . import decoder as _dec
from . import fstext as _fst
from .fstext import _fst as _fst_fst
from .fstext import properties as _fst_props
from .fstext import special as _fst_spec
from .fstext import utils as _fst_utils
from .gmm import am as _gmm_am
from . import hmm as _hmm
from .lat import functions as _lat_funcs
from . import lm as _lm
from .matrix import _kaldi_matrix
from . import rnnlm as _rnnlm
from . import nnet3 as _nnet3
from . import online2 as _online2
from .util import io as _util_io


__all__ = ['Recognizer',
           'FasterRecognizer',
           'LatticeFasterRecognizer',
           'LatticeBiglmFasterRecognizer',
           'MappedRecognizer',
           'MappedFasterRecognizer',
           'MappedLatticeFasterRecognizer',
           'MappedLatticeBiglmFasterRecognizer',
           'GmmRecognizer',
           'GmmFasterRecognizer',
           'GmmLatticeFasterRecognizer',
           'GmmLatticeBiglmFasterRecognizer',
           'NnetRecognizer',
           'NnetFasterRecognizer',
           'NnetLatticeFasterRecognizer',
           'NnetLatticeFasterBatchRecognizer',
           'NnetLatticeFasterGrammarRecognizer',
           'NnetLatticeBiglmFasterRecognizer',
           'OnlineRecognizer',
           'NnetOnlineRecognizer',
           'NnetLatticeFasterOnlineRecognizer',
           'NnetLatticeFasterOnlineGrammarRecognizer',
           'LatticeLmRescorer']


[docs]class Recognizer(object):
    """Base class for speech recognizers.

    Args:
        decoder (object): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, decoder, symbols=None, allow_partial=True,
                 acoustic_scale=0.1):
        self.decoder = decoder
        self.symbols = symbols
        self.allow_partial = allow_partial
        self.acoustic_scale = acoustic_scale

    def _make_decodable(self, loglikes):
        """Constructs a new decodable object from input log-likelihoods.

        Args:
            loglikes (object): Input log-likelihoods.

        Returns:
            DecodableMatrixScaled: A decodable object for computing scaled
            log-likelihoods.
        """
        if loglikes.num_rows == 0:
            raise ValueError("Empty loglikes matrix.")
        return _dec.DecodableMatrixScaled(loglikes, self.acoustic_scale)

    def _determinize_lattice(self, lattice):
        """Determinizes raw state-level lattice.

        Args:
            lattice (Lattice): Raw state-level lattice.

        Returns:
            CompactLattice or Lattice: A deterministic compact lattice if the
            decoder is configured to determinize lattices. Otherwise, a raw
            state-level lattice.
        """
        opts = self.decoder.get_options()
        if opts.determinize_lattice:
            det_opts = _lat_funcs.DeterminizeLatticePrunedOptions()
            det_opts.max_mem = opts.det_opts.max_mem
            return _lat_funcs.determinize_lattice_pruned(
                lattice, opts.lattice_beam, det_opts, True)
        else:
            return lattice

[docs]    def decode(self, input):
        """Decodes input.

        Output is a dictionary with the following `(key, value)` pairs:

        ============ =========================== ==============================
        key          value                       value type
        ============ =========================== ==============================
        "alignment"  Frame-level alignment       `List[int]`
        "best_path"  Best lattice path           `CompactLattice`
        "lattice"    Output lattice              `Lattice` or `CompactLattice`
        "likelihood" Log-likelihood of best path `float`
        "text"       Output transcript           `str`
        "weight"     Cost of best path           `LatticeWeight`
        "words"      Words on best path          `List[int]`
        ============ =========================== ==============================

        The "lattice" output is produced only if the decoder can generate
        lattices. It will be a deterministic compact lattice if the decoder is
        configured to determinize lattices. Otherwise, it will be a raw
        state-level lattice.

        If :attr:`symbols` is ``None``, the "text" output will be a string of
        space separated integer indices. Otherwise it will be a string of space
        separated symbols. The "weight" output is a lattice weight consisting of
        (graph-score, acoustic-score).

        Args:
            input (object): Input to decode.

        Returns:
            A dictionary representing decoding output.

        Raises:
            RuntimeError: If decoding fails.
        """
        self.decoder.decode(self._make_decodable(input))

        if not (self.allow_partial or self.decoder.reached_final()):
            raise RuntimeError("No final state was active on the last frame.")

        try:
            best_path = self.decoder.get_best_path()
        except RuntimeError:
            raise RuntimeError("Empty decoding output.")

        ali, words, weight = _fst_utils.get_linear_symbol_sequence(best_path)

        if self.symbols:
            text = " ".join(_fst.indices_to_symbols(self.symbols, words))
        else:
            text = " ".join(map(str, words))

        likelihood = - (weight.value1 + weight.value2)

        if self.acoustic_scale != 0.0:
            scale = _fst_utils.acoustic_lattice_scale(1.0 / self.acoustic_scale)
            _fst_utils.scale_lattice(scale, best_path)
        best_path = _fst_utils.convert_lattice_to_compact_lattice(best_path)

        try:
            lat = self.decoder.get_raw_lattice()
        except AttributeError:
            return {
                "alignment": ali,
                "best_path": best_path,
                "likelihood": likelihood,
                "text": text,
                "weight": weight,
                "words": words,
            }
        if lat.num_states() == 0:
            raise RuntimeError("Empty output lattice.")
        lat.connect()

        lat = self._determinize_lattice(lat)

        if self.acoustic_scale != 0.0:
            if isinstance(lat, _fst.CompactLatticeVectorFst):
                _fst_utils.scale_compact_lattice(scale, lat)
            else:
                _fst_utils.scale_lattice(scale, lat)

        return {
            "alignment": ali,
            "best_path": best_path,
            "lattice": lat,
            "likelihood": likelihood,
            "text": text,
            "weight": weight,
            "words": words,
        }


[docs]class FasterRecognizer(Recognizer):
    """Faster speech recognizer.

    This recognizer can be used to decode log-likelihood matrices. Non-zero
    labels on the decoding graph, e.g. transition-ids, are looked up in the
    log-likelihood matrices using 1-based indexing -- index 0 is reserved for
    epsilon symbols in OpenFst.

    Args:
        decoder (FasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, decoder, symbols=None,
                 allow_partial=True, acoustic_scale=0.1):
        if not isinstance(decoder, _dec.FasterDecoder):
            raise TypeError("decoder should be a FasterDecoder")
        super(FasterRecognizer, self).__init__(decoder, symbols,
                                               allow_partial, acoustic_scale)

[docs]    @classmethod
    def from_files(cls, graph_rxfilename, symbols_filename=None,
                   allow_partial=True, acoustic_scale=0.1, decoder_opts=None):
        """Constructs a new recognizer from given files.

        Args:
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            acoustic_scale (float): Acoustic score scale.
            decoder_opts (FasterDecoderOptions): Configuration options for the
                decoder.

        Returns:
            FasterRecognizer: A new recognizer.
        """
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        if not decoder_opts:
            decoder_opts = _dec.FasterDecoderOptions()
        decoder = _dec.FasterDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(decoder, symbols, allow_partial, acoustic_scale)


[docs]class LatticeFasterRecognizer(Recognizer):
    """Lattice-generating faster speech recognizer.

    This recognizer can be used to decode log-likelihood matrices into lattices.
    Non-zero labels on the decoding graph, e.g. transition-ids, are looked up in
    the log-likelihood matrices using 1-based indexing -- index 0 is reserved
    for epsilon symbols in OpenFst.

    Args:
        decoder (LatticeFasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, decoder, symbols=None,
                 allow_partial=True, acoustic_scale=0.1):
        if not isinstance(decoder, _dec.LatticeFasterDecoder):
            raise TypeError("decoder should be a LatticeFasterDecoder")
        super(LatticeFasterRecognizer, self).__init__(
            decoder, symbols, allow_partial, acoustic_scale)

[docs]    @classmethod
    def from_files(cls, graph_rxfilename, symbols_filename=None,
                   allow_partial=True, acoustic_scale=0.1, decoder_opts=None):
        """Constructs a new recognizer from given files.

        Args:
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            acoustic_scale (float): Acoustic score scale.
            decoder_opts (LatticeFasterDecoderOptions): Configuration options
                for the decoder.

        Returns:
            LatticeFasterRecognizer: A new recognizer.
        """
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeFasterDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(decoder, symbols, allow_partial, acoustic_scale)


[docs]class LatticeBiglmFasterRecognizer(Recognizer):
    """Lattice generating big-LM faster speech recognizer.

    This recognizer can be used to decode log-likelihood matrices into lattices.
    Non-zero labels on the decoding graph, e.g. transition-ids, are looked up in
    the log-likelihood matrices using 1-based indexing -- index 0 is reserved
    for epsilon symbols in OpenFst.

    Args:
        decoder (LatticeBiglmFasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, decoder, symbols=None, allow_partial=True,
                 acoustic_scale=0.1):
        if not isinstance(decoder, _dec.LatticeBiglmFasterDecoder):
            raise TypeError("decoder should be a LatticeBiglmFasterDecoder")
        super(LatticeBiglmFasterRecognizer, self).__init__(
            decoder, symbols, allow_partial, acoustic_scale)

[docs]    @classmethod
    def from_files(cls, graph_rxfilename, old_lm_rxfilename, new_lm_rxfilename,
                   symbols_filename=None, allow_partial=True,
                   acoustic_scale=0.1, decoder_opts=None):
        """Constructs a new recognizer from given files.

        Args:
            graph_rxfilename (str): Extended filename for reading the graph.
            old_lm_rxfilename (str): Extended filename for reading the old LM.
            new_lm_rxfilename (str): Extended filename for reading the new LM.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            acoustic_scale (float): Acoustic score scale.
            decoder_opts (LatticeFasterDecoderOptions): Configuration
                options for the decoder.

        Returns:
            LatticeBiglmFasterRecognizer: A new recognizer.
        """
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        self.old_lm = _fst.read_fst_kaldi(old_lm_rxfilename)
        _fst_utils.apply_probability_scale(-1.0, self.old_lm)
        self.new_lm = _fst.read_fst_kaldi(new_lm_rxfilename)
        self._old_lm = _fst_spec.StdBackoffDeterministicOnDemandFst(self.old_lm)
        self._new_lm = _fst_spec.StdBackoffDeterministicOnDemandFst(self.new_lm)
        self._compose_lm = _fst_spec.StdComposeDeterministicOnDemandFst(
            self._old_lm, self._new_lm)
        self._cache_compose_lm = _fst_spec.StdCacheDeterministicOnDemandFst(
            self._compose_lm)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeBiglmFasterDecoder(graph, decoder_opts,
                                                 self._cache_compose_lm)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(decoder, symbols, allow_partial, acoustic_scale)


[docs]class MappedRecognizer(Recognizer):
    """Base class for mapped speech recognizers.

    Args:
        transition_model (TransitionModel): The transition model.
        decoder (object): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, transition_model, decoder, symbols=None,
                 allow_partial=True, acoustic_scale=0.1):
        self.transition_model = transition_model
        self.decoder = decoder
        self.symbols = symbols
        self.allow_partial = allow_partial
        self.acoustic_scale = acoustic_scale

[docs]    @staticmethod
    def read_model(model_rxfilename):
        """Reads transition model from an extended filename."""
        with _util_io.xopen(model_rxfilename) as ki:
            return _hmm.TransitionModel().read(ki.stream(), ki.binary)

    def _make_decodable(self, loglikes):
        """Constructs a new decodable object from input log-likelihoods.

        Args:
            loglikes (object): Input log-likelihoods.

        Returns:
            DecodableMatrixScaledMapped: A decodable object for computing scaled
            log-likelihoods.
        """
        if loglikes.num_rows == 0:
            raise ValueError("Empty loglikes matrix.")
        return _dec.DecodableMatrixScaledMapped(self.transition_model, loglikes,
                                                self.acoustic_scale)

    def _determinize_lattice(self, lattice):
        """Determinizes raw state-level lattice.

        Args:
            lattice (Lattice): Raw state-level lattice.

        Returns:
            CompactLattice or Lattice: A deterministic compact lattice if the
            decoder is configured to determinize lattices. Otherwise, a raw
            state-level lattice.
        """
        opts = self.decoder.get_options()
        if opts.determinize_lattice:
            return _lat_funcs.determinize_lattice_phone_pruned(
                lattice, self.transition_model, opts.lattice_beam,
                opts.det_opts, True)
        else:
            return lattice


[docs]class MappedFasterRecognizer(MappedRecognizer):
    """Mapped faster speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        decoder (FasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, transition_model, decoder, symbols=None,
                 allow_partial=True, acoustic_scale=0.1):
        if not isinstance(decoder, _dec.FasterDecoder):
            raise TypeError("decoder should be a FasterDecoder")
        super(MappedFasterRecognizer, self).__init__(
            transition_model, decoder, symbols, allow_partial, acoustic_scale)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename,
                   symbols_filename=None, allow_partial=True,
                   acoustic_scale=0.1, decoder_opts=None):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the transition
                model.
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            acoustic_scale (float): Acoustic score scale.
            decoder_opts (FasterDecoderOptions): Configuration options for the
                decoder.

        Returns:
            MappedFasterRecognizer: A new recognizer object.
        """
        transition_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        if not decoder_opts:
            decoder_opts = _dec.FasterDecoderOptions()
        decoder = _dec.FasterDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, decoder, symbols,
                   allow_partial, acoustic_scale)


[docs]class MappedLatticeFasterRecognizer(MappedRecognizer):
    """Mapped lattice generating faster speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        decoder (LatticeFasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, transition_model, decoder, symbols=None,
                 allow_partial=True, acoustic_scale=0.1):
        if not isinstance(decoder, _dec.LatticeFasterDecoder):
            raise TypeError("decoder should be a LatticeFasterDecoder")
        super(MappedLatticeFasterRecognizer, self).__init__(
            transition_model, decoder, symbols, allow_partial, acoustic_scale)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename,
                   symbols_filename=None, allow_partial=True,
                   acoustic_scale=0.1, decoder_opts=None):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the transition
                model.
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            acoustic_scale (float): Acoustic score scale.
            decoder_opts (LatticeFasterDecoderOptions): Configuration options
                for the decoder.

        Returns:
            MappedFasterRecognizer: A new recognizer object.
        """
        transition_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeFasterDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, decoder, symbols,
                   allow_partial, acoustic_scale)


[docs]class MappedLatticeBiglmFasterRecognizer(MappedRecognizer):
    """GMM based lattice generating big-LM faster speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        decoder (LatticeBiglmFasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, transition_model, decoder, symbols=None,
                 allow_partial=True, acoustic_scale=0.1):
        if not isinstance(decoder, _dec.LatticeBiglmFasterDecoder):
            raise TypeError("decoder should be a LatticeBiglmFasterDecoder")
        super(MappedLatticeBiglmFasterRecognizer, self).__init__(
            transition_model, decoder, symbols, allow_partial, acoustic_scale)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename, old_lm_rxfilename,
                   new_lm_rxfilename, symbols_filename=None, allow_partial=True,
                   acoustic_scale=0.1, decoder_opts=None):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the transition
                model.
            graph_rxfilename (str): Extended filename for reading the graph.
            old_lm_rxfilename (str): Extended filename for reading the old LM.
            new_lm_rxfilename (str): Extended filename for reading the new LM.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            acoustic_scale (float): Acoustic score scale.
            decoder_opts (LatticeFasterDecoderOptions): Configuration
                options for the decoder.

        Returns:
            MappedLatticeBiglmFasterRecognizer: A new recognizer.
        """
        transition_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        self.old_lm = _fst.read_fst_kaldi(old_lm_rxfilename)
        _fst_utils.apply_probability_scale(-1.0, self.old_lm)
        self.new_lm = _fst.read_fst_kaldi(new_lm_rxfilename)
        self._old_lm = _fst_spec.StdBackoffDeterministicOnDemandFst(self.old_lm)
        self._new_lm = _fst_spec.StdBackoffDeterministicOnDemandFst(self.new_lm)
        self._compose_lm = _fst_spec.StdComposeDeterministicOnDemandFst(
            self._old_lm, self._new_lm)
        self._cache_compose_lm = _fst_spec.StdCacheDeterministicOnDemandFst(
            self._compose_lm)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeBiglmFasterDecoder(graph, decoder_opts,
                                                 self._cache_compose_lm)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, decoder, symbols,
                   allow_partial, acoustic_scale)


[docs]class GmmRecognizer(Recognizer):
    """Base class for GMM based speech recognizers.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmDiagGmm): The acoustic model.
        decoder (object): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, transition_model, acoustic_model, decoder, symbols=None,
                 allow_partial=True, acoustic_scale=0.1):
        if not isinstance(acoustic_model, _gmm_am.AmDiagGmm):
            raise TypeError("acoustic_model argument should be a diagonal GMM")
        self.transition_model = transition_model
        self.acoustic_model = acoustic_model
        super(GmmRecognizer, self).__init__(decoder, symbols,
                                            allow_partial, acoustic_scale)

[docs]    @staticmethod
    def read_model(model_rxfilename):
        """Reads model from an extended filename."""
        with _util_io.xopen(model_rxfilename) as ki:
            transition_model = _hmm.TransitionModel().read(ki.stream(),
                                                           ki.binary)
            acoustic_model = _gmm_am.AmDiagGmm().read(ki.stream(), ki.binary)
        return transition_model, acoustic_model

    def _make_decodable(self, features):
        """Constructs a new decodable object from input features.

        Args:
            features (object): Input features.

        Returns:
            DecodableAmDiagGmmScaled: A decodable object for computing scaled
            log-likelihoods.
        """
        if features.num_rows == 0:
            raise ValueError("Empty feature matrix.")
        return _gmm_am.DecodableAmDiagGmmScaled(self.acoustic_model,
                                                self.transition_model,
                                                features, self.acoustic_scale)

    def _determinize_lattice(self, lattice):
        """Determinizes raw state-level lattice.

        Args:
            lattice (Lattice): Raw state-level lattice.

        Returns:
            CompactLattice or Lattice: A deterministic compact lattice if the
            decoder is configured to determinize lattices. Otherwise, a raw
            state-level lattice.
        """
        opts = self.decoder.get_options()
        if opts.determinize_lattice:
            return _lat_funcs.determinize_lattice_phone_pruned(
                lattice, self.transition_model, opts.lattice_beam,
                opts.det_opts, True)
        else:
            return lattice


[docs]class GmmFasterRecognizer(GmmRecognizer):
    """GMM based faster speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmDiagGmm): The acoustic model.
        decoder (FasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, transition_model, acoustic_model, decoder, symbols=None,
                 allow_partial=True, acoustic_scale=0.1):
        if not isinstance(decoder, _dec.FasterDecoder):
            raise TypeError("decoder should be a FasterDecoder")
        super(GmmFasterRecognizer, self).__init__(
            transition_model, acoustic_model, decoder, symbols,
            allow_partial, acoustic_scale)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename,
                   symbols_filename=None, allow_partial=True,
                   acoustic_scale=0.1, decoder_opts=None):
        """Constructs a new GMM recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the model.
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            acoustic_scale (float): Acoustic score scale.
            decoder_opts (FasterDecoderOptions): Configuration options for the
                decoder.

        Returns:
            A new GMM recognizer object.
        """
        transition_model, acoustic_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        if not decoder_opts:
            decoder_opts = _dec.FasterDecoderOptions()
        decoder = _dec.FasterDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, acoustic_model, decoder, symbols,
                   allow_partial, acoustic_scale)


[docs]class GmmLatticeFasterRecognizer(GmmRecognizer):
    """GMM based lattice generating faster speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmDiagGmm): The acoustic model.
        decoder (LatticeFasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, transition_model, acoustic_model, decoder, symbols=None,
                 allow_partial=True, acoustic_scale=0.1):
        if not isinstance(decoder, _dec.LatticeFasterDecoder):
            raise TypeError("decoder should be a LatticeFasterDecoder")
        super(GmmLatticeFasterRecognizer, self).__init__(
            transition_model, acoustic_model, decoder, symbols,
            allow_partial, acoustic_scale)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename,
                   symbols_filename=None, allow_partial=True,
                   acoustic_scale=0.1, decoder_opts=None):
        """Constructs a new GMM recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the model.
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            acoustic_scale (float): Acoustic score scale.
            decoder_opts (LatticeFasterDecoderOptions): Configuration options
                for the decoder.

        Returns:
            A new GMM recognizer object.
        """
        transition_model, acoustic_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeFasterDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, acoustic_model, decoder, symbols,
                   allow_partial, acoustic_scale)


[docs]class GmmLatticeBiglmFasterRecognizer(GmmRecognizer):
    """GMM based lattice generating big-LM faster speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmDiagGmm): The acoustic model.
        decoder (LatticeBiglmFasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, transition_model, acoustic_model, decoder, symbols=None,
                 allow_partial=True, acoustic_scale=0.1):
        if not isinstance(decoder, _dec.LatticeBiglmFasterDecoder):
            raise TypeError("decoder should be a LatticeBiglmFasterDecoder")
        super(GmmLatticeBiglmFasterRecognizer, self).__init__(
            transition_model, acoustic_model, decoder, symbols, allow_partial,
            acoustic_scale)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename, old_lm_rxfilename,
                   new_lm_rxfilename, symbols_filename=None, allow_partial=True,
                   acoustic_scale=0.1, decoder_opts=None):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the model.
            graph_rxfilename (str): Extended filename for reading the graph.
            old_lm_rxfilename (str): Extended filename for reading the old LM.
            new_lm_rxfilename (str): Extended filename for reading the new LM.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            acoustic_scale (float): Acoustic score scale.
            decoder_opts (LatticeFasterDecoderOptions): Configuration
                options for the decoder.

        Returns:
            GmmLatticeBiglmFasterRecognizer: A new recognizer.
        """
        transition_model, acoustic_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        self.old_lm = _fst.read_fst_kaldi(old_lm_rxfilename)
        _fst_utils.apply_probability_scale(-1.0, self.old_lm)
        self.new_lm = _fst.read_fst_kaldi(new_lm_rxfilename)
        self._old_lm = _fst_spec.StdBackoffDeterministicOnDemandFst(self.old_lm)
        self._new_lm = _fst_spec.StdBackoffDeterministicOnDemandFst(self.new_lm)
        self._compose_lm = _fst_spec.StdComposeDeterministicOnDemandFst(
            self._old_lm, self._new_lm)
        self._cache_compose_lm = _fst_spec.StdCacheDeterministicOnDemandFst(
            self._compose_lm)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeBiglmFasterDecoder(graph, decoder_opts,
                                                 self._cache_compose_lm)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, acoustic_model, decoder, symbols,
                   allow_partial, acoustic_scale)


[docs]class NnetRecognizer(Recognizer):
    """Base class for neural network based speech recognizers.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmNnetSimple): The acoustic model.
        decoder (object): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        decodable_opts (NnetSimpleComputationOptions): Configuration options for
            simple nnet3 am decodable objects.
        online_ivector_period (int): Onlne ivector period. Relevant only if
            online ivectors are used.
    """
    def __init__(self, transition_model, acoustic_model, decoder,
                 symbols=None, allow_partial=True, decodable_opts=None,
                 online_ivector_period=10):
        if not isinstance(acoustic_model, _nnet3.AmNnetSimple):
            raise TypeError("acoustic_model should be a AmNnetSimple object")
        self.transition_model = transition_model
        self.acoustic_model = acoustic_model
        nnet = self.acoustic_model.get_nnet()
        _nnet3.set_batchnorm_test_mode(True, nnet)
        _nnet3.set_dropout_test_mode(True, nnet)
        _nnet3.collapse_model(_nnet3.CollapseModelConfig(), nnet)
        if decodable_opts:
            if not isinstance(decodable_opts,
                              _nnet3.NnetSimpleComputationOptions):
                raise TypeError("decodable_opts should be either None or a "
                                "NnetSimpleComputationOptions object")
            self.decodable_opts = decodable_opts
        else:
            self.decodable_opts = _nnet3.NnetSimpleComputationOptions()
        self.compiler = _nnet3.CachingOptimizingCompiler.new_with_optimize_opts(
            nnet, self.decodable_opts.optimize_config)
        self.online_ivector_period = online_ivector_period
        super(NnetRecognizer, self).__init__(decoder, symbols, allow_partial,
                                             self.decodable_opts.acoustic_scale)

[docs]    @staticmethod
    def read_model(model_rxfilename):
        """Reads model from an extended filename."""
        with _util_io.xopen(model_rxfilename) as ki:
            transition_model = _hmm.TransitionModel().read(ki.stream(),
                                                           ki.binary)
            acoustic_model = _nnet3.AmNnetSimple().read(ki.stream(), ki.binary)
        return transition_model, acoustic_model

    def _make_decodable(self, features):
        """Constructs a new decodable object from input features.

        Input can be just a feature matrix or a tuple of a feature matrix and
        an ivector or a tuple of a feature matrix and an online ivector matrix.

        Args:
            features (Matrix or Tuple[Matrix, Vector] or Tuple[Matrix, Matrix]):
                Input features.

        Returns:
            DecodableAmNnetSimple: A decodable object for computing scaled
            log-likelihoods.
        """
        ivector, online_ivectors = None, None
        if isinstance(features, tuple):
            features, ivector_features = features
            if isinstance(ivector_features, _kaldi_matrix.MatrixBase):
                online_ivectors = ivector_features
            else:
                ivector = ivector_features
        if features.num_rows == 0:
            raise ValueError("Empty feature matrix.")
        return _nnet3.DecodableAmNnetSimple(
            self.decodable_opts, self.transition_model, self.acoustic_model,
            features, ivector, online_ivectors, self.online_ivector_period,
            self.compiler)

    def _determinize_lattice(self, lattice):
        """Determinizes raw state-level lattice.

        Args:
            lattice (Lattice): Raw state-level lattice.

        Returns:
            CompactLattice or Lattice: A deterministic compact lattice if the
            decoder is configured to determinize lattices. Otherwise, a raw
            state-level lattice.
        """
        opts = self.decoder.get_options()
        if opts.determinize_lattice:
            return _lat_funcs.determinize_lattice_phone_pruned(
                lattice, self.transition_model, opts.lattice_beam,
                opts.det_opts, True)
        else:
            return lattice


[docs]class NnetFasterRecognizer(NnetRecognizer):
    """Neural network based faster speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmNnetSimple): The acoustic model.
        decoder (FasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        decodable_opts (NnetSimpleComputationOptions): Configuration options for
            simple nnet3 am decodable objects.
        online_ivector_period (int): Onlne ivector period. Relevant only if
            online ivectors are used.
    """
    def __init__(self, transition_model, acoustic_model, decoder,
                 symbols=None, allow_partial=True, decodable_opts=None,
                 online_ivector_period=10):
        if not isinstance(decoder, _dec.FasterDecoder):
            raise TypeError("decoder argument should be a FasterDecoder")
        super(NnetFasterRecognizer, self).__init__(
            transition_model, acoustic_model, decoder, symbols, allow_partial,
            decodable_opts, online_ivector_period)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename,
                   symbols_filename=None, allow_partial=True,
                   decoder_opts=None, decodable_opts=None,
                   online_ivector_period=10):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the model.
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            decoder_opts (FasterDecoderOptions): Configuration options for the
                decoder.
            decodable_opts (NnetSimpleComputationOptions): Configuration options
                for simple nnet3 am decodable objects.
            online_ivector_period (int): Onlne ivector period. Relevant only if
                online ivectors are used.

        Returns:
            NnetFasterRecognizer: A new recognizer.
        """
        transition_model, acoustic_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        if not decoder_opts:
            decoder_opts = _dec.FasterDecoderOptions()
        decoder = _dec.FasterDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, acoustic_model, decoder, symbols,
                   allow_partial, decodable_opts, online_ivector_period)


[docs]class NnetLatticeFasterRecognizer(NnetRecognizer):
    """Neural network based lattice generating faster speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmNnetSimple): The acoustic model.
        decoder (LatticeFasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        decodable_opts (NnetSimpleComputationOptions): Configuration options for
            simple nnet3 am decodable objects.
        online_ivector_period (int): Onlne ivector period. Relevant only if
            online ivectors are used.
    """
    def __init__(self, transition_model, acoustic_model, decoder,
                 symbols=None, allow_partial=True, decodable_opts=None,
                 online_ivector_period=10):
        if not isinstance(decoder, _dec.LatticeFasterDecoder):
            raise TypeError("decoder argument should be a LatticeFasterDecoder")
        super(NnetLatticeFasterRecognizer, self).__init__(
            transition_model, acoustic_model, decoder, symbols, allow_partial,
            decodable_opts, online_ivector_period)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename,
                   symbols_filename=None, allow_partial=True,
                   decoder_opts=None, decodable_opts=None,
                   online_ivector_period=10):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the model.
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            decoder_opts (LatticeFasterDecoderOptions): Configuration options
                for the decoder.
            decodable_opts (NnetSimpleComputationOptions): Configuration options
                for simple nnet3 am decodable objects.
            online_ivector_period (int): Onlne ivector period. Relevant only if
                online ivectors are used.

        Returns:
            NnetLatticeFasterRecognizer: A new recognizer.
        """
        transition_model, acoustic_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeFasterDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, acoustic_model, decoder, symbols,
                   allow_partial, decodable_opts, online_ivector_period)


[docs]class NnetLatticeFasterBatchRecognizer(object):
    """Neural network based lattice generating faster batch speech recognizer.

    This uses multiple CPU threads for the graph search, plus a GPU thread for
    the neural net inference. The interface of this object should be accessed
    from only one thread, presumably the main thread of the program.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmNnetSimple): The acoustic model.
        graph (StdFst): The decoding graph.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        decoder_opts (LatticeFasterDecoderOptions): Configuration options
            for the decoder.
        compute_opts (NnetBatchComputerOptions): Configuration options
            for neural network batch computer.
        num_threads (int): Number of processing threads.
        online_ivector_period (int): Onlne ivector period. Relevant only if
            online ivectors are used.
    """
    def __init__(self, transition_model, acoustic_model, graph, symbols=None,
                 allow_partial=True, decoder_opts=None, compute_opts=None,
                 num_threads=1, online_ivector_period=10):
        self.transition_model = transition_model
        self.acoustic_model = acoustic_model
        nnet = self.acoustic_model.get_nnet()
        _nnet3.set_batchnorm_test_mode(True, nnet)
        _nnet3.set_dropout_test_mode(True, nnet)
        _nnet3.collapse_model(_nnet3.CollapseModelConfig(), nnet)
        self.graph = graph
        self.symbols = symbols
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        if not compute_opts:
            compute_opts = _nnet3.NnetBatchComputerOptions()
        self.computer = _nnet3.NnetBatchComputer(compute_opts, nnet,
                                                 self.acoustic_model.priors())
        self.decoder = _nnet3.NnetBatchDecoder(
            self.graph, decoder_opts, self.transition_model, self.symbols,
            allow_partial, num_threads, self.computer)
        if decoder_opts.determinize_lattice:
            self._get_output = self.decoder.get_output
        else:
            self._get_output = self.decoder.get_raw_output
        self.online_ivector_period = online_ivector_period

[docs]    @staticmethod
    def read_model(model_rxfilename):
        """Reads model from an extended filename."""
        with _util_io.xopen(model_rxfilename) as ki:
            transition_model = _hmm.TransitionModel().read(ki.stream(),
                                                           ki.binary)
            acoustic_model = _nnet3.AmNnetSimple().read(ki.stream(), ki.binary)
        return transition_model, acoustic_model

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename,
                   symbols_filename=None, allow_partial=True, decoder_opts=None,
                   compute_opts=None, num_threads=1, online_ivector_period=10):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the model.
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            decoder_opts (LatticeFasterDecoderOptions): Configuration options
                for the decoder.
            compute_opts (NnetBatchComputerOptions): Configuration options
                for neural network batch computer.
            num_threads (int): Number of processing threads.
            online_ivector_period (int): Onlne ivector period. Relevant only if
                online ivectors are used.

        Returns:
            NnetLatticeFasterBatchRecognizer: A new recognizer.
        """
        transition_model, acoustic_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, acoustic_model, graph, symbols,
                   allow_partial, decoder_opts, compute_opts, num_threads,
                   online_ivector_period)

[docs]    def accept_input(self, key, input):
        """Accepts input for decoding.

        This should be called for each utterance that is to be decoded
        (interspersed with calls to :meth:`get_output`). This call will block
        when no threads are ready to start processing this utterance.

        Input can be just a feature matrix or a tuple of a feature matrix and
        an ivector or a tuple of a feature matrix and an online ivector matrix.

        Args:
            key (str): Utterance ID. This ID will be used to identify the
                utterance when returned by :meth:`get_output`.
            input (Matrix or Tuple[Matrix, Vector] or Tuple[Matrix, Matrix]):
                Input to decode.

        Raises:
            RuntimeError: If decoding fails.
        """
        ivector, online_ivectors = None, None
        if isinstance(input, tuple):
            features, ivector_features = input
            if isinstance(ivector_features, _kaldi_matrix.MatrixBase):
                online_ivectors = ivector_features
            else:
                ivector = ivector_features
        else:
            features = input
        if features.num_rows == 0:
            raise ValueError("Empty feature matrix.")
        self.decoder.accept_input(key, features, ivector, online_ivectors,
                                  self.online_ivector_period)

[docs]    def get_output(self):
        """Returns the next available output.

        This returns the output for the first utterance in the output queue.
        The outputs returned by this method are guaranteed to be in the same
        order the inputs were provieded, but they may be delayed and some
        outputs might be missing, for instance because of search failures.

        This call does not block.

        Output is a dictionary with the following `(key, value)` pairs:

        ============ =========================== ==============================
        key          value                       value type
        ============ =========================== ==============================
        "key"        Utterence ID                `str`
        "lattice"    Output lattice              `Lattice` or `CompactLattice`
        "text"       Output transcript           `str`
        ============ =========================== ==============================

        The "lattice" output will be a deterministic compact lattice if lattice
        determinization is enabled. Otherwise, it will be a raw state-level
        lattice. The acoustic scores in the output lattice will already be
        divided by the acoustic scale used in decoding.

        If the decoder was not initialized with a symbol table, the "text"
        output will be a string of space separated integer indices. Otherwise it
        will be a string of space separated symbols.

        Returns:
            A dictionary representing decoding output.

        Raises:
            ValueError: If there is no output to return.
        """
        key, lat, text = self._get_output()
        return {"key": key, "lattice": lat, "text": text}

[docs]    def get_outputs(self):
        """Creates a generator for iterating over available outputs.

        Each output generated will be a dictionary like the output of
        :meth:`get_output`. The outputs are generated in the same order the
        inputs were provided.

        See Also: :meth:`get_output`
        """
        while True:
            try:
                yield self.get_output()
            except ValueError:
                return

[docs]    def finished(self):
        """Informs the decoder that all input has been provided.

        This will block until all computation threads have terminated. After
        that you can keep calling :meth:`get_output`, until it raises a
        `ValueError`, to get the outputs for the remaining utterances.

        Returns:
            int: The number of utterances that have been successfully decoded.
        """
        return self.decoder.finished()

[docs]    def utterance_failed(self):
        """Informs the decoder that there was a problem with an utterance.

        This will update the number of failed utterances stats.
        """
        self.decoder.utterance_failed()


[docs]class NnetLatticeFasterGrammarRecognizer(NnetRecognizer):
    """Neural network based lattice generating faster grammar speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmNnetSimple): The acoustic model.
        decoder (LatticeFasterGrammarDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        decodable_opts (NnetSimpleComputationOptions): Configuration options for
            simple nnet3 am decodable objects.
        online_ivector_period (int): Onlne ivector period. Relevant only if
            online ivectors are used.
    """
    def __init__(self, transition_model, acoustic_model, decoder,
                 symbols=None, allow_partial=True, decodable_opts=None,
                 online_ivector_period=10):
        if not isinstance(decoder, _dec.LatticeFasterGrammarDecoder):
            raise TypeError("decoder argument should be a "
                            "LatticeFasterGrammarDecoder")
        super(NnetLatticeFasterGrammarRecognizer, self).__init__(
            transition_model, acoustic_model, decoder, symbols, allow_partial,
            decodable_opts, online_ivector_period)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename,
                   symbols_filename=None, allow_partial=True,
                   decoder_opts=None, decodable_opts=None,
                   online_ivector_period=10):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the model.
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            decoder_opts (FasterDecoderOptions): Configuration options for the
                decoder.
            decodable_opts (NnetSimpleComputationOptions): Configuration options
                for simple nnet3 am decodable objects.
            online_ivector_period (int): Onlne ivector period. Relevant only if
                online ivectors are used.

        Returns:
            NnetLatticeFasterGrammarRecognizer: A new recognizer.
        """
        transition_model, acoustic_model = cls.read_model(model_rxfilename)
        with _util_io.xopen(graph_rxfilename) as ki:
            graph = _dec.GrammarFst()
            graph.read(ki.stream(), ki.binary)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeFasterGrammarDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, acoustic_model, decoder, symbols,
                   allow_partial, decodable_opts, online_ivector_period)


[docs]class NnetLatticeBiglmFasterRecognizer(NnetRecognizer):
    """Neural network based lattice generating big-LM faster speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmNnetSimple): The acoustic model.
        decoder (LatticeBiglmFasterDecoder): The decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        decodable_opts (NnetSimpleComputationOptions): Configuration options for
            simple nnet3 am decodable objects.
        online_ivector_period (int): Onlne ivector period. Relevant only if
            online ivectors are used.
    """
    def __init__(self, transition_model, acoustic_model, decoder,
                 symbols=None, allow_partial=True, decodable_opts=None,
                 online_ivector_period=10):
        if not isinstance(decoder, _dec.LatticeBiglmFasterDecoder):
            raise TypeError("decoder argument should be a "
                            "LatticeBiglmFasterDecoder")
        super(NnetLatticeBiglmFasterRecognizer, self).__init__(
            transition_model, acoustic_model, decoder, symbols, allow_partial,
            decodable_opts, online_ivector_period)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename, old_lm_rxfilename,
                   new_lm_rxfilename, symbols_filename=None, allow_partial=True,
                   decoder_opts=None, decodable_opts=None,
                   online_ivector_period=10):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the model.
            graph_rxfilename (str): Extended filename for reading the graph.
            old_lm_rxfilename (str): Extended filename for reading the old LM.
            new_lm_rxfilename (str): Extended filename for reading the new LM.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            decoder_opts (LatticeFasterDecoderOptions): Configuration
                options for the decoder.
            decodable_opts (NnetSimpleComputationOptions): Configuration options
                for simple nnet3 am decodable objects.
            online_ivector_period (int): Onlne ivector period. Relevant only if
                online ivectors are used.

        Returns:
            NnetLatticeBiglmFasterRecognizer: A new recognizer.
        """
        transition_model, acoustic_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        self.old_lm = _fst.read_fst_kaldi(old_lm_rxfilename)
        _fst_utils.apply_probability_scale(-1.0, self.old_lm)
        self.new_lm = _fst.read_fst_kaldi(new_lm_rxfilename)
        self._old_lm = _fst_spec.StdBackoffDeterministicOnDemandFst(self.old_lm)
        self._new_lm = _fst_spec.StdBackoffDeterministicOnDemandFst(self.new_lm)
        self._compose_lm = _fst_spec.StdComposeDeterministicOnDemandFst(
            self._old_lm, self._new_lm)
        self._cache_compose_lm = _fst_spec.StdCacheDeterministicOnDemandFst(
            self._compose_lm)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeBiglmFasterDecoder(graph, decoder_opts,
                                                 self._cache_compose_lm)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, acoustic_model, decoder, symbols,
                   allow_partial, decodable_opts, online_ivector_period)


[docs]class OnlineRecognizer(object):
    """Base class for online speech recognizers.

    Args:
        decoder (object): The online decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        acoustic_scale (float): Acoustic score scale.
    """
    def __init__(self, decoder, symbols=None, allow_partial=True,
                 acoustic_scale=0.1):
        self.decoder = decoder
        self.symbols = symbols
        self.allow_partial = allow_partial
        self.acoustic_scale = acoustic_scale

    def _make_decodable(self, input_pipeline):
        """Constructs a new online decodable object from input pipeline.

        Args:
            input_pipeline (object): Input pipeline.

        Returns:
            DecodableInterface: An online decodable object for computing scaled
            log-likelihoods.
        """
        raise NotImplementedError

    def _determinize_lattice(self, lattice):
        """Determinizes raw state-level lattice.

        Args:
            lattice (Lattice): Raw state-level lattice.

        Returns:
            CompactLattice or Lattice: A deterministic compact lattice if the
            decoder is configured to determinize lattices. Otherwise, a raw
            state-level lattice.
        """
        opts = self.decoder.get_options()
        if opts.determinize_lattice:
            det_opts = _lat_funcs.DeterminizeLatticePrunedOptions()
            det_opts.max_mem = opts.det_opts.max_mem
            return _lat_funcs.determinize_lattice_pruned(
                lattice, opts.lattice_beam, det_opts, True)
        else:
            return lattice

[docs]    def set_input_pipeline(self, input_pipeline):
        """Sets input pipeline.

        Args:
            input_pipeline (object): Input pipeline to decode online.
        """
        self._decodable = self._make_decodable(input_pipeline)

[docs]    def init_decoding(self):
        """Initializes decoding.

        This should only be used if you intend to call :meth:`advance_decoding`.
        If you intend to call :meth:`decode`, you don't need to call this. You
        can also call this method if you have already decoded an utterance and
        want to start with a new utterance.
        """
        self.decoder.init_decoding()

[docs]    def advance_decoding(self, max_num_frames=-1):
        """Advances decoding.

        This will decode until there are no more frames ready in the input
        pipeline or `max_num_frames` are decoded. You can keep calling this as
        more frames become available.

        Args:
            max_num_frames (int): Maximum number of frames to decode. If
                negative, all available frames are decoded.
        """
        self.decoder.advance_decoding(self._decodable, max_num_frames)

[docs]    def finalize_decoding(self):
        """Finalizes decoding.

        This function may be optionally called after :meth:`advance_decoding`,
        when you do not plan to decode any further. It does an extra pruning
        step that will help to prune the output lattices more accurately,
        particularly toward the end of the utterance. It does this by using the
        final-probs in pruning (if any final-state survived); it also does a
        final pruning step that visits all states (the pruning that is done
        during decoding may fail to prune states that are within pruning_scale =
        0.1 outside of the beam). If you call this, you cannot call
        :meth:`advance_decoding` again (it will fail), and you cannot call
        get_lattice and related functions with use_final_probs = false.
        """
        self.decoder.finalize_decoding()

[docs]    def decode(self):
        """Decodes all frames in the input pipeline and returns the output.

        Output is a dictionary with the following `(key, value)` pairs:

        ============ =========================== ==============================
        key          value                       value type
        ============ =========================== ==============================
        "alignment"  Frame-level alignment       `List[int]`
        "best_path"  Best lattice path           `CompactLattice`
        "lattice"    Output lattice              `Lattice` or `CompactLattice`
        "likelihood" Log-likelihood of best path `float`
        "text"       Output transcript           `str`
        "weight"     Cost of best path           `LatticeWeight`
        "words"      Words on best path          `List[int]`
        ============ =========================== ==============================

        The "lattice" output is produced only if the decoder can generate
        lattices. It will be a deterministic compact lattice if the decoder is
        configured to determinize lattices. Otherwise, it will be a raw
        state-level lattice.

        If :attr:`symbols` is ``None``, the "text" output will be a string of
        space separated integer indices. Otherwise it will be a string of space
        separated symbols. The "weight" output is a lattice weight consisting of
        (graph-score, acoustic-score).

        Args:
            input (object): Input to decode.

        Returns:
            A dictionary representing decoding output.

        Raises:
            RuntimeError: If decoding fails.
        """
        self.decoder.decode(self._decodable)
        return self.get_output()

[docs]    def get_output(self):
        """Returns decoding output.

        Output is a dictionary with the following `(key, value)` pairs:

        ============ =========================== ==============================
        key          value                       value type
        ============ =========================== ==============================
        "alignment"  Frame-level alignment       `List[int]`
        "best_path"  Best lattice path           `CompactLattice`
        "lattice"    Output lattice              `Lattice` or `CompactLattice`
        "likelihood" Log-likelihood of best path `float`
        "text"       Output transcript           `str`
        "weight"     Cost of best path           `LatticeWeight`
        "words"      Words on best path          `List[int]`
        ============ =========================== ==============================

        The "lattice" output is produced only if the decoder can generate
        lattices. It will be a deterministic compact lattice if the decoder is
        configured to determinize lattices. Otherwise, it will be a raw
        state-level lattice.

        If :attr:`symbols` is ``None``, the "text" output will be a string of
        space separated integer indices. Otherwise it will be a string of space
        separated symbols. The "weight" output is a lattice weight consisting of
        (graph-score, acoustic-score).

        Returns:
            A dictionary representing decoding output.

        Raises:
            RuntimeError: If decoding fails.
        """
        if not (self.allow_partial or self.decoder.reached_final()):
            raise RuntimeError("No final state was active on the last frame.")

        try:
            best_path = self.decoder.get_best_path()
        except RuntimeError:
            raise RuntimeError("Empty decoding output.")

        ali, words, weight = _fst_utils.get_linear_symbol_sequence(best_path)

        if self.symbols:
            text = " ".join(_fst.indices_to_symbols(self.symbols, words))
        else:
            text = " ".join(map(str, words))

        likelihood = - (weight.value1 + weight.value2)

        if self.acoustic_scale != 0.0:
            scale = _fst_utils.acoustic_lattice_scale(1.0 / self.acoustic_scale)
            _fst_utils.scale_lattice(scale, best_path)
        best_path = _fst_utils.convert_lattice_to_compact_lattice(best_path)

        try:
            lat = self.decoder.get_raw_lattice()
        except AttributeError:
            return {
                "alignment": ali,
                "best_path": best_path,
                "likelihood": likelihood,
                "text": text,
                "weight": weight,
                "words": words,
            }
        if lat.num_states() == 0:
            raise RuntimeError("Empty output lattice.")
        lat.connect()

        lat = self._determinize_lattice(lat)

        if self.acoustic_scale != 0.0:
            if isinstance(lat, _fst.CompactLatticeVectorFst):
                _fst_utils.scale_compact_lattice(scale, lat)
            else:
                _fst_utils.scale_lattice(scale, lat)

        return {
            "alignment": ali,
            "best_path": best_path,
            "lattice": lat,
            "likelihood": likelihood,
            "text": text,
            "weight": weight,
            "words": words,
        }

[docs]    def get_partial_output(self, use_final_probs=False):
        """Returns partial decoding output.

        Output is a dictionary with the following `(key, value)` pairs:

        ============ =========================== ==============================
        key          value                       value type
        ============ =========================== ==============================
        "alignment"  Frame-level alignment       `List[int]`
        "best_path"  Best lattice path           `Lattice`
        "likelihood" Log-likelihood of best path `float`
        "text"       Output transcript           `str`
        "weight"     Cost of best path           `LatticeWeight`
        "words"      Words on best path          `List[int]`
        ============ =========================== ==============================

        If :attr:`symbols` is ``None``, the "text" output will be a string of
        space separated integer indices. Otherwise it will be a string of space
        separated symbols. The "weight" output is a lattice weight consisting of
        (graph-score, acoustic-score).

        Args:
            use_final_probs (bool): Whether to use final probabilities when
                computing best path.

        Returns:
            A dictionary representing decoding output.

        Raises:
            RuntimeError: If decoding fails.
        """
        try:
            best_path = self.decoder.get_best_path(use_final_probs)
        except RuntimeError:
            raise RuntimeError("Empty decoding output.")

        ali, words, weight = _fst_utils.get_linear_symbol_sequence(best_path)

        if self.symbols:
            text = " ".join(_fst.indices_to_symbols(self.symbols, words))
        else:
            text = " ".join(map(str, words))

        likelihood = - (weight.value1 + weight.value2)

        return {
            "alignment": ali,
            "best_path": best_path,
            "likelihood": likelihood,
            "text": text,
            "weight": weight,
            "words": words,
        }


[docs]class NnetOnlineRecognizer(OnlineRecognizer):
    """Base class for neural network based online speech recognizers.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmNnetSimple): The acoustic model.
        decoder (object): The online decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        decodable_opts (NnetSimpleLoopedComputationOptions): Configuration
            options for simple looped neural network computation.
        endpoint_opts (OnlineEndpointConfig): Online endpointing configuration.
    """
    def __init__(self, transition_model, acoustic_model, decoder, symbols=None,
                 allow_partial=True, decodable_opts=None, endpoint_opts=None):
        if not isinstance(acoustic_model, _nnet3.AmNnetSimple):
            raise TypeError("acoustic_model should be a AmNnetSimple object")
        self.transition_model = transition_model
        self.acoustic_model = acoustic_model
        nnet = self.acoustic_model.get_nnet()
        _nnet3.set_batchnorm_test_mode(True, nnet)
        _nnet3.set_dropout_test_mode(True, nnet)
        _nnet3.collapse_model(_nnet3.CollapseModelConfig(), nnet)

        if decodable_opts:
            if not isinstance(decodable_opts,
                              _nnet3.NnetSimpleLoopedComputationOptions):
                raise TypeError("decodable_opts should be either None or a "
                                "NnetSimpleLoopedComputationOptions object")
            self.decodable_opts = decodable_opts
        else:
            self.decodable_opts = _nnet3.NnetSimpleLoopedComputationOptions()
        self.decodable_info = _nnet3.DecodableNnetSimpleLoopedInfo.from_am(
            self.decodable_opts, self.acoustic_model)

        if endpoint_opts:
            if not isinstance(endpoint_opts,
                              _online2.OnlineEndpointConfig):
                raise TypeError("decodable_opts should be either None or a "
                                "OnlineEndpointConfig object")
            self.endpoint_opts = endpoint_opts
        else:
            self.endpoint_opts = _online2.OnlineEndpointConfig()

        super(NnetOnlineRecognizer, self).__init__(
            decoder, symbols, allow_partial, self.decodable_opts.acoustic_scale)

[docs]    @staticmethod
    def read_model(model_rxfilename):
        """Reads model from an extended filename."""
        with _util_io.xopen(model_rxfilename) as ki:
            transition_model = _hmm.TransitionModel().read(ki.stream(),
                                                           ki.binary)
            acoustic_model = _nnet3.AmNnetSimple().read(ki.stream(), ki.binary)
        return transition_model, acoustic_model

    def _make_decodable(self, feature_pipeline):
        """Constructs a new online decodable object from input feature pipeline.

        This method also sets output_frame_shift which is used in endpointing.

        Args:
            feature_pipeline (OnlineNnetFeaturePipeline): Input feature
                pipeline.

        Returns:
            DecodableAmNnetLoopedOnline: A decodable object for computing scaled
            log-likelihoods.
        """
        self.output_frame_shift = (feature_pipeline.frame_shift_in_seconds() *
                                   self.decodable_opts.frame_subsampling_factor)
        return _nnet3.DecodableAmNnetLoopedOnline(
            self.transition_model, self.decodable_info,
            feature_pipeline.input_feature(),
            feature_pipeline.ivector_feature())

    def _determinize_lattice(self, lattice):
        """Determinizes raw state-level lattice.

        Args:
            lattice (Lattice): Raw state-level lattice.

        Returns:
            CompactLattice or Lattice: A deterministic compact lattice if the
            decoder is configured to determinize lattices. Otherwise, a raw
            state-level lattice.
        """
        opts = self.decoder.get_options()
        if opts.determinize_lattice:
            return _lat_funcs.determinize_lattice_phone_pruned(
                lattice, self.transition_model, opts.lattice_beam,
                opts.det_opts, True)
        else:
            return lattice


[docs]class NnetLatticeFasterOnlineRecognizer(NnetOnlineRecognizer):
    """Neural network based lattice generating faster online speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmNnetSimple): The acoustic model.
        decoder (LatticeFasterOnlineDecoder): The online decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        decodable_opts (NnetSimpleLoopedComputationOptions): Configuration
            options for simple looped neural network computation.
        endpoint_opts (OnlineEndpointConfig): Online endpointing configuration.
    """
    def __init__(self, transition_model, acoustic_model, decoder, symbols=None,
                 allow_partial=True, decodable_opts=None, endpoint_opts=None):
        if not isinstance(decoder, _dec.LatticeFasterOnlineDecoder):
            raise TypeError("decoder argument should be a "
                            "LatticeFasterOnlineDecoder")
        super(NnetLatticeFasterOnlineRecognizer, self).__init__(
            transition_model, acoustic_model, decoder, symbols, allow_partial,
            decodable_opts, endpoint_opts)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename,
                   symbols_filename=None, allow_partial=True,
                   decoder_opts=None, decodable_opts=None, endpoint_opts=None):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the model.
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            decoder_opts (FasterDecoderOptions): Configuration options for the
                decoder.
            decodable_opts (NnetSimpleLoopedComputationOptions): Configuration
                options for simple looped neural network computation.
            endpoint_opts (OnlineEndpointConfig): Online endpointing
                configuration.

        Returns:
            NnetLatticeFasterOnlineRecognizer: A new recognizer.
        """
        transition_model, acoustic_model = cls.read_model(model_rxfilename)
        graph = _fst.read_fst_kaldi(graph_rxfilename)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeFasterOnlineDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, acoustic_model, decoder, symbols,
                   allow_partial, decodable_opts, endpoint_opts)

[docs]    def endpoint_detected(self):
        """Determines if any of the endpointing rules are active."""
        return _online2.decoding_endpoint_detected(
            self.endpoint_opts, self.transition_model,
            self.output_frame_shift, self.decoder)


[docs]class NnetLatticeFasterOnlineGrammarRecognizer(NnetOnlineRecognizer):
    """Neural network based lattice generating faster online grammar speech recognizer.

    Args:
        transition_model (TransitionModel): The transition model.
        acoustic_model (AmNnetSimple): The acoustic model.
        decoder (LatticeFasterOnlineGrammarDecoder): The online decoder.
        symbols (SymbolTable): The symbol table. If provided, "text" output of
            :meth:`decode` includes symbols instead of integer indices.
        allow_partial (bool): Whether to output decoding results if no
            final state was active on the last frame.
        decodable_opts (NnetSimpleLoopedComputationOptions): Configuration
            options for simple looped neural network computation.
        endpoint_opts (OnlineEndpointConfig): Online endpointing configuration.
    """
    def __init__(self, transition_model, acoustic_model, decoder, symbols=None,
                 allow_partial=True, decodable_opts=None, endpoint_opts=None):
        if not isinstance(decoder, _dec.LatticeFasterOnlineGrammarDecoder):
            raise TypeError("decoder argument should be a "
                            "LatticeFasterOnlineGrammarDecoder")
        super(NnetLatticeFasterOnlineGrammarRecognizer, self).__init__(
            transition_model, acoustic_model, decoder, symbols, allow_partial,
            decodable_opts, endpoint_opts)

[docs]    @classmethod
    def from_files(cls, model_rxfilename, graph_rxfilename,
                   symbols_filename=None, allow_partial=True,
                   decoder_opts=None, decodable_opts=None, endpoint_opts=None):
        """Constructs a new recognizer from given files.

        Args:
            model_rxfilename (str): Extended filename for reading the model.
            graph_rxfilename (str): Extended filename for reading the graph.
            symbols_filename (str): The symbols file. If provided, "text" output
                of :meth:`decode` includes symbols instead of integer indices.
            allow_partial (bool): Whether to output decoding results if no
                final state was active on the last frame.
            decoder_opts (FasterDecoderOptions): Configuration options for the
                decoder.
            decodable_opts (NnetSimpleLoopedComputationOptions): Configuration
                options for simple looped neural network computation.
            endpoint_opts (OnlineEndpointConfig): Online endpointing
                configuration.

        Returns:
            NnetLatticeFasterOnlineGrammarRecognizer: A new recognizer.
        """
        transition_model, acoustic_model = cls.read_model(model_rxfilename)
        with _util_io.xopen(graph_rxfilename) as ki:
            graph = _dec.GrammarFst()
            graph.read(ki.stream(), ki.binary)
        if not decoder_opts:
            decoder_opts = _dec.LatticeFasterDecoderOptions()
        decoder = _dec.LatticeFasterOnlineGrammarDecoder(graph, decoder_opts)
        if symbols_filename is None:
            symbols = None
        else:
            symbols = _fst.SymbolTable.read_text(symbols_filename)
        return cls(transition_model, acoustic_model, decoder, symbols,
                   allow_partial, decodable_opts, endpoint_opts)

[docs]    def endpoint_detected(self):
        """Determines if any of the endpointing rules are active."""
        return _online2.decoding_endpoint_detected_grammar(
            self.endpoint_opts, self.transition_model,
            self.output_frame_shift, self.decoder)


[docs]class LatticeLmRescorer(object):
    """Lattice LM rescorer.

    If `phi_label` is provided, rescoring will be "exact" in the sense that
    back-off arcs in the new LM will only be taken if there are no other
    matching arcs. Inexact rescoring can overestimate the new LM scores for some
    paths in the output lattice. This happens when back-off paths have higher
    scores than matching regular paths in the new LM.

    Args:
        old_lm (StdFst): Old language model FST.
        new_lm (StdFst): New language model FST.
        phi_label (int): Back-off label in the new LM.
    """
    def __init__(self, old_lm, new_lm, phi_label=None):
        self.phi_label = phi_label
        self.old_lm = _fst_utils.convert_std_to_lattice(old_lm).project(True)
        if not bool(self.old_lm.properties(_fst_props.I_LABEL_SORTED, True)):
            self.old_lm.arcsort()
        self.new_lm = _fst_utils.convert_std_to_lattice(new_lm)
        if not self.phi_label:
            self.new_lm.project(True)
        if not bool(self.new_lm.properties(_fst_props.I_LABEL_SORTED, True)):
            self.new_lm.arcsort()
        self.old_lm_compose_cache = _fst_spec.LatticeTableComposeCache.from_compose_opts(
            _fst_spec.TableComposeOptions.from_matcher_opts(
                _fst_spec.TableMatcherOptions(),
                table_match_type=_fst.MatchType.MATCH_INPUT))
        if not self.phi_label:
            self.new_lm_compose_cache = _fst_spec.LatticeTableComposeCache.from_compose_opts(
                _fst_spec.TableComposeOptions.from_matcher_opts(
                    _fst_spec.TableMatcherOptions(),
                    table_match_type=_fst.MatchType.MATCH_INPUT))

[docs]    def rescore(self, lat):
        """Rescores input lattice.

        Args:
            lat (CompactLatticeFst): Input lattice.

        Returns:
            CompactLatticeVectorFst: Rescored lattice.
        """
        if isinstance(lat, _fst_fst.CompactLatticeFst):
            lat = _fst_utils.convert_compact_lattice_to_lattice(lat)
        else:
            raise TypeError("Input should be a compact lattice.")
        scale = _fst_utils.graph_lattice_scale(-1.0)
        _fst_utils.scale_lattice(scale, lat)
        if not bool(lat.properties(_fst_props.O_LABEL_SORTED, True)):
            lat.arcsort("olabel")
        composed_lat = _fst.LatticeVectorFst()
        _fst_spec.table_compose_cache_lattice(lat, self.old_lm, composed_lat,
                                              self.old_lm_compose_cache)
        determinized_lat = _fst_spec.determinize_lattice(composed_lat.invert(),
                                                         False).invert()
        _fst_utils.scale_lattice(scale, determinized_lat)
        if self.phi_label:
            _fst_utils.phi_compose_lattice(determinized_lat, self.new_lm,
                                           self.phi_label, composed_lat)
        else:
            _fst_spec.table_compose_cache_lattice(determinized_lat,
                                                  self.new_lm, composed_lat,
                                                  self.new_lm_compose_cache)
        determinized_lat = _fst_spec.determinize_lattice(composed_lat.invert())
        return determinized_lat

[docs]    @classmethod
    def from_files(cls, old_lm_rxfilename, new_lm_rxfilename, phi_label=None):
        """Constructs a new lattice LM rescorer from given files.

        Args:
            old_lm_rxfilename (str): Extended filename for reading the old LM.
            new_lm_rxfilename (str): Extended filename for reading the new LM.
            phi_label (int): Back-off label in the new LM.

        Returns:
            LatticeRescorer: A new lattice LM rescorer.
        """
        old_lm = _fst.read_fst_kaldi(old_lm_rxfilename)
        new_lm = _fst.read_fst_kaldi(new_lm_rxfilename)
        return cls(old_lm, new_lm, phi_label)


class LatticeRnnlmPrunedRescorer(object):
    """Lattice RNNLM rescorer.

    Args:
        old_lm (ConstArpaLm or StdFst): Old LM.
        word_embedding_mat (CuMatrix): Word embeddings.
        rnnlm (Nnet): RNNLM.
        lm_scale (float): Scaling factor for RNNLM weights. Negated scaling
            factor will be applied to old LM weights.
        acoustic_scale (float): Scaling factor for acoustic weights.
        max_ngram_order (int): RNNLM histories longer than this value will
            be considered equivalent for rescoring purposes. This is an
            approximation saving time and reducing output lattice size.
        opts (RnnlmComputeStateComputationOptions): Options for RNNLM
            state computation.
        compose_opts (ComposeLatticePrunedOptions): Options for pruned
            lattice composition.
    """
    def __init__(self, old_lm, word_embedding_mat, rnnlm,
                 lm_scale=0.5, acoustic_scale=0.1, max_ngram_order=3,
                 opts=None, compose_opts=None):
        self.old_lm = old_lm
        if isinstance(self.old_lm, _lm.ConstArpaLm):
            self.det_old_lm = _lm.ConstArpaLmDeterministicFst(self.old_lm)
        else:
            if not bool(self.old_lm.properties(_fst_props.ACCEPTOR, True)):
                self.old_lm.project(True)
            if not bool(self.old_lm.properties(_fst_props.I_LABEL_SORTED, True)):
                self.old_lm.arcsort()
            self.det_old_lm = _fst_spec.StdBackoffDeterministicOnDemandFst(
                self.old_lm)
        self.scaled_old_lm = _fst_spec.ScaleDeterministicOnDemandFst(
            -lm_scale, self.det_old_lm)
        if not _nnet3.is_simple_nnet(rnnlm):
            raise ValueError("RNNLM should be a simple nnet")
        if not opts:
            opts = _rnnlm.RnnlmComputeStateComputationOptions()
        self.word_embedding_mat = word_embedding_mat
        self.rnnlm = rnnlm
        self.info = _rnnlm.RnnlmComputeStateInfo(opts, self.rnnlm,
                                                 self.word_embedding_mat)
        self.det_rnnlm = _rnnlm.KaldiRnnlmDeterministicFst(max_ngram_order,
                                                           self.info)
        self.lm_scale = lm_scale
        self.acoustic_scale = acoustic_scale
        if compose_opts:
            self.compose_opts = compose_opts
        else:
            self.compose_opts = _lat_funcs.ComposeLatticePrunedOptions()

    def rescore(self, lat):
        """Rescores input lattice.

        Args:
            lat (CompactLatticeFst): Input lattice.

        Returns:
            CompactLatticeVectorFst: Rescored lattice.
        """
        scaled_rnnlm = _fst_spec.ScaleDeterministicOnDemandFst(
            self.lm_scale, self.det_rnnlm)
        if self.acoustic_scale != 1.0:
            scale = _fst_utils.acoustic_lattice_scale(self.acoustic_scale)
            _fst_utils.scale_compact_lattice(scale, lat)
        _lat_funcs.top_sort_lattice_if_needed(lat)
        combined_lms = _fst_spec.StdComposeDeterministicOnDemandFst(
            self.scaled_old_lm, scaled_rnnlm)
        composed_lat = _lat_funcs.compose_compact_lattice_pruned(
            self.compose_opts, lat, combined_lms)
        self.det_rnnlm.clear()
        if self.acoustic_scale != 1.0:
            scale = _fst_utils.acoustic_lattice_scale(1.0 / self.acoustic_scale)
            _fst_utils.scale_compact_lattice(scale, composed_lat)
        return composed_lat

    @classmethod
    def from_files(cls, old_lm_rxfilename, word_embedding_rxfilename,
                   rnnlm_rxfilename, lm_scale=0.5, acoustic_scale=0.1,
                   max_ngram_order=3, use_const_arpa=False, opts=None,
                   compose_opts=None):
        """Constructs a new lattice LM rescorer from given files.

        Args:
            old_lm_rxfilename (str): Extended filename for reading the old LM.
            word_embedding_rxfilename (str): Extended filename for reading the
                word embeddings.
            rnnlm_rxfilename (str): Extended filename for reading the new RNNLM.
            lm_scale (float): Scaling factor for RNNLM weights. Negated scaling
                factor will be applied to old LM weights.
            acoustic_scale (float): Scaling factor for acoustic weights.
            max_ngram_order (int): RNNLM histories longer than this value will
                be considered equivalent for rescoring purposes. This is an
                approximation saving time and reducing output lattice size.
            use_const_arpa (bool): If True, read the old LM as a const-arpa
                file as opposed to an FST file. This is helpful when rescoring
                with a large LM.
            opts (RnnlmComputeStateComputationOptions): Options for RNNLM
                state computation.
            compose_opts (ComposeLatticePrunedOptions): Options for pruned
                lattice composition.

        Returns:
            LatticeRnnlmPrunedRescorer: A new lattice RNNLM rescorer.
        """
        if use_const_arpa:
            with _util_io.xopen(old_lm_rxfilename) as ki:
                old_lm = _lm.ConstArpaLm()
                old_lm.read(ki.stream(), ki.binary)
        else:
            old_lm = _fst.read_fst_kaldi(old_lm_rxfilename)
        with _util_io.xopen(word_embedding_rxfilename) as ki:
            word_embedding_mat = _cumatrix.CuMatrix()
            word_embedding_mat.read(ki.stream(), ki.binary)
        with _util_io.xopen(rnnlm_rxfilename) as ki:
            rnnlm = _nnet3.Nnet()
            rnnlm.read(ki.stream(), ki.binary)
        return cls(old_lm, word_embedding_mat, rnnlm, lm_scale, acoustic_scale,
                   max_ngram_order, opts, compose_opts)