Skip to content

mistral_common.audio

hertz_to_mel(freq)

Convert frequency from hertz to mels using the "slaney" mel-scale.

Parameters:

Name Type Description Default
freq float | ndarray

The frequency, or multiple frequencies, in hertz (Hz).

required

Returns:

Type Description
float | ndarray

The frequencies on the mel scale.

Source code in src/mistral_common/audio.py
def hertz_to_mel(freq: float | np.ndarray) -> float | np.ndarray:
    r"""Convert frequency from hertz to mels using the "slaney" mel-scale.

    Args:
        freq: The frequency, or multiple frequencies, in hertz (Hz).

    Returns:
        The frequencies on the mel scale.
    """
    min_log_hertz = 1000.0
    min_log_mel = 15.0
    logstep = 27.0 / np.log(6.4)
    mels = 3.0 * freq / 200.0

    if isinstance(freq, np.ndarray):
        assert isinstance(mels, np.ndarray), type(mels)
        log_region = freq >= min_log_hertz
        mels[log_region] = min_log_mel + np.log(freq[log_region] / min_log_hertz) * logstep
    elif freq >= min_log_hertz:
        mels = min_log_mel + np.log(freq / min_log_hertz) * logstep

    return mels

mel_filter_bank(num_frequency_bins, num_mel_bins, min_frequency, max_frequency, sampling_rate) cached

Create a Mel filter bank matrix for converting frequency bins to the Mel scale.

This function generates a filter bank matrix that can be used to transform a spectrum represented in frequency bins to the Mel scale. The Mel scale is a perceptual scale of pitches judged by listeners to be equal in distance from one another.

Parameters:

Name Type Description Default
num_frequency_bins int

The number of frequency bins in the input spectrum.

required
num_mel_bins int

The number of desired Mel bins in the output.

required
min_frequency float

The minimum frequency (in Hz) to consider.

required
max_frequency float

The maximum frequency (in Hz) to consider.

required
sampling_rate int

The sampling rate of the audio signal.

required

Returns:

Type Description
ndarray

A filter bank matrix of shape (num_frequency_bins, num_mel_bins) that can be used to project frequency bin energies onto Mel bins.

Source code in src/mistral_common/audio.py
@cache
def mel_filter_bank(
    num_frequency_bins: int,
    num_mel_bins: int,
    min_frequency: float,
    max_frequency: float,
    sampling_rate: int,
) -> np.ndarray:
    r"""Create a Mel filter bank matrix for converting frequency bins to the Mel scale.

    This function generates a filter bank matrix that can be used to transform a
    spectrum represented in frequency bins to the Mel scale. The Mel scale is a
    perceptual scale of pitches judged by listeners to be equal in distance from one another.

    The filters are triangular, with `num_mel_bins + 2` edge frequencies spaced evenly
    on the mel scale between `min_frequency` and `max_frequency`, and each filter is
    scaled by `2 / (upper_edge - lower_edge)` (Slaney-style normalization).

    Note:
        The function is decorated with `@cache`, so (assuming this is
        `functools.cache`) repeated calls with the same arguments return the very
        same array object. Treat the result as read-only and copy it before
        modifying it in place.

    Args:
        num_frequency_bins: The number of frequency bins in the input spectrum.
        num_mel_bins: The number of desired Mel bins in the output.
        min_frequency: The minimum frequency (in Hz) to consider.
        max_frequency: The maximum frequency (in Hz) to consider.
        sampling_rate: The sampling rate of the audio signal.

    Returns:
        A filter bank matrix of shape (num_frequency_bins, num_mel_bins)
        that can be used to project frequency bin energies onto Mel bins.

    Raises:
        ValueError: If `num_frequency_bins` is smaller than 2, if `min_frequency`
            is greater than `max_frequency`, or if at least one mel filter is
            zero for every frequency bin.
    """
    if num_frequency_bins < 2:
        raise ValueError(f"Require num_frequency_bins: {num_frequency_bins} >= 2")

    # NOTE(review): equality passes this check, but then all filter edges coincide
    # and the normalization below divides by zero -- confirm whether
    # `min_frequency == max_frequency` is meant to be supported.
    if min_frequency > max_frequency:
        raise ValueError(f"Require min_frequency: {min_frequency} <= max_frequency: {max_frequency}")

    # center points of the triangular mel filters
    # (`num_mel_bins + 2` points: each filter needs a lower edge, a center and an upper edge)
    mel_min = hertz_to_mel(min_frequency)
    mel_max = hertz_to_mel(max_frequency)
    mel_freqs = np.linspace(mel_min, mel_max, num_mel_bins + 2)
    filter_freqs = mel_to_hertz(mel_freqs)

    # frequencies of FFT bins in Hz, evenly spaced from 0 to `sampling_rate // 2`
    fft_freqs = np.linspace(0, sampling_rate // 2, num_frequency_bins)

    # The in-place scaling and the axis-0 reduction below both require the mel
    # filters to lie along the last axis, i.e. one row per FFT bin.
    mel_filters = _create_triangular_filter_bank(fft_freqs, filter_freqs)

    # Slaney-style mel is scaled to be approx constant energy per channel:
    # each filter is divided by half the width of its support (upper edge - lower edge).
    enorm = 2.0 / (filter_freqs[2 : num_mel_bins + 2] - filter_freqs[:num_mel_bins])
    mel_filters *= np.expand_dims(enorm, 0)

    # A filter whose maximum over all frequency bins is zero contributes nothing.
    if (mel_filters.max(axis=0) == 0.0).any():
        raise ValueError(
            "At least one mel filter has all zero values. "
            f"The value for `num_mel_bins` ({num_mel_bins}) "
            "may be set too high. "
            "Or, the value for `num_frequency_bins` "
            f"({num_frequency_bins}) may be set too low."
        )
    return mel_filters

mel_to_hertz(mels)

Convert frequency from mels to hertz using the "slaney" mel-scale.

Parameters:

Name Type Description Default
mels ndarray

The frequency, or multiple frequencies, in mels.

required

Returns:

Type Description
ndarray

The frequencies in hertz.

Source code in src/mistral_common/audio.py
def mel_to_hertz(mels: np.ndarray) -> np.ndarray:
    r"""Map mel values back to hertz on the "slaney" mel-scale.

    Values below 15 mel use the linear mapping ``hz = 200 * mel / 3``; values
    at or above 15 mel use the exponential mapping
    ``hz = 1000 * exp(ln(6.4) / 27 * (mel - 15))``.

    Args:
        mels: Array of one or more frequencies expressed in mels. It is not
            modified.

    Returns:
        A new array holding the corresponding frequencies in hertz.
    """
    knee_mel = 15.0  # mel value where the exponential segment begins
    knee_hertz = 1000.0  # frequency corresponding to `knee_mel`
    growth = np.log(6.4) / 27.0  # exponent scale of the exponential segment

    # Linear mapping for every entry; entries past the knee are overwritten below.
    hertz = 200.0 * mels / 3.0

    above_knee = mels >= knee_mel
    mel_excess = mels[above_knee] - knee_mel
    hertz[above_knee] = knee_hertz * np.exp(growth * mel_excess)
    return hertz