Skip to content

mistral_common.protocol.speech.request

SpeechRequest(**data)

Bases: BaseCompletionRequest

Request for text-to-speech synthesis.

Supports both preset voices and voice cloning via reference audio.

Attributes:

Name Type Description
id str | None

Optional unique identifier for the speech request.

model str | None

Optional model identifier for the speech synthesis.

input str

Text input to be converted to speech.

voice str | None

Optional preset voice identifier (e.g., 'Neutral Male', 'Neutral Female') to use for speech synthesis.

ref_audio str | bytes | None

Optional reference audio for voice cloning, provided as a base64-encoded string or raw bytes. Takes precedence over voice when both are provided.

Source code in .venv/lib/python3.14/site-packages/pydantic/main.py
def __init__(self, /, **data: Any) -> None:
    """Create a new model by parsing and validating input data from keyword arguments.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is explicitly positional-only to allow `self` as a field name.
    """
    # Hide this frame from pytest (and similar tools) tracebacks so validation
    # errors appear to originate at the caller.
    __tracebackhide__ = True
    result = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if result is not self:
        # A top-level model validator produced a replacement instance; that
        # replacement cannot be honored when constructing via __init__, so
        # warn the developer instead of silently discarding it.
        warnings.warn(
            'A custom validator is returning a value other than `self`.\n'
            "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
            'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
            stacklevel=2,
        )

from_openai(openai_request, strict=False) classmethod

Create a SpeechRequest instance from an OpenAI-compatible request dictionary.

Parameters:

Name Type Description Default
openai_request dict[str, Any]

The OpenAI request dictionary.

required
strict bool

A flag indicating whether to perform strict validation of the audio data.

False

Returns:

Type Description
SpeechRequest

An instance of SpeechRequest.

Source code in src/mistral_common/protocol/speech/request.py
@classmethod
def from_openai(cls, openai_request: dict[str, Any], strict: bool = False) -> "SpeechRequest":
    r"""Create a SpeechRequest instance from an OpenAI-compatible request dictionary.

    Args:
        openai_request: The OpenAI request dictionary.
        strict: A flag indicating whether to perform strict validation of the audio data.

    Returns:
        An instance of SpeechRequest.
    """
    # Retain only the keys that this model actually declares as fields.
    fields: dict[str, Any] = {
        key: value for key, value in openai_request.items() if key in cls.model_fields
    }

    ref_audio = openai_request.get("ref_audio")
    if ref_audio is not None:
        # Normalize the several accepted carrier shapes down to str/bytes.
        if isinstance(ref_audio, io.BytesIO):
            raw = ref_audio.getvalue()
        elif hasattr(ref_audio, "file"):
            # e.g. an upload-style wrapper exposing a file handle.
            raw = ref_audio.file.read()
        else:
            # Already a string (base64) or bytes
            raw = ref_audio

        if isinstance(raw, bytes):
            # Re-encode raw bytes as base64 so the field is JSON-serializable.
            audio = Audio.from_bytes(raw, strict=strict)
            assert audio.format is not None, f"Audio format must be set, got {audio.format=}"
            fields["ref_audio"] = audio.to_base64(audio.format)
        else:
            fields["ref_audio"] = raw

    # OAI uses "voice" as a string or object with "id"; normalize to string
    voice = openai_request.get("voice")
    if isinstance(voice, dict):
        fields["voice"] = voice["id"]

    return cls(**fields)

to_openai(**kwargs)

Convert this SpeechRequest to an OpenAI-compatible request dictionary.

Parameters:

Name Type Description Default
**kwargs Any

Additional key-value pairs to include in the request dictionary.

{}

Returns:

Type Description
dict[str, Any]

An OpenAI-compatible request dictionary.

Source code in src/mistral_common/protocol/speech/request.py
def to_openai(self, **kwargs: Any) -> dict[str, Any]:
    r"""Convert this SpeechRequest to an OpenAI-compatible request dictionary.

    Args:
        **kwargs: Additional key-value pairs to include in the request dictionary.

    Returns:
        An OpenAI-compatible request dictionary. When ``ref_audio`` is set it
        is emitted as an ``io.BytesIO`` buffer positioned at offset 0.
    """
    openai_request: dict[str, Any] = self.model_dump(exclude={"ref_audio"})

    if self.ref_audio is not None:
        if isinstance(self.ref_audio, bytes):
            # Raw bytes can be forwarded as-is; no decoding required.
            buffer = io.BytesIO(self.ref_audio)
        else:
            # Base64 string: decode and re-encode through soundfile. The
            # soundfile dependency is only needed on this path, so check it
            # here rather than unconditionally (previously the check ran even
            # for requests with no ref_audio at all).
            assert_soundfile_installed()
            audio = Audio.from_base64(self.ref_audio)

            buffer = io.BytesIO()
            sf.write(buffer, audio.audio_array, audio.sampling_rate, format=audio.format)
            buffer.seek(0)

        openai_request["ref_audio"] = buffer

    openai_request.update(kwargs)

    return openai_request