mistral_common.tokens.tokenizers.multimodal

ImageChunk(**data)

Bases: BaseContentChunk

Image chunk.

Attributes:

    image (SerializableImage): The image to be sent to the model.

Examples:

>>> from PIL import Image
>>> image_chunk = ImageChunk(image=Image.new('RGB', (200, 200), color='blue'))
Source code in .venv/lib/python3.13/site-packages/pydantic/main.py
def __init__(self, /, **data: Any) -> None:
    """Create a new model by parsing and validating input data from keyword arguments.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is explicitly positional-only to allow `self` as a field name.
    """
    # `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks
    __tracebackhide__ = True
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if self is not validated_self:
        warnings.warn(
            'A custom validator is returning a value other than `self`.\n'
            "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
            'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
            stacklevel=2,
        )

from_openai(openai_chunk) classmethod

Converts the OpenAI chunk to the Mistral format.

Source code in src/mistral_common/protocol/instruct/messages.py
@classmethod
def from_openai(cls, openai_chunk: Dict[str, Union[str, Dict[str, str]]]) -> "ImageChunk":
    r"""Converts the OpenAI chunk to the Mistral format."""
    assert openai_chunk.get("type") == "image_url", openai_chunk

    image_url_dict = openai_chunk["image_url"]
    assert isinstance(image_url_dict, dict) and "url" in image_url_dict, image_url_dict

    if re.match(r"^data:image/\w+;base64,", image_url_dict["url"]):  # Remove the prefix if it exists
        image_url_dict["url"] = image_url_dict["url"].split(",")[1]

    return cls.model_validate({"image": image_url_dict["url"]})

to_openai()

Converts the chunk to the OpenAI format.

Source code in src/mistral_common/protocol/instruct/messages.py
def to_openai(self) -> Dict[str, Union[str, Dict[str, str]]]:
    r"""Converts the chunk to the OpenAI format."""
    base64_image = self.model_dump(include={"image"}, context={"add_format_prefix": True})["image"]
    return {"type": "image_url", "image_url": {"url": base64_image}}
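
A minimal round-trip sketch, assuming Pillow is installed; it relies only on the to_openai and from_openai behaviour shown above, where to_openai emits a base64 data URL and from_openai strips the prefix again:

>>> from PIL import Image
>>> chunk = ImageChunk(image=Image.new('RGB', (4, 4), color='red'))
>>> openai_chunk = chunk.to_openai()
>>> openai_chunk["type"]
'image_url'
>>> isinstance(ImageChunk.from_openai(openai_chunk), ImageChunk)
True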

ImageConfig(image_patch_size, max_image_size, spatial_merge_size=1) dataclass

Configuration for the image tokenizers.
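
A minimal construction sketch; the patch size and maximum image size below are illustrative values, not library defaults:

>>> config = ImageConfig(image_patch_size=16, max_image_size=1024)
>>> config.spatial_merge_size
1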

ImageEncoder(image_config, special_ids)

Image encoder for the image tokenizer.

Parameters:

    image_config (ImageConfig): Configuration for the image tokenizer. Required.
    special_ids (SpecialImageIDs): Special image token ids. Required.
Source code in src/mistral_common/tokens/tokenizers/image.py
def __init__(self, image_config: ImageConfig, special_ids: SpecialImageIDs) -> None:
    r"""Initialize the image encoder.

    Args:
        image_config: Configuration for the image tokenizer.
        special_ids: Special image tokens ids.
    """
    self.image_config = image_config
    self.special_ids = special_ids

__call__(content)

Converts an image chunk to an image encoding.

Parameters:

    content (Union[ImageChunk, ImageURLChunk]): The image chunk to be converted. Required.

Returns:

    ImageEncoding: The image encoding.

Source code in src/mistral_common/tokens/tokenizers/image.py
def __call__(self, content: Union[ImageChunk, ImageURLChunk]) -> ImageEncoding:
    r"""Converts an image chunk to an image encoding.

    Args:
        content: image chunk to be converted.

    Returns:
        Image encoding.
    """
    image = image_from_chunk(content)
    w, h = self._image_to_num_tokens(image)
    assert w > 0
    assert h > 0
    image_tokens = ([self.special_ids.img] * w + [self.special_ids.img_break]) * h
    image_tokens[-1] = self.special_ids.img_end
    new_image_size = (
        w * self.image_config.image_patch_size * self.image_config.spatial_merge_size,
        h * self.image_config.image_patch_size * self.image_config.spatial_merge_size,
    )
    processed_image = transform_image(image, new_image_size)
    return ImageEncoding(tokens=image_tokens, image=processed_image)
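
A short end-to-end sketch tying the encoder pieces together. It assumes the OpenCV extra is installed (required by transform_image) and uses illustrative configuration values and token ids:

>>> from PIL import Image
>>> config = ImageConfig(image_patch_size=16, max_image_size=1024)
>>> special_ids = SpecialImageIDs(img=10, img_break=12, img_end=13)
>>> encoder = ImageEncoder(config, special_ids)
>>> encoding = encoder(ImageChunk(image=Image.new('RGB', (64, 64), color='blue')))
>>> encoding.tokens[-1] == special_ids.img_end
True
>>> encoding.image.shape[0]  # channels-first array, see transform_image below
3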

ImageEncoding(tokens, image) dataclass

A tokenized image.

Attributes:

    tokens (List[int]): The token ids.
    image (ndarray): The image as a numpy array.

Examples:

>>> import numpy as np
>>> image_encoding = ImageEncoding(tokens=[1, 2, 3], image=np.array([[0., 0.5, 1.]]))

ImageURLChunk(**data)

Bases: BaseContentChunk

Image URL chunk.

Attributes:

    image_url (Union[ImageURL, str]): The URL of the image or a base64 encoded image to be sent to the model.

Examples:

>>> image_url_chunk = ImageURLChunk(image_url="data:image/png;base64,iVBORw0")
Source code in .venv/lib/python3.13/site-packages/pydantic/main.py
def __init__(self, /, **data: Any) -> None:
    """Create a new model by parsing and validating input data from keyword arguments.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is explicitly positional-only to allow `self` as a field name.
    """
    # `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks
    __tracebackhide__ = True
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if self is not validated_self:
        warnings.warn(
            'A custom validator is returning a value other than `self`.\n'
            "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
            'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
            stacklevel=2,
        )

from_openai(openai_chunk) classmethod

Converts the OpenAI chunk to the Mistral format.

Source code in src/mistral_common/protocol/instruct/messages.py
@classmethod
def from_openai(cls, openai_chunk: Dict[str, Union[str, Dict[str, str]]]) -> "ImageURLChunk":
    r"""Converts the OpenAI chunk to the Mistral format."""
    return cls.model_validate({"image_url": openai_chunk["image_url"]})

to_openai()

Converts the chunk to the OpenAI format.

Source code in src/mistral_common/protocol/instruct/messages.py
def to_openai(self) -> Dict[str, Union[str, Dict[str, str]]]:
    r"""Converts the chunk to the OpenAI format."""
    image_url_dict = {"url": self.get_url()}
    if isinstance(self.image_url, ImageURL) and self.image_url.detail is not None:
        image_url_dict["detail"] = self.image_url.detail

    out_dict: Dict[str, Union[str, Dict[str, str]]] = {
        "type": "image_url",
        "image_url": image_url_dict,
    }
    return out_dict
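
A round-trip sketch for the two converters above, reusing the base64 data URL from the class example; with a plain string image_url no detail field is emitted:

>>> chunk = ImageURLChunk(image_url="data:image/png;base64,iVBORw0")
>>> openai_chunk = chunk.to_openai()
>>> openai_chunk["image_url"]["url"]
'data:image/png;base64,iVBORw0'
>>> ImageURLChunk.from_openai(openai_chunk).get_url()
'data:image/png;base64,iVBORw0'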

MultiModalVersion

Bases: str, Enum

Version of the image tokenizer.

SpecialImageIDs(img, img_break, img_end) dataclass

Special image token ids.

Attributes:

    img (int): The image token id.
    img_break (int): The image break token id.
    img_end (int): The image end token id.

Examples:

>>> special_image_ids = SpecialImageIDs(img=1, img_break=2, img_end=3)

download_image(url)

Download an image from a URL and return it as a PIL Image.

Parameters:

    url (str): The URL of the image to download. Required.

Returns:

    Image: The downloaded image as a PIL Image object.

Source code in src/mistral_common/image.py
def download_image(url: str) -> Image.Image:
    r"""Download an image from a URL and return it as a PIL Image.

    Args:
        url: The URL of the image to download.

    Returns:
       The downloaded image as a PIL Image object.
    """
    headers = {"User-Agent": f"mistral-common/{__version__}"}
    try:
        # Make a request to download the image
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for bad responses (4xx, 5xx)

        # Convert the image content to a PIL Image
        img = Image.open(io.BytesIO(response.content))
        return img

    except requests.exceptions.RequestException as e:
        raise RuntimeError(f"Error downloading the image from {url}: {e}.")
    except Exception as e:
        raise RuntimeError(f"Error converting to PIL image: {e}")
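
A usage sketch; the URL below is a placeholder and the call performs a real HTTP request, so it raises RuntimeError if the resource cannot be fetched:

>>> image = download_image("https://example.com/image.png")  # placeholder URL, needs network access

The returned object is a regular PIL.Image.Image, so the usual Pillow API (size, mode, save, ...) applies.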

image_from_chunk(chunk)

Get a serializable image from a chunk.

Parameters:

    chunk (Union[ImageURLChunk, ImageChunk]): The chunk to get the image from. Required.

Returns:

    SerializableImage: The image as a PIL Image object.

Source code in src/mistral_common/tokens/tokenizers/image.py
def image_from_chunk(chunk: Union[ImageURLChunk, ImageChunk]) -> SerializableImage:
    r"""Get a serializable image from a chunk.

    Args:
        chunk: The chunk to get the image from.

    Returns:
        The image as a PIL Image object.
    """
    if isinstance(chunk, ImageChunk):
        return chunk.image
    if chunk.get_url().startswith("data:image"):
        data = chunk.get_url().split(",")[1]
        image_data = base64.b64decode(data)
        return Image.open(BytesIO(image_data))
    if chunk.get_url().startswith("file"):
        return Image.open(open(chunk.get_url().replace("file://", ""), "rb"))
    if chunk.get_url().startswith("http"):
        return download_image(chunk.get_url())

    raise RuntimeError(f"Unsupported image url scheme {chunk.get_url()}")
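
A sketch of the simplest branch, where the chunk already wraps a PIL image; data:, file:// and http(s):// URLs follow the other branches shown in the source above:

>>> from PIL import Image
>>> pil_image = Image.new('RGB', (8, 8), color='green')
>>> image_from_chunk(ImageChunk(image=pil_image)).size
(8, 8)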

is_cv2_installed()

Check if OpenCV is installed.

Source code in src/mistral_common/tokens/tokenizers/image.py
def is_cv2_installed() -> bool:
    r"""Check if OpenCV is installed."""
    return _cv2_installed

normalize(np_image, mean, std)

Normalize a tensor image with mean and standard deviation.

Parameters:

    np_image (ndarray): Image to be normalized. Required.
    mean (Tuple[float, float, float]): Mean for each channel. Required.
    std (Tuple[float, float, float]): Standard deviation for each channel. Required.

Returns:

    ndarray: Normalized image with shape (C, H, W).

Source code in src/mistral_common/tokens/tokenizers/image.py
def normalize(
    np_image: np.ndarray,
    mean: Tuple[float, float, float],
    std: Tuple[float, float, float],
) -> np.ndarray:
    r"""Normalize a tensor image with mean and standard deviation.

    Args:
        np_image: Image to be normalized.
        mean: Mean for each channel.
        std: Standard deviation for each channel.

    Returns:
        Normalized image with shape (C, H, W).
    """
    np_image = np_image / 255.0

    assert len(np_image.shape) == 3, f"{np_image.shape=}"
    assert np_image.shape[2] == len(mean) == len(std), f"{np_image.shape=}, {mean=}, {std=}"

    np_image = (np_image - mean) / std

    return np_image.transpose(2, 0, 1)
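
A small sketch on an all-black (H, W, C) array with illustrative per-channel statistics; pixel values are expected in the 0-255 range, since the function divides by 255 before standardizing:

>>> import numpy as np
>>> normalized = normalize(np.zeros((2, 2, 3), dtype=np.float32), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
>>> normalized.shape
(3, 2, 2)
>>> float(normalized[0, 0, 0])
-1.0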

transform_image(image, new_size)

Transform an image to a numpy array with the given size.

Parameters:

    image (Image): Image to be transformed. Required.
    new_size (Tuple[int, int]): New size of the image. Required.

Returns:

    ndarray: Transformed image with shape (C, H, W).

Source code in src/mistral_common/tokens/tokenizers/image.py
def transform_image(image: Image.Image, new_size: Tuple[int, int]) -> np.ndarray:
    r"""Transform an image to a numpy array with the given size.

    Args:
        image: Image to be transformed.
        new_size: New size of the image.

    Returns:
        Transformed image with shape (C, H, W).
    """
    if not is_cv2_installed():
        raise ImportError("OpenCV is required for this function. Install it with 'pip install mistral-common[opencv]'")

    np_image = cv2.resize(np.array(_convert_to_rgb(image), dtype=np.float32), new_size, interpolation=cv2.INTER_CUBIC)
    return normalize(np_image, DATASET_MEAN, DATASET_STD)
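
A usage sketch, assuming the opencv extra is installed; new_size follows cv2.resize's (width, height) convention, while the returned array is channels-first:

>>> from PIL import Image
>>> transform_image(Image.new('RGB', (300, 200), color='white'), (64, 48)).shape
(3, 48, 64)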