`mistral_common.tokens.tokenizers.multimodal`

`ImageChunk(**data)`

Bases: BaseContentChunk

Image chunk.

Attributes:

Name	Type	Description
`image`	`SerializableImage`	The image to be sent to the model.

Examples:

>>> from PIL import Image
>>> image_chunk = ImageChunk(image=Image.new('RGB', (200, 200), color='blue'))

Source code in .venv/lib/python3.13/site-packages/pydantic/main.py

def __init__(self, /, **data: Any) -> None:
    """Create a new model by parsing and validating input data from keyword arguments.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is explicitly positional-only to allow `self` as a field name.
    """
    # `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks
    __tracebackhide__ = True
    # The keyword arguments are validated with this instance passed as `self_instance`; the
    # validator's return value is kept only so it can be compared against `self` below.
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if self is not validated_self:
        # A different object came back from validation. This is reported as a warning only
        # (no exception is raised here); `stacklevel=2` attributes the warning to the caller.
        warnings.warn(
            'A custom validator is returning a value other than `self`.\n'
            "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
            'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
            stacklevel=2,
        )

`from_openai(openai_chunk)` `classmethod`

Converts the OpenAI chunk to the Mistral format.

Source code in src/mistral_common/protocol/instruct/messages.py

@classmethod
def from_openai(cls, openai_chunk: Dict[str, Union[str, Dict[str, str]]]) -> "ImageChunk":
    r"""Converts the OpenAI chunk to the Mistral format.

    The input chunk is left untouched: the base64 payload is extracted into a local
    variable instead of being written back into ``openai_chunk["image_url"]``.

    Args:
        openai_chunk: OpenAI-style chunk. Its ``"type"`` must be ``"image_url"`` and its
            ``"image_url"`` entry must be a dict containing a ``"url"`` key.

    Returns:
        The chunk obtained by validating ``{"image": <url>}``, where a leading
        ``data:image/<subtype>;base64,`` prefix has been stripped from the url.

    Raises:
        AssertionError: If the chunk type is not ``"image_url"`` or the ``"image_url"``
            entry is not a dict with a ``"url"`` key.
    """
    assert openai_chunk.get("type") == "image_url", openai_chunk

    image_url_dict = openai_chunk["image_url"]
    assert isinstance(image_url_dict, dict) and "url" in image_url_dict, image_url_dict

    url = image_url_dict["url"]
    if re.match(r"^data:image/\w+;base64,", url):
        # Drop only the data-URL prefix; everything after the first comma is the payload.
        url = url.split(",", 1)[1]

    return cls.model_validate({"image": url})

`to_openai()`

Converts the chunk to the OpenAI format.

Source code in src/mistral_common/protocol/instruct/messages.py

def to_openai(self) -> Dict[str, Union[str, Dict[str, str]]]:
    r"""Converts the chunk to the OpenAI format.

    Returns:
        A dict of the form ``{"type": "image_url", "image_url": {"url": ...}}`` whose url is
        the ``"image"`` entry of this model's dump, requested with the
        ``add_format_prefix`` context flag enabled.
    """
    dumped = self.model_dump(include={"image"}, context={"add_format_prefix": True})
    url_entry = {"url": dumped["image"]}
    return {"type": "image_url", "image_url": url_entry}

`ImageConfig(image_patch_size, max_image_size, spatial_merge_size=1)` `dataclass`

Configuration for the image tokenizers.

`ImageEncoder(image_config, special_ids)`

Image encoder for the image tokenizer.

Parameters:

Name	Type	Description	Default
`image_config`	`ImageConfig`	Configuration for the image tokenizer.	required
`special_ids`	`SpecialImageIDs`	Special image tokens ids.	required

Source code in src/mistral_common/tokens/tokenizers/image.py

def __init__(self, image_config: ImageConfig, special_ids: SpecialImageIDs) -> None:
    r"""Store the configuration and special token ids on the encoder.

    Args:
        image_config: Configuration for the image tokenizer.
        special_ids: Special image tokens ids.
    """
    self.image_config, self.special_ids = image_config, special_ids

`__call__(content)`

Converts an image chunk to an image encoding.

Parameters:

Name	Type	Description	Default
`content`	`Union[ImageChunk, ImageURLChunk]`	image chunk to be converted.	required

Returns:

Type	Description
`ImageEncoding`	Image encoding.

Source code in src/mistral_common/tokens/tokenizers/image.py

def __call__(self, content: Union[ImageChunk, ImageURLChunk]) -> ImageEncoding:
    r"""Encode an image chunk as image tokens plus a processed image array.

    Args:
        content: image chunk to be converted.

    Returns:
        Image encoding.
    """
    pil_image = image_from_chunk(content)
    n_cols, n_rows = self._image_to_num_tokens(pil_image)
    assert n_cols > 0
    assert n_rows > 0

    ids = self.special_ids
    # Each row is `n_cols` image tokens followed by one break token ...
    token_row = [ids.img] * n_cols + [ids.img_break]
    tokens = token_row * n_rows
    # ... and the break that would close the last row is replaced by the end token.
    tokens[-1] = ids.img_end

    cfg = self.image_config
    target_size = (
        n_cols * cfg.image_patch_size * cfg.spatial_merge_size,
        n_rows * cfg.image_patch_size * cfg.spatial_merge_size,
    )
    return ImageEncoding(tokens=tokens, image=transform_image(pil_image, target_size))

`ImageEncoding(tokens, image)` `dataclass`

A tokenized image.

Attributes:

Name	Type	Description
`tokens`	`List[int]`	The token ids.
`image`	`ndarray`	The image as a numpy array.

Examples:

>>> import numpy as np
>>> image_encoding = ImageEncoding(tokens=[1, 2, 3], image=np.array([[0., 0.5, 1.]]))

`ImageURLChunk(**data)`

Bases: BaseContentChunk

Image URL chunk.

Attributes:

Name	Type	Description
`image_url`	`Union[ImageURL, str]`	The URL of the image or a base64 encoded image to be sent to the model.

Examples:

>>> image_url_chunk = ImageURLChunk(image_url="data:image/png;base64,iVBORw0")

Source code in .venv/lib/python3.13/site-packages/pydantic/main.py

def __init__(self, /, **data: Any) -> None:
    """Create a new model by parsing and validating input data from keyword arguments.

    Raises [`ValidationError`][pydantic_core.ValidationError] if the input data cannot be
    validated to form a valid model.

    `self` is explicitly positional-only to allow `self` as a field name.
    """
    # `__tracebackhide__` tells pytest and some other tools to omit this function from tracebacks
    __tracebackhide__ = True
    # The keyword arguments are validated with this instance passed as `self_instance`; the
    # validator's return value is kept only so it can be compared against `self` below.
    validated_self = self.__pydantic_validator__.validate_python(data, self_instance=self)
    if self is not validated_self:
        # A different object came back from validation. This is reported as a warning only
        # (no exception is raised here); `stacklevel=2` attributes the warning to the caller.
        warnings.warn(
            'A custom validator is returning a value other than `self`.\n'
            "Returning anything other than `self` from a top level model validator isn't supported when validating via `__init__`.\n"
            'See the `model_validator` docs (https://docs.pydantic.dev/latest/concepts/validators/#model-validators) for more details.',
            stacklevel=2,
        )

`from_openai(openai_chunk)` `classmethod`

Converts the OpenAI chunk to the Mistral format.

Source code in src/mistral_common/protocol/instruct/messages.py

@classmethod
def from_openai(cls, openai_chunk: Dict[str, Union[str, Dict[str, str]]]) -> "ImageURLChunk":
    r"""Build the chunk from an OpenAI-style chunk.

    Args:
        openai_chunk: OpenAI-style chunk; its ``"image_url"`` entry is forwarded unchanged.

    Returns:
        The chunk obtained by validating ``{"image_url": openai_chunk["image_url"]}``.
    """
    payload = {"image_url": openai_chunk["image_url"]}
    return cls.model_validate(payload)

`to_openai()`

Converts the chunk to the OpenAI format.

Source code in src/mistral_common/protocol/instruct/messages.py

def to_openai(self) -> Dict[str, Union[str, Dict[str, str]]]:
    r"""Converts the chunk to the OpenAI format.

    Returns:
        A dict of the form ``{"type": "image_url", "image_url": {...}}``. The inner dict
        always carries ``"url"``; ``"detail"`` is added only when ``image_url`` is an
        ``ImageURL`` whose ``detail`` is not ``None``.
    """
    url_entry: Dict[str, str] = {"url": self.get_url()}

    source = self.image_url
    has_detail = isinstance(source, ImageURL) and source.detail is not None
    if has_detail:
        url_entry["detail"] = source.detail

    return {"type": "image_url", "image_url": url_entry}

`MultiModalVersion`

Bases: str, Enum

Version of the image tokenizer.

`SpecialImageIDs(img, img_break, img_end)` `dataclass`

Special image tokens ids.

Attributes:

Name	Type	Description
`img`	`int`	The image token id.
`img_break`	`int`	The image break token id.
`img_end`	`int`	The image end token id.

Examples:

>>> special_image_ids = SpecialImageIDs(img=1, img_break=2, img_end=3)

`download_image(url)`

Download an image from a URL and return it as a PIL Image.

Parameters:

Name	Type	Description	Default
`url`	`str`	The URL of the image to download.	required

Returns:

Type	Description
`Image`	The downloaded image as a PIL Image object.

Source code in src/mistral_common/image.py

def download_image(url: str) -> Image.Image:
    r"""Download an image from a URL and return it as a PIL Image.

    Args:
        url: The URL of the image to download.

    Returns:
        The downloaded image as a PIL Image object.

    Raises:
        RuntimeError: If the request fails or returns an error status, or if any other
            error occurs while fetching or opening the image. The original exception is
            attached as the cause.
    """
    headers = {"User-Agent": f"mistral-common/{__version__}"}
    try:
        # Make a request to download the image
        # NOTE(review): no `timeout` is passed, so a stalled server can block this call
        # indefinitely; consider adding one.
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for bad responses (4xx, 5xx)

        # Convert the image content to a PIL Image
        return Image.open(io.BytesIO(response.content))

    except requests.exceptions.RequestException as e:
        # Chain explicitly so the underlying request failure stays attached as the cause.
        raise RuntimeError(f"Error downloading the image from {url}: {e}.") from e
    except Exception as e:
        raise RuntimeError(f"Error converting to PIL image: {e}") from e

`image_from_chunk(chunk)`

Get a serializable image from a chunk.

Parameters:

Name	Type	Description	Default
`chunk`	`Union[ImageURLChunk, ImageChunk]`	The chunk to get the image from.	required

Returns:

Type	Description
`SerializableImage`	The image as a PIL Image object.

Source code in src/mistral_common/tokens/tokenizers/image.py

def image_from_chunk(chunk: Union[ImageURLChunk, ImageChunk]) -> SerializableImage:
    r"""Get a serializable image from a chunk.

    An ``ImageChunk`` already carries its image. For an ``ImageURLChunk`` the url decides
    how the image is obtained: ``data:image`` urls are base64-decoded, ``file`` urls are
    opened from disk, and ``http`` urls are downloaded.

    Args:
        chunk: The chunk to get the image from.

    Returns:
        The image as a PIL Image object.

    Raises:
        RuntimeError: If the url starts with none of the supported prefixes.
    """
    if isinstance(chunk, ImageChunk):
        return chunk.image

    url = chunk.get_url()
    if url.startswith("data:image"):
        # The base64 payload follows the first comma of the data URL.
        encoded = url.split(",")[1]
        return Image.open(BytesIO(base64.b64decode(encoded)))
    if url.startswith("file"):
        # Hand Pillow the path rather than an already-open handle: a handle opened here
        # would never be closed by anyone, whereas Pillow manages files it opens itself.
        return Image.open(url.replace("file://", ""))
    if url.startswith("http"):
        return download_image(url)

    raise RuntimeError(f"Unsupported image url scheme {url}")

`is_cv2_installed()`

Check if OpenCV is installed.

Source code in src/mistral_common/tokens/tokenizers/image.py

def is_cv2_installed() -> bool:
    r"""Check if OpenCV is installed.

    Returns:
        The value of the module-level ``_cv2_installed`` flag.
    """
    # NOTE(review): the flag is set outside this block, presumably when `cv2` is imported
    # at module load; confirm against the module's import section.
    return _cv2_installed

`normalize(np_image, mean, std)`

Normalize a tensor image with mean and standard deviation.

Parameters:

Name	Type	Description	Default
`np_image`	`ndarray`	Image to be normalized.	required
`mean`	`Tuple[float, float, float]`	Mean for each channel.	required
`std`	`Tuple[float, float, float]`	Standard deviation for each channel.	required

Returns:

Type	Description
`ndarray`	Normalized image with shape (C, H, W).

Source code in src/mistral_common/tokens/tokenizers/image.py

def normalize(
    np_image: np.ndarray,
    mean: Tuple[float, float, float],
    std: Tuple[float, float, float],
) -> np.ndarray:
    r"""Scale an image by 1/255, standardize it per channel and move channels first.

    Args:
        np_image: Image to be normalized, with channels on the last axis.
        mean: Mean for each channel.
        std: Standard deviation for each channel.

    Returns:
        Normalized image with shape (C, H, W).
    """
    scaled = np_image / 255.0

    # The messages spell out the original expression names so they read the same as `{expr=}`.
    assert scaled.ndim == 3, f"np_image.shape={scaled.shape!r}"
    n_channels = scaled.shape[2]
    assert n_channels == len(mean) and len(mean) == len(std), (
        f"np_image.shape={scaled.shape!r}, mean={mean!r}, std={std!r}"
    )

    # `mean` and `std` broadcast against the trailing (channel) axis.
    centered = scaled - mean
    standardized = centered / std

    # (H, W, C) -> (C, H, W)
    return standardized.transpose(2, 0, 1)

`transform_image(image, new_size)`

Transform an image to a numpy array with the given size.

Parameters:

Name	Type	Description	Default
`image`	`Image`	Image to be transformed.	required
`new_size`	`Tuple[int, int]`	New size of the image.	required

Returns:

Type	Description
`ndarray`	Transformed image with shape (C, H, W).

Source code in src/mistral_common/tokens/tokenizers/image.py

def transform_image(image: Image.Image, new_size: Tuple[int, int]) -> np.ndarray:
    r"""Resize an image with OpenCV and normalize it into a channel-first array.

    Args:
        image: Image to be transformed.
        new_size: New size of the image.

    Returns:
        Transformed image with shape (C, H, W).

    Raises:
        ImportError: If OpenCV is not installed.
    """
    if not is_cv2_installed():
        raise ImportError("OpenCV is required for this function. Install it with 'pip install mistral-common[opencv]'")

    # Convert to RGB first, then work on float32 pixels.
    rgb_pixels = np.array(_convert_to_rgb(image), dtype=np.float32)
    resized = cv2.resize(rgb_pixels, new_size, interpolation=cv2.INTER_CUBIC)
    return normalize(resized, DATASET_MEAN, DATASET_STD)

mistral_common.tokens.tokenizers.multimodal

ImageChunk(**data)

from_openai(openai_chunk) classmethod

to_openai()

ImageConfig(image_patch_size, max_image_size, spatial_merge_size=1) dataclass

ImageEncoder(image_config, special_ids)

__call__(content)

ImageEncoding(tokens, image) dataclass

ImageURLChunk(**data)

from_openai(openai_chunk) classmethod

to_openai()

MultiModalVersion

SpecialImageIDs(img, img_break, img_end) dataclass

download_image(url)

image_from_chunk(chunk)

is_cv2_installed()

normalize(np_image, mean, std)

transform_image(image, new_size)

`mistral_common.tokens.tokenizers.multimodal`

`ImageChunk(**data)`

`from_openai(openai_chunk)` `classmethod`

`to_openai()`

`ImageConfig(image_patch_size, max_image_size, spatial_merge_size=1)` `dataclass`

`ImageEncoder(image_config, special_ids)`

`__call__(content)`

`ImageEncoding(tokens, image)` `dataclass`

`ImageURLChunk(**data)`

`from_openai(openai_chunk)` `classmethod`

`to_openai()`

`MultiModalVersion`

`SpecialImageIDs(img, img_break, img_end)` `dataclass`

`download_image(url)`

`image_from_chunk(chunk)`

`is_cv2_installed()`

`normalize(np_image, mean, std)`

`transform_image(image, new_size)`