Skip to content

mistral_common.guidance.grammar_factory

GrammarFactory(tokenizer)

Generates grammars for a given tokenizer.

Parameters:

Name Type Description Default
tokenizer MistralTokenizer

The Mistral tokenizer to generate grammars for.

required

Raises:

Type Description
ValueError

If the tokenizer is not supported (see is_supported).

Source code in src/mistral_common/guidance/grammar_factory.py
def __init__(self, tokenizer: MistralTokenizer) -> None:
    r"""Initialize the grammar factory.

    Args:
        tokenizer: The Mistral tokenizer to generate grammars for.

    Raises:
        ValueError: If the tokenizer is not supported (see
            [`is_supported`][mistral_common.guidance.grammar_factory.GrammarFactory.is_supported]).
    """
    # Optional dependencies must both be importable before any setup work.
    assert_llguidance_installed()
    assert_jinja2_installed()

    inner_tokenizer = tokenizer.instruct_tokenizer.tokenizer
    self._tokenizer = inner_tokenizer

    if not self.is_supported(tokenizer):
        message = (
            f"Guidance requires a Tekken tokenizer with version >= v11, "
            f"got {type(inner_tokenizer).__name__} {inner_tokenizer.version.value}"
        )
        raise ValueError(message)

    self._llg_tokenizer = from_mistral_tokenizer(tokenizer)
    self._special_token_map = self._build_special_token_map()

get_lark_for_json_schema(template, json_schema)

Returns a lark grammar that only accepts JSON objects matching the given schema.

Parameters:

Name Type Description Default
template str

Jinja template to render as a string.

required
json_schema dict[str, Any]

The JSON schema to validate against.

required

Returns:

Type Description
str

The rendered lark grammar string that only matches the given JSON schema.

Source code in src/mistral_common/guidance/grammar_factory.py
def get_lark_for_json_schema(self, template: str, json_schema: dict[str, Any]) -> str:
    r"""Returns a lark grammar that only accepts JSON objects matching the given schema.

    Args:
        template: Jinja template to render as a string.
        json_schema: The JSON schema to validate against.

    Returns:
        The rendered lark grammar string that only matches the given JSON schema.
    """
    # Delegate to the general renderer, pinned to JSON-only output with no tools.
    render_kwargs: dict[str, Any] = {
        "template": template,
        "mode": ToolChoiceEnum.none,
        "tools": None,
        "json_schema": json_schema,
        "parallel_tool_calls": True,
        "json_only": True,
    }
    return self.get_lark_from_jinja(**render_kwargs)

get_lark_from_jinja(template, mode, tools, json_schema, parallel_tool_calls, json_only=False)

Renders a lark grammar from a jinja template.

Parameters:

Name Type Description Default
template str

Jinja template to render as a string.

required
mode ToolChoice

The function calling mode (auto, any, none).

required
tools list[Tool] | None

The list of tools available.

required
json_schema dict[str, Any] | None

JSON schema to additionally allow, unioned with the grammar.

required
parallel_tool_calls bool

Whether parallel tool calls are allowed.

required
json_only bool

If True, generates only JSON schema grammar without text/tool call alternatives.

False

Returns:

Type Description
str

The rendered lark grammar string.

Source code in src/mistral_common/guidance/grammar_factory.py
def get_lark_from_jinja(
    self,
    template: str,
    mode: ToolChoice,
    tools: list[Tool] | None,
    json_schema: dict[str, Any] | None,
    parallel_tool_calls: bool,
    json_only: bool = False,
) -> str:
    r"""Renders a lark grammar from a jinja template.

    Args:
        template: Jinja template to render as a string.
        mode: The function calling mode (auto, any, none).
        tools: The list of tools available.
        json_schema: JSON schema to additionally allow, unioned with the grammar.
        parallel_tool_calls: Whether parallel tool calls are allowed.
        json_only: If True, generates only JSON schema grammar without text/tool call alternatives.

    Returns:
        The rendered lark grammar string.
    """
    # Verifies that the NamedToolChoice has a valid tool and "any", "required" has tools.
    _validate_mode_and_tools(mode=mode, tools=tools)

    tool_call_grammar = _convert_tool_calls(tools, mode, parallel_tool_calls, self._special_token_lark)

    serialized_schema = None
    if json_schema:
        serialized_schema = json.dumps(json_schema, ensure_ascii=False)

    # NamedToolChoice forces a specific tool, which maps to "required" grammar.
    if isinstance(mode, NamedToolChoice):
        effective_mode = ToolChoiceEnum.required
    else:
        effective_mode = ToolChoiceEnum(mode)

    return _cached_get_lark_from_jinja(
        template=template,
        mode=effective_mode.value,
        fcall=tool_call_grammar,
        json_schema_str=serialized_schema,
        parallel_tool_calls=parallel_tool_calls,
        json_only=json_only,
        think_with_json=self._tokenizer.version.supports_model_settings,
        begin_think_token=self._get_optional_special_token_lark(SpecialTokens.begin_think.value),
        end_think_token=self._get_optional_special_token_lark(SpecialTokens.end_think.value),
    )

is_supported(tokenizer) staticmethod

Checks whether the given tokenizer is supported by guidance.

Guidance requires a Tekken tokenizer with version >= v11.

Parameters:

Name Type Description Default
tokenizer MistralTokenizer

The Mistral tokenizer to check.

required

Returns:

Type Description
bool

Whether the tokenizer is supported.

Source code in src/mistral_common/guidance/grammar_factory.py
@staticmethod
def is_supported(tokenizer: MistralTokenizer) -> bool:
    r"""Checks whether the given tokenizer is supported by guidance.

    Guidance requires a Tekken tokenizer with version >= v11.

    Args:
        tokenizer: The Mistral tokenizer to check.

    Returns:
        Whether the tokenizer is supported.
    """
    inner_tokenizer = tokenizer.instruct_tokenizer.tokenizer
    # Only Tekken tokenizers are eligible at all.
    if not is_tekkenizer(inner_tokenizer):
        return False
    # Versions earlier than v11 are rejected (kept as `not <` rather than `>=`
    # to match the original comparison on the version enum).
    return not inner_tokenizer.version < TokenizerVersion.v11

select_jinja_template(reasoning)

Selects and returns the appropriate jinja template content based on tokenizer version and reasoning mode.

Parameters:

Name Type Description Default
reasoning bool

Whether reasoning/thinking mode is enabled.

required

Returns:

Type Description
str

The jinja template content as a string.

Source code in src/mistral_common/guidance/grammar_factory.py
def select_jinja_template(self, reasoning: bool) -> str:
    r"""Selects and returns the appropriate jinja template content based on tokenizer version and reasoning mode.

    Args:
        reasoning: Whether reasoning/thinking mode is enabled.

    Returns:
        The jinja template content as a string.
    """
    version = self._tokenizer.version
    return _cached_get_jinja_template(tokenizer_version=version, reasoning=reasoning)