Skip to content

vllm.transformers_utils.processors.bagel

BAGEL processor for image and text inputs.

BagelProcessor

Bases: ProcessorMixin

Constructs a BAGEL processor which wraps a SigLIP image processor and a Qwen2 tokenizer.

Source code in vllm/transformers_utils/processors/bagel.py
class BagelProcessor(ProcessorMixin):
    """
    Constructs a BAGEL processor which wraps a
    SigLIP image processor and a Qwen2 tokenizer.

    Calling the processor runs the image processor on ``images`` and the
    tokenizer on ``text`` (each only when provided) and merges both results
    into a single ``BatchFeature``.
    """

    # Component names and the class names declared for each of them.
    attributes = ["image_processor", "tokenizer"]
    image_processor_class = "SiglipImageProcessor"
    tokenizer_class = "AutoTokenizer"

    def __call__(
        self,
        text: TextInput
        | PreTokenizedInput
        | list[TextInput]
        | list[PreTokenizedInput]
        | None = None,
        images: ImageInput | None = None,
        **kwargs: Unpack[BagelProcessorKwargs],
    ) -> BatchFeature:
        """
        Prepare one or several sequence(s) and image(s) for the model.

        Args:
            text: Text (or batch of texts) passed to the tokenizer. When
                ``None``, no tokenization is performed.
            images: Image(s) passed to the image processor. When ``None``,
                no image processing is performed.
            **kwargs: Extra options, merged with the tokenizer's init kwargs
                via ``_merge_kwargs`` and split into ``images_kwargs`` and
                ``text_kwargs``.

        Returns:
            A ``BatchFeature`` holding the image processor outputs followed
            by the tokenizer outputs. It is empty when both ``text`` and
            ``images`` are ``None``.
        """
        output_kwargs = self._merge_kwargs(
            BagelProcessorKwargs,
            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
            **kwargs,
        )

        # Each modality is optional; a missing one contributes no entries.
        if images is not None:
            image_inputs = self.image_processor(
                images, **output_kwargs["images_kwargs"]
            )
        else:
            image_inputs = {}

        if text is not None:
            text_inputs = self.tokenizer(text, **output_kwargs["text_kwargs"])
        else:
            text_inputs = {}

        # Tokenizer entries are unpacked last, so on a key collision the
        # tokenizer's value is the one kept.
        return BatchFeature(data={**image_inputs, **text_inputs})

    def batch_decode(self, *args, **kwargs):
        """
        Forward all arguments to the wrapped tokenizer's ``batch_decode``
        and return its result.
        """
        return self.tokenizer.batch_decode(*args, **kwargs)

    def decode(self, *args, **kwargs):
        """
        Forward all arguments to the wrapped tokenizer's ``decode``
        and return its result.
        """
        return self.tokenizer.decode(*args, **kwargs)

    @property
    def model_input_names(self):
        """
        Input names of the tokenizer followed by those of the image
        processor, with duplicates removed (first occurrence kept).
        """
        tokenizer_input_names = self.tokenizer.model_input_names
        image_processor_input_names = self.image_processor.model_input_names
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))

attributes class-attribute instance-attribute

attributes = ['image_processor', 'tokenizer']

image_processor_class class-attribute instance-attribute

image_processor_class = 'SiglipImageProcessor'

model_input_names property

model_input_names

tokenizer_class class-attribute instance-attribute

tokenizer_class = 'AutoTokenizer'

__call__

__call__(
    text: TextInput
    | PreTokenizedInput
    | list[TextInput]
    | list[PreTokenizedInput] = None,
    images: ImageInput = None,
    **kwargs: Unpack[BagelProcessorKwargs],
)

Main method to prepare one or several sequence(s) and image(s) for the model.

Source code in vllm/transformers_utils/processors/bagel.py
def __call__(
    self,
    text: TextInput
    | PreTokenizedInput
    | list[TextInput]
    | list[PreTokenizedInput]
    | None = None,
    images: ImageInput | None = None,
    **kwargs: Unpack[BagelProcessorKwargs],
) -> BatchFeature:
    """
    Prepare one or several sequence(s) and image(s) for the model.

    Args:
        text: Text (or batch of texts) passed to the tokenizer. When
            ``None``, no tokenization is performed.
        images: Image(s) passed to the image processor. When ``None``,
            no image processing is performed.
        **kwargs: Extra options, merged with the tokenizer's init kwargs
            via ``_merge_kwargs`` and split into ``images_kwargs`` and
            ``text_kwargs``.

    Returns:
        A ``BatchFeature`` holding the image processor outputs followed
        by the tokenizer outputs. It is empty when both ``text`` and
        ``images`` are ``None``.
    """
    output_kwargs = self._merge_kwargs(
        BagelProcessorKwargs,
        tokenizer_init_kwargs=self.tokenizer.init_kwargs,
        **kwargs,
    )

    # Each modality is optional; a missing one contributes no entries.
    if images is not None:
        image_inputs = self.image_processor(
            images, **output_kwargs["images_kwargs"]
        )
    else:
        image_inputs = {}

    if text is not None:
        text_inputs = self.tokenizer(text, **output_kwargs["text_kwargs"])
    else:
        text_inputs = {}

    # Tokenizer entries are unpacked last, so on a key collision the
    # tokenizer's value is the one kept.
    return BatchFeature(data={**image_inputs, **text_inputs})

batch_decode

batch_decode(*args, **kwargs)

This method forwards all its arguments to Qwen2TokenizerFast's batch_decode.

Source code in vllm/transformers_utils/processors/bagel.py
def batch_decode(self, *args, **kwargs):
    """
    Delegate to the wrapped tokenizer's ``batch_decode``, passing every
    positional and keyword argument through unchanged.
    """
    tokenizer = self.tokenizer
    decoded = tokenizer.batch_decode(*args, **kwargs)
    return decoded

decode

decode(*args, **kwargs)

This method forwards all its arguments to Qwen2TokenizerFast's decode.

Source code in vllm/transformers_utils/processors/bagel.py
def decode(self, *args, **kwargs):
    """
    Delegate to the wrapped tokenizer's ``decode``, passing every
    positional and keyword argument through unchanged.
    """
    tokenizer = self.tokenizer
    decoded = tokenizer.decode(*args, **kwargs)
    return decoded

BagelProcessorKwargs

Bases: ProcessingKwargs

Source code in vllm/transformers_utils/processors/bagel.py
class BagelProcessorKwargs(ProcessingKwargs, total=False):  # type: ignore[call-arg]
    """Keyword-argument schema for the BAGEL processor, with its defaults."""

    # Default for the image kwargs group: return_tensors="pt".
    _defaults = {"images_kwargs": {"return_tensors": "pt"}}

_defaults class-attribute instance-attribute

_defaults = {'images_kwargs': {'return_tensors': 'pt'}}