vllm.compilation.vllm_inductor_pass

logger module-attribute

logger = init_logger(__name__)

PrinterInductorPass

Bases: VllmInductorPass

Source code in vllm/compilation/vllm_inductor_pass.py
class PrinterInductorPass(VllmInductorPass):

    def __init__(self, name: str, config: VllmConfig, always=False):
        super().__init__(config)
        self.name = name
        self.always = always

    def __call__(self, graph: torch.fx.Graph):
        self.dump_graph(graph, self.name, always=self.always)

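The pass performs no transformation of its own; it simply dumps the graph under the given name whenever it is invoked, which makes it useful as a probe around other passes. A minimal usage sketch (vllm_config and fusion_pass are assumed to exist and are not part of this module):

import torch

pre_print = PrinterInductorPass("before_fusion", vllm_config, always=True)
post_print = PrinterInductorPass("after_fusion", vllm_config, always=True)

def run_with_dumps(graph: torch.fx.Graph):
    pre_print(graph)    # dump the graph before the transformation
    fusion_pass(graph)  # assumed: some pass that rewrites the graph
    post_print(graph)   # dump the result for side-by-side comparison
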
always instance-attribute

always = always

name instance-attribute

name = name

__call__

__call__(graph: Graph)
Source code in vllm/compilation/vllm_inductor_pass.py
def __call__(self, graph: torch.fx.Graph):
    self.dump_graph(graph, self.name, always=self.always)

__init__

__init__(name: str, config: VllmConfig, always=False)
Source code in vllm/compilation/vllm_inductor_pass.py
def __init__(self, name: str, config: VllmConfig, always=False):
    super().__init__(config)
    self.name = name
    self.always = always

VllmInductorPass

Bases: InductorPass

An inductor pass with access to vLLM PassConfig. It provides timing, logging, and dumping utilities.

Source code in vllm/compilation/vllm_inductor_pass.py
class VllmInductorPass(InductorPass):
    """
    An inductor pass with access to vLLM PassConfig.
    It provides timing, logging, and dumping utilities.
    """

    def __init__(self, config: VllmConfig):
        self.pass_config = config.compilation_config.pass_config
        self.model_dtype = config.model_config.dtype if config.model_config \
            else None
        self.device = config.device_config.device if config.device_config \
            else None
        self.pass_name = self.__class__.__name__

    def dump_graph(self, graph: torch.fx.Graph, stage: str, always=False):
        lazy_format_graph_code(stage, graph.owning_module)

        if stage in self.pass_config.dump_graph_stages or always:
            # Make sure filename includes rank in the distributed setting
            parallel = p_is_init() and get_tp_world_size() > 1
            rank = f"-{get_tp_rank()}" if parallel else ""
            filepath = self.pass_config.dump_graph_dir / f"{stage}{rank}.py"

            logger.info("%s printing graph to %s", self.pass_name, filepath)
            with open(filepath, "w") as f:
                src = graph.python_code(root_module="self", verbose=True).src
                # Add imports so it's not full of errors
                print("import torch; from torch import device", file=f)
                print(src, file=f)

    def begin(self):
        self._start_time = time.perf_counter_ns()

    def end_and_log(self):
        self._end_time = time.perf_counter_ns()
        duration_ms = float(self._end_time - self._start_time) / 1.0e6
        logger.debug("%s completed in %.1f ms", self.pass_name, duration_ms)

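A minimal sketch of a custom pass built on these utilities, assuming a VllmConfig instance is available; the no-op body stands in for a real graph rewrite:

import torch

class NoOpPass(VllmInductorPass):
    """Illustrative subclass: times itself and dumps the graph."""

    def __call__(self, graph: torch.fx.Graph):
        self.begin()                           # start the nanosecond timer
        self.dump_graph(graph, "before_noop")
        # a real pass would rewrite `graph` here
        self.dump_graph(graph, "after_noop")
        self.end_and_log()                     # logs "NoOpPass completed in X ms"
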
device instance-attribute

device = config.device_config.device if config.device_config else None

model_dtype instance-attribute

model_dtype = config.model_config.dtype if config.model_config else None

pass_config instance-attribute

pass_config = config.compilation_config.pass_config

pass_name instance-attribute

pass_name = self.__class__.__name__

__init__

__init__(config: VllmConfig)
Source code in vllm/compilation/vllm_inductor_pass.py
def __init__(self, config: VllmConfig):
    self.pass_config = config.compilation_config.pass_config
    self.model_dtype = config.model_config.dtype if config.model_config \
        else None
    self.device = config.device_config.device if config.device_config \
        else None
    self.pass_name = self.__class__.__name__

begin

begin()
Source code in vllm/compilation/vllm_inductor_pass.py
def begin(self):
    self._start_time = time.perf_counter_ns()

dump_graph

dump_graph(graph: Graph, stage: str, always=False)
Source code in vllm/compilation/vllm_inductor_pass.py
def dump_graph(self, graph: torch.fx.Graph, stage: str, always=False):
    lazy_format_graph_code(stage, graph.owning_module)

    if stage in self.pass_config.dump_graph_stages or always:
        # Make sure filename includes rank in the distributed setting
        parallel = p_is_init() and get_tp_world_size() > 1
        rank = f"-{get_tp_rank()}" if parallel else ""
        filepath = self.pass_config.dump_graph_dir / f"{stage}{rank}.py"

        logger.info("%s printing graph to %s", self.pass_name, filepath)
        with open(filepath, "w") as f:
            src = graph.python_code(root_module="self", verbose=True).src
            # Add imports so it's not full of errors
            print("import torch; from torch import device", file=f)
            print(src, file=f)

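dump_graph always routes the graph through lazy_format_graph_code, but only writes a standalone .py file when the stage name appears in PassConfig.dump_graph_stages (or always=True is passed). A hedged sketch of enabling file dumps, assuming the config fields can be assigned directly on an existing config object (the real config may be constructed differently):

from pathlib import Path

# Assumed: `vllm_config` is an already-constructed VllmConfig.
pass_config = vllm_config.compilation_config.pass_config
pass_config.dump_graph_stages = ["before_noop", "after_noop"]
pass_config.dump_graph_dir = Path("/tmp/vllm_graphs")

Any pass built from this config then writes /tmp/vllm_graphs/before_noop.py when it dumps that stage, with a "-<rank>" suffix appended when tensor parallelism is initialized and the world size is greater than 1.
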
end_and_log

end_and_log()
Source code in vllm/compilation/vllm_inductor_pass.py
def end_and_log(self):
    self._end_time = time.perf_counter_ns()
    duration_ms = float(self._end_time - self._start_time) / 1.0e6
    logger.debug("%s completed in %.1f ms", self.pass_name, duration_ms)